D6734: git: RFC of a new extension to _directly_ operate on git repositories

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Thu Sep 5 19:20:59 UTC 2019


durin42 updated this revision to Diff 16387.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6734?vs=16293&id=16387

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6734/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6734

AFFECTED FILES
  hgext/git/__init__.py
  hgext/git/dirstate.py
  hgext/git/gitlog.py
  hgext/git/index.py
  setup.py
  tests/test-git-interop.t

CHANGE DETAILS

diff --git a/tests/test-git-interop.t b/tests/test-git-interop.t
new file mode 100644
--- /dev/null
+++ b/tests/test-git-interop.t
@@ -0,0 +1,182 @@
+This test requires pygit2:
+  > python -c 'import pygit2' || exit 80
+
+Setup:
+  > GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
+  > GIT_AUTHOR_EMAIL='test at example.org'; export GIT_AUTHOR_EMAIL
+  > GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE
+  > GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME
+  > GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL
+  > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE
+
+  > count=10
+  > gitcommit() {
+  >    GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000";
+  >    GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
+  >    git commit "$@" >/dev/null 2>/dev/null || echo "git commit error"
+  >    count=`expr $count + 1`
+  >  }
+
+  > echo "[extensions]" >> $HGRCPATH
+  > echo "git=" >> $HGRCPATH
+
+Make a new repo with git:
+  $ mkdir foo
+  $ cd foo
+  $ git init
+  Initialized empty Git repository in $TESTTMP/foo/.git/
+Ignore the .hg directory within git:
+  $ echo .hg >> .git/info/exclude
+  $ echo alpha > alpha
+  $ git add alpha
+  $ gitcommit -am 'Add alpha'
+  $ echo beta > beta
+  $ git add beta
+  $ gitcommit -am 'Add beta'
+  $ echo gamma > gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+Without creating the .hg, hg status fails:
+  $ hg status
+  abort: no repository found in '$TESTTMP/foo' (.hg not found)!
+  [255]
+But if you run hg init --git, it works:
+  $ hg init --git
+  $ hg id
+  3d9be8deba43
+  $ hg status
+  ? gamma
+Log works too:
+  $ hg log
+  changeset:   1:3d9be8deba43
+  bookmark:    master
+  user:        test <test at example.org>
+  date:        Mon Jan 01 00:00:11 2007 +0000
+  summary:     Add beta
+  
+  changeset:   0:c5864c9d16fb
+  user:        test <test at example.org>
+  date:        Mon Jan 01 00:00:10 2007 +0000
+  summary:     Add alpha
+  
+
+
+and bookmarks:
+  $ hg bookmarks
+   * master                    1:3d9be8deba43
+
+diff even works transparently in both systems:
+  $ echo blah >> alpha
+  $ git diff
+  diff --git a/alpha b/alpha
+  index 4a58007..faed1b7 100644
+  --- a/alpha
+  +++ b/alpha
+  @@ -1 +1,2 @@
+   alpha
+  +blah
+  $ hg diff --git
+  diff --git a/alpha b/alpha
+  --- a/alpha
+  +++ b/alpha
+  @@ -1,1 +1,2 @@
+   alpha
+  +blah
+
+Remove a file, it shows as such:
+  $ rm alpha
+  $ hg status
+  ! alpha
+  ? gamma
+
+Revert works:
+  $ hg revert alpha --traceback
+  $ hg status
+  ? gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+Add shows sanely in both:
+  $ hg add gamma
+  $ hg status
+  A gamma
+  $ git status
+  On branch master
+  Changes to be committed:
+    (use "git reset HEAD <file>..." to unstage)
+  
+  	new file:   gamma
+  
+
+forget does what it should as well:
+  $ hg forget gamma
+  $ hg status
+  ? gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+hg log FILE
+
+  $ echo a >> alpha
+  $ hg ci -m 'more alpha'
+  $ echo b >> beta
+  $ hg ci -m 'more beta'
+  $ echo a >> alpha
+  $ hg ci -m 'even more alpha'
+  $ hg log -G alpha
+  @  changeset:   4:3d8853b3aed9
+  |  bookmark:    master
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     even more alpha
+  |
+  o  changeset:   2:31e1d4310954
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     more alpha
+  |
+  o  changeset:   0:c5864c9d16fb
+     user:        test <test at example.org>
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     Add alpha
+  
+  $ hg log -G beta
+  o  changeset:   3:e634e4550ceb
+  |  user:        test
+  |  date:        Thu Jan 01 00:00:00 1970 +0000
+  |  summary:     more beta
+  |
+  o  changeset:   1:3d9be8deba43
+  |  user:        test <test at example.org>
+  |  date:        Mon Jan 01 00:00:11 2007 +0000
+  |  summary:     Add beta
+  |
+
+hg annotate
+
+  $ hg annotate alpha
+  0: alpha
+  2: a
+  4: a
+  $ hg annotate beta
+  1: beta
+  3: b
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1079,6 +1079,7 @@
             'hgext', 'hgext.convert', 'hgext.fsmonitor',
             'hgext.fastannotate',
             'hgext.fsmonitor.pywatchman',
+            'hgext.git',
             'hgext.highlight',
             'hgext.infinitepush',
             'hgext.largefiles', 'hgext.lfs', 'hgext.narrow',
diff --git a/hgext/git/index.py b/hgext/git/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/index.py
@@ -0,0 +1,172 @@
+from __future__ import absolute_import
+
+import os
+import sqlite3
+
+from mercurial.i18n import _
+
+from mercurial import (
+    encoding,
+    error,
+    node as nodemod,
+)
+
+import pygit2
+
+_CURRENT_SCHEMA_VERSION = 1
+_SCHEMA = """
+CREATE TABLE refs (
+  -- node and name are unique together. There may be more than one name for
+  -- a given node, and there may be no name at all for a given node (in the
+  -- case of an anonymous hg head).
+  node TEXT NOT NULL,
+  name TEXT
+);
+
+-- The topological heads of the changelog, which hg depends on.
+CREATE TABLE heads (
+  node TEXT NOT NULL
+);
+
+-- A total ordering of the changelog
+CREATE TABLE changelog (
+  rev INTEGER NOT NULL PRIMARY KEY,
+  node TEXT NOT NULL,
+  p1 TEXT,
+  p2 TEXT
+);
+
+CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
+CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
+
+-- Changed files for each commit, which lets us dynamically build
+-- filelogs.
+CREATE TABLE changedfiles (
+  node TEXT NOT NULL,
+  filename TEXT NOT NULL,
+  -- 40 zeroes for deletions
+  filenode TEXT NOT NULL
+);
+
+CREATE INDEX changedfiles_nodes_idx
+  ON changedfiles(node);
+
+PRAGMA user_version=%d
+""" % _CURRENT_SCHEMA_VERSION
+
+def _createdb(path):
+    # print('open db', path)
+    # import traceback
+    # traceback.print_stack()
+    db = sqlite3.connect(encoding.strfromlocal(path))
+    db.text_factory = bytes
+
+    res = db.execute(r'PRAGMA user_version').fetchone()[0]
+
+    # New database.
+    if res == 0:
+        for statement in _SCHEMA.split(';'):
+            db.execute(statement.strip())
+
+        db.commit()
+
+    elif res == _CURRENT_SCHEMA_VERSION:
+        pass
+
+    else:
+        raise error.Abort(_('sqlite database has unrecognized version'))
+
+    db.execute(r'PRAGMA journal_mode=WAL')
+
+    return db
+
+_OUR_ORDER = (pygit2.GIT_SORT_TOPOLOGICAL |
+              pygit2.GIT_SORT_TIME |
+              pygit2.GIT_SORT_REVERSE)
+
+_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
+
+def _index_repo(gitrepo, db, progress_cb):
+    """Rebuild changelog/heads (and any missing changedfiles) in db."""
+    # Identify all references so we can tell the walker to visit all of them.
+    all_refs = gitrepo.listall_references()
+    walker = None
+    possible_heads = set()
+    for pos, ref in enumerate(all_refs):
+        progress_cb('refs', pos)
+        try:
+            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
+        except ValueError:
+            # No commit to be found, so we don't care for hg's purposes.
+            continue
+        possible_heads.add(start.id.hex)
+        if walker is None:
+            walker = gitrepo.walk(start.id, _OUR_ORDER)
+        else:
+            walker.push(start.id)
+    # Empty out the existing changelog. Even for large-ish histories
+    # we can do the top-level "walk all the commits" dance very
+    # quickly as long as we don't need to figure out the changed files
+    # list.
+    db.execute('DELETE FROM changelog')
+    progress_cb('refs', None)
+    # This walker is sure to visit all the revisions in history, but
+    # only once. It is still None when no ref peeled to a commit (e.g. a
+    # freshly initialized repo), in which case there is nothing to walk.
+    for pos, commit in enumerate(() if walker is None else walker):
+        progress_cb('commits', pos)
+        p1 = p2 = nodemod.nullhex
+        if len(commit.parents) > 2:
+            raise error.ProgrammingError(
+                ("git support can't handle octopus merges, "
+                 "found a commit with %d parents :(") % len(commit.parents))
+        if commit.parents:
+            p1 = commit.parents[0].id.hex
+        if len(commit.parents) == 2:
+            p2 = commit.parents[1].id.hex
+        db.execute(
+            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
+            (pos, commit.id.hex, p1, p2))
+
+        num_changedfiles = db.execute(
+            "SELECT COUNT(*) from changedfiles WHERE node = ?",
+            (commit.id.hex,)).fetchone()[0]
+        if not num_changedfiles:
+            # I *think* we only need to check p1 for changed files
+            # (and therefore linkrevs), because any node that would
+            # actually have this commit as a linkrev would be
+            # completely new in this rev.
+            p1 = commit.parents[0].id.hex if commit.parents else None
+            if p1 is not None:
+                patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS)
+            else:
+                patchgen = commit.tree.diff_to_tree(
+                    swap=True, flags=_DIFF_FLAGS)
+            new_files = (p.delta.new_file for p in patchgen)
+            files = {nf.path: nf.id.hex for nf in new_files
+                     if nf.id.raw != nodemod.nullid}
+            for p, n in files.items():
+                db.execute(
+                    'INSERT INTO changedfiles (node, filename, filenode) '
+                    'VALUES(?, ?, ?)',
+                    (commit.id.hex, p, n))
+    db.execute('DELETE FROM heads')
+    for h in possible_heads:
+        haschild = db.execute(
+            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?',
+            (h, h)).fetchone()[0]
+        if not haschild:
+            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))
+
+    progress_cb('commits', None)
+
+def get_index(gitrepo):
+    cachepath = os.path.join(gitrepo.path, '..', '.hg', 'cache')
+    if not os.path.exists(cachepath):
+        os.makedirs(cachepath)
+    dbpath = os.path.join(cachepath, 'git-commits.sqlite')
+    db = _createdb(dbpath)
+    # TODO check against gitrepo heads before doing a full index
+    # TODO thread a ui.progress call into this layer
+    _index_repo(gitrepo, db, lambda x, y: None)
+    return db
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitlog.py
@@ -0,0 +1,202 @@
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+
+from mercurial import (
+    ancestor,
+    changelog as hgchangelog,
+    error,
+    manifest,
+    node as nodemod,
+    revlog,
+)
+
+class baselog(object): # revlog.revlog):
+    """Common implementations between changelog and manifestlog."""
+    def __init__(self, gr, db):
+        self.gitrepo = gr
+        self._db = db
+
+    def __len__(self):
+        return int(self._db.execute(
+            'SELECT COUNT(*) FROM changelog').fetchone()[0])
+
+    def rev(self, n):
+        if n == nodemod.nullid:
+            return -1
+        t = self._db.execute(
+            'SELECT rev FROM changelog WHERE node = ?',
+            (nodemod.hex(n),)).fetchone()
+        if t is None:
+            raise error.LookupError(n, '00changelog.i', _('no node'))
+        return t[0]
+
+    def node(self, r):
+        if r == nodemod.nullrev:
+            return nodemod.nullid
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE rev = ?',
+            (r,)).fetchone()
+        if t is None:
+            raise error.LookupError(r, '00changelog.i', _('no node'))
+        return nodemod.bin(t[0])
+
+
+# TODO: an interface for the changelog type?
+class changelog(baselog):
+
+    @property
+    def filteredrevs(self):
+        # TODO: we should probably add a refs/hg/ namespace for hidden
+        # heads etc, but that's an idea for later.
+        return ()
+
+    @property
+    def nodemap(self):
+        r = {
+            nodemod.bin(v[0]): v[1] for v in
+            self._db.execute('SELECT node, rev FROM changelog')}
+        r[nodemod.nullid] = nodemod.nullrev
+        return r
+
+    def tip(self):
+        """Return the binary node of the highest rev, or nullid if empty."""
+        t = self._db.execute(
+            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1').fetchone()
+        # The index stores hex text; node() and nullid are 20-byte binary.
+        return nodemod.bin(t[0]) if t else nodemod.nullid
+
+    def headrevs(self, revs=None):
+        """Return sorted head revs, limited to revs when revs is non-empty."""
+        realheads = {int(x[0]) for x in self._db.execute(
+            'SELECT rev FROM changelog '
+            'INNER JOIN heads ON changelog.node = heads.node')}
+        if revs:
+            return sorted(r for r in revs if r in realheads)
+        return sorted(realheads)
+
+    def changelogrevision(self, nodeorrev):
+        # Ensure we have a node id
+        if isinstance(nodeorrev, int):
+            n = self.node(nodeorrev)
+        else:
+            n = nodeorrev
+        # handle looking up nullid
+        if n == nodemod.nullid:
+            return hgchangelog._changelogrevision(extra={})
+        hn = nodemod.hex(n)
+        # We've got a real commit!
+        files = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode != ?',
+            (hn, nodemod.nullhex))]
+        filesremoved = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode = ?',
+            (hn, nodemod.nullhex))]
+        c = self.gitrepo[hn]
+        return hgchangelog._changelogrevision(
+            manifest=n, # pretend manifest the same as the commit node
+            user='%s <%s>' % (c.author.name, c.author.email),
+            # TODO: a fuzzy memory from hg-git hacking says this should be -offset
+            date=(c.author.time, c.author.offset),
+            files=files,
+            # TODO filesadded in the index
+            filesremoved=filesremoved,
+            description=c.message,
+            # TODO do we want to handle extra? how?
+            extra={b'branch': b'default'},
+        )
+
+    def parentrevs(self, rev):
+        """Return the (p1, p2) revision numbers of rev, nullrev if absent."""
+        c = self.gitrepo[nodemod.hex(self.node(rev))]
+        if len(c.parents) > 2:
+            raise error.Abort('TODO octopus merge handling')
+        p1 = p2 = nodemod.nullrev
+        if c.parents:
+            p1 = self.rev(c.parents[0].id.raw)
+        if len(c.parents) == 2:
+            # The second parent, not a repeat of the first.
+            p2 = self.rev(c.parents[1].id.raw)
+        return p1, p2
+
+    # Private method is used at least by the tags code.
+    _uncheckedparentrevs = parentrevs
+
+    def commonancestorsheads(self, a, b):
+        # TODO the revlog version of this has a C path, so we probably
+        # need to optimize this...
+        a, b = self.rev(a), self.rev(b)
+        return [self.node(n) for n in
+                ancestor.commonancestorsheads(self.parentrevs, a, b)]
+
+class gittreemanifest(object):
+    def __init__(self, gt):
+        self._tree = gt
+
+    def __contains__(self, k):
+        return k in self._tree
+
+    def __getitem__(self, k):
+        return self._tree[k].id.raw
+
+    def flags(self, k):
+        # TODO flags handling
+        return ''
+
+    def walk(self, match):
+        for f in self._tree:
+            # TODO recurse into subtrees...
+            yield f.name
+
+
+#@interfaceutil.implementer(repository.imanifestrevisionstored)
+class gittreemanifestctx(object):
+    def __init__(self, gittree):
+        self._tree = gittree
+
+    def read(self):
+        return gittreemanifest(self._tree)
+
+    def find(self, path):
+        return self.read()[path], self.read().flags(path)
+
+class manifestlog(baselog):
+
+    def __getitem__(self, node):
+        return self.get('', node)
+
+    def get(self, relpath, node):
+        if node == nodemod.nullid:
+            return manifest.memtreemanifestctx(self, relpath)
+        commit = self.gitrepo[nodemod.hex(node)]
+        t = commit.tree
+        if relpath:
+            parts = relpath.split('/')
+            for p in parts:
+                te = t[p]
+                t = self.gitrepo[te.id]
+        return gittreemanifestctx(t)
+
+class filelog(baselog):
+    def __init__(self, gr, db, path):
+        super(filelog, self).__init__(gr, db)
+        self.path = path
+
+    def read(self, node):
+        return self.gitrepo[nodemod.hex(node)].data
+
+    def lookup(self, node):
+        """Resolve a 20-byte binary or 40-char hex node to a binary node."""
+        if len(node) not in (20, 40):
+            node = int(node)
+        if isinstance(node, int):
+            assert False, 'todo revnums for nodes'
+        if len(node) == 40:
+            hnode, node = node, nodemod.bin(node)
+        else:
+            hnode = nodemod.hex(node)
+        if hnode in self.gitrepo:
+            return node
+        raise error.LookupError(node, self.path, _('no match found'))
diff --git a/hgext/git/dirstate.py b/hgext/git/dirstate.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/dirstate.py
@@ -0,0 +1,280 @@
+from __future__ import absolute_import
+
+import errno
+import os
+import stat
+
+from mercurial import (
+    dirstate,
+    error,
+    extensions,
+    match as matchmod,
+    node as nodemod,
+    parsers,
+    scmutil,
+    util,
+)
+from mercurial.i18n import _
+
+import pygit2
+
+
+def readpatternfile(orig, filepath, warn, sourceinfo=False):
+    """Read git ignore files natively; defer everything else to orig."""
+    if not ('info/exclude' in filepath or filepath.endswith('.gitignore')):
+        return orig(filepath, warn, sourceinfo=sourceinfo)
+    result, warnings = [], []
+    with open(filepath, 'rb') as fp:
+        for l in fp:
+            l = l.strip()
+            if not l or l.startswith('#'):
+                continue
+            if l.startswith('!'):
+                warnings.append('unsupported ignore pattern %s' % l)
+                continue
+            if l.startswith('/'):
+                # on reflection, I think /foo is just glob:
+                result.append('glob:' + l[1:])
+            else:
+                result.append('relglob:' + l)
+    return result, warnings
+extensions.wrapfunction(matchmod, 'readpatternfile', readpatternfile)
+
+
+class _gitdirstatemap(object):
+    def __init__(self, ui, opener, root):
+        self._ui = ui
+        self._opener = opener
+        self._root = root
+
+_STATUS_MAP = {
+    pygit2.GIT_STATUS_CONFLICTED: 'm',
+    pygit2.GIT_STATUS_CURRENT: 'n',
+    pygit2.GIT_STATUS_IGNORED: '?',
+    pygit2.GIT_STATUS_INDEX_DELETED: 'r',
+    pygit2.GIT_STATUS_INDEX_MODIFIED: 'n',
+    pygit2.GIT_STATUS_INDEX_NEW: 'a',
+    pygit2.GIT_STATUS_INDEX_RENAMED: 'a',
+    pygit2.GIT_STATUS_INDEX_TYPECHANGE: 'n',
+    pygit2.GIT_STATUS_WT_DELETED: 'r',
+    pygit2.GIT_STATUS_WT_MODIFIED: 'n',
+    pygit2.GIT_STATUS_WT_NEW: 'a',
+    pygit2.GIT_STATUS_WT_RENAMED: 'a',
+    pygit2.GIT_STATUS_WT_TYPECHANGE: 'n',
+    pygit2.GIT_STATUS_WT_UNREADABLE: '?',
+}
+
+
+# TODO dirstate wants to be an interface
+class gitdirstate(object): # dirstate.dirstate):
+    _mapcls = _gitdirstatemap
+
+    def __init__(self, ui, gitrepo):
+        self._ui = ui
+        self.git = gitrepo
+
+    def p1(self):
+        return self.git.head.peel().id.raw
+
+    def branch(self):
+        return b'default'
+
+    def parents(self):
+        # TODO how on earth do we find p2 if a merge is in flight?
+        return self.p1(), nodemod.nullid
+
+    def __getitem__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+        except KeyError:
+            return '?'
+        return _STATUS_MAP[gs]
+
+    def __contains__(self, filename):
+        try:
+            self.git.status_file(filename)
+            return True
+        except KeyError:
+            return False
+
+    def status(self, match, subrepos, ignored, clean, unknown):
+        # TODO handling of clean files - can we get that from git.status()?
+        modified, added, removed, deleted, unknown, ignored, clean = (
+            [], [], [], [], [], [], [])
+        gstatus = self.git.status()
+        for path, status in gstatus.items():
+            if status == pygit2.GIT_STATUS_IGNORED:
+                if path.endswith('/'):
+                    continue
+                ignored.append(path)
+            elif status in (pygit2.GIT_STATUS_WT_MODIFIED,
+                            pygit2.GIT_STATUS_INDEX_MODIFIED):
+                modified.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_NEW:
+                added.append(path)
+            elif status == pygit2.GIT_STATUS_WT_NEW:
+                unknown.append(path)
+            elif status == pygit2.GIT_STATUS_WT_DELETED:
+                deleted.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_DELETED:
+                removed.append(path)
+            else:
+                raise error.Abort('unhandled case: status for %r is %r' % (
+                    path, status))
+
+        # TODO are we really always sure of status here?
+        return False, scmutil.status(
+            modified, added, removed, deleted, unknown, ignored, clean)
+
+    def flagfunc(self, buildfallback):
+        # TODO we can do better
+        return buildfallback()
+
+    def getcwd(self):
+        # TODO is this a good way to do this?
+        return os.path.dirname(os.path.dirname(self.git.path))
+
+    def normalize(self, path):
+        assert util.normcase(path) == path, 'TODO handling of case folding'
+        return path
+
+    @property
+    def _checklink(self):
+        return util.checklink(os.path.dirname(self.git.path))
+
+    def copies(self):
+        # TODO support copies?
+        return {}
+
+    # # TODO what the heck is this
+    _filecache = set()
+
+    def pendingparentchange(self):
+        # TODO: we need to implement the context manager bits and
+        # correctly stage/revert index edits.
+        return False
+
+    def write(self, tr):
+        # TODO: what's the plan here?
+        pass
+
+    def normal(self, f, parentfiledata=None):
+        """Mark a file normal and clean."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def normallookup(self, f):
+        """Mark a file normal, but possibly dirty."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    @property
+    def _map(self):
+        return {ie.path: None # value should be a dirstatetuple
+                for ie in self.git.index}
+
+    def walk(self, match, subrepos, unknown, ignored, full=True):
+        r = {}
+        cwd = self.getcwd()
+        for ie in self.git.index:
+            try:
+                s = os.stat(os.path.join(cwd, ie.path))
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                continue
+            r[ie.path] = s
+        return r
+
+# it _feels_ like we could do this instead, and get this data right
+# from the git index:
+#
+# @attr.s
+# class gitstat(object):
+#     st_dev = attr.ib()
+#     st_mode = attr.ib()
+#     st_nlink = attr.ib()
+#     st_size = attr.ib()
+#     st_mtime = attr.ib()
+#     st_ctime = attr.ib()
+
+class old:
+
+    def parents(self):
+        # TODO handle merge state
+        try:
+            commit = self._repo.gitrepo['HEAD']
+        except KeyError:
+            # HEAD was missing or invalid, return nullid
+            return nodemod.nullid, nodemod.nullid
+        return nodemod.bin(commit.id), nodemod.nullid
+
+    def _read_pl(self):
+        return self.parents()
+
+    _pl = property(_read_pl, lambda *args: None)
+
+    def branch(self):
+        return 'default'
+
+    def rebuild(self, parent, files):
+        return dirstate.dirstate.rebuild(self, parent, files)
+
+    def _ignorefiles(self):
+        # TODO find all gitignore files
+        files = [self._join('.gitignore'), self._join(
+            os.path.join('.git', 'info', 'exclude'))]
+        for name, path in self._ui.configitems("ui"):
+            if name == 'ignore' or name.startswith('ignore.'):
+                files.append(util.expandpath(path))
+        return files
+
+    def walk(self, match, subrepos, unknown, ignored, full=True):
+        # wrap matchfn so it excludes all of .git - we don't want to ignore
+        # .git because then hg purge --all (or similar) might destroy the repo
+        mf = match.matchfn
+        def imatch(f):
+            if f.startswith('.git/'): return False
+            return mf(f)
+        match.matchfn = imatch
+        # This is horrible perf-wise, but prevents dirstate.walk from
+        # skipping our match function.
+        match._always = False
+        return dirstate.dirstate.walk(self, match, subrepos, unknown, ignored,
+                                      full=full)
+
+    @property
+    def _index_path(self):
+        return os.path.join(self._root, '.git', 'index')
+
+    def write(self, unused):
+        self.gitrepo.status()
+
+    def _read(self):
+        self._map = {}
+        # TODO actually handle copies
+        self._copymap = {}
+        idx = self.idx
+        p1 = self._repo[self.parents()[0]]
+        for p in idx:
+            _, mtime, _, _, mode, _, _, size, _, flags = idx[p]
+            # throw out nsecs we don't use anyway
+            try:
+                mtime, _ = mtime
+            except TypeError:
+                pass # mtime must already have been a float
+            assume_valid = bool(flags & (1 << 15))
+            update_needed = bool(flags & (1 << 14))
+            stage = (flags >> 12) & 3 # this is used during merge.
+                                    # Not sure quite what it is though.
+            state = 'n' # XXX THIS IS A LIE
+                        # this should be 'a' for adds and 'r' for removes
+
+            # git stores symlinks with a mode of 000, we need it to be 777
+            if mode == stat.S_IFLNK:
+                mode = mode | 0o777
+
+            # this is a crude hack, but makes 'hg forget' work
+            if p not in p1:
+                state = 'a'
+            self._map[p] = parsers.dirstatetuple(state, mode, size, mtime)
diff --git a/hgext/git/__init__.py b/hgext/git/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/__init__.py
@@ -0,0 +1,139 @@
+"""Grant Mercurial the ability to operate on Git repositories. (EXPERIMENTAL)
+
+This is currently super experimental. It probably will consume your
+firstborn a la Rumpelstiltskin, etc.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+from mercurial import (
+    commands,
+    debugcommands,
+    extensions,
+    hg,
+    localrepo,
+    repository,
+    store,
+)
+from mercurial.utils import (
+    interfaceutil,
+)
+
+from . import (
+    dirstate,
+    gitlog,
+    index,
+)
+
+import pygit2
+
+# TODO: extract an interface for this in core
+class gitstore(object): # store.basicstore):
+    def __init__(self, path, vfstype):
+        self.vfs = vfstype(path)
+        self.path = self.vfs.base
+        self.createmode = store._calcmode(self.vfs)
+        # above lines should go away in favor of:
+        # super(gitstore, self).__init__(path, vfstype)
+
+        self.git = pygit2.Repository(os.path.normpath(
+            os.path.join(path, '..', '.git')))
+        self._db = index.get_index(self.git)
+
+    def join(self, f):
+        """Fake store.join method for git repositories.
+
+        For the most part, store.join is used for @storecache
+        decorators to invalidate caches when various files
+        change. We'll map the ones we care about, and ignore the rest.
+        """
+        if f in ('00changelog.i', '00manifest.i'):
+            # This is close enough: in order for the changelog cache
+            # to be invalidated, HEAD will have to change.
+            return os.path.join(self.path, 'HEAD')
+        elif f == 'lock':
+            # TODO: we probably want to map this to a git lock, I
+            # suspect index.lock. We should figure out what the
+            # most-alike file is in git-land. For now we're risking
+            # bad concurrency errors if another git client is used.
+            return os.path.join(self.path, 'hgit-bogus-lock')
+        elif f in ('obsstore', 'phaseroots', 'narrowspec', 'bookmarks'):
+            return os.path.join(self.path, '..', '.hg', f)
+        raise NotImplementedError('Need to pick file for %s.' % f)
+
+    def changelog(self, trypending):
+        # TODO we don't have a plan for trypending in hg's git support yet
+        return gitlog.changelog(self.git, self._db)
+
+    def manifestlog(self, repo, storenarrowmatch):
+        # TODO handle storenarrowmatch and figure out if we need the repo arg
+        return gitlog.manifestlog(self.git, self._db)
+
+    def invalidatecaches(self):
+        pass
+
+def _makestore(orig, requirements, storebasepath, vfstype):
+    if (os.path.exists(os.path.join(storebasepath, 'this-is-git'))
+        and os.path.exists(os.path.join(storebasepath, '..', '.git'))):
+        return gitstore(storebasepath, vfstype)
+    return orig(requirements, storebasepath, vfstype)
+
+class gitfilestorage(object):
+    def file(self, path):
+        if path[0:1] == b'/':
+            path = path[1:]
+        return gitlog.filelog(self.store.git, self.store._db, path)
+
+def _makefilestorage(orig, requirements, features, **kwargs):
+    store = kwargs['store']
+    if isinstance(store, gitstore):
+        return gitfilestorage
+    return orig(requirements, features, **kwargs)
+
+def _setupdothg(ui, path):
+    """Create path/.hg if needed and mark it as fronting a git repository."""
+    dothg = os.path.join(path, '.hg')
+    if os.path.exists(dothg):
+        ui.warn('git repo already initialized for hg\n')
+    else:
+        os.mkdir(dothg)
+        # TODO is it ok to extend .git/info/exclude like this?
+        with open(os.path.join(path, '.git', 'info', 'exclude'), 'ab') as ex:
+            ex.write(b'\n.hg\n')
+    with open(os.path.join(dothg, 'this-is-git'), 'wb'):
+        pass
+    with open(os.path.join(dothg, 'requirements'), 'wb') as f:
+        f.write(b'git\n')
+
+def init(orig, ui, dest='.', **opts):
+    """Wrap `hg init`: with --git, set up .hg beside an existing .git."""
+    if opts.get('git', False):
+        path = os.path.abspath(dest)
+        # TODO: walk up looking for the git repo
+        pygit2.Repository(os.path.join(path, '.git'))  # must open cleanly
+        _setupdothg(ui, path)
+        return 0 # debugcommands.debugrebuilddirstate(
+            # ui, hg.repository(ui, path), rev='.')
+    return orig(ui, dest=dest, **opts)
+
+def reposetup(ui, repo):
+    if isinstance(repo.store, gitstore):
+        orig = repo.__class__
+
+        class gitlocalrepo(orig):
+
+            def _makedirstate(self):
+                # TODO narrow support here
+                return dirstate.gitdirstate(self.ui, repo.store.git)
+
+        repo.__class__ = gitlocalrepo
+    return repo
+
+def extsetup(ui):
+    """Wrap core store factories and add the --git flag to `hg init`."""
+    extensions.wrapfunction(localrepo, 'makestore', _makestore)
+    extensions.wrapfunction(localrepo, 'makefilestorage', _makefilestorage)
+    entry = extensions.wrapcommand(commands.table, 'init', init)
+    entry[1].extend([('', 'git', None, 'set up a git repository instead of hg')])



To: durin42, #hg-reviewers
Cc: mjpieters, mercurial-devel


More information about the Mercurial-devel mailing list