D6734: git: RFC of a new extension to _directly_ operate on git repositories

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Mon Dec 16 16:05:25 EST 2019


durin42 updated this revision to Diff 18757.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6734?vs=18756&id=18757

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6734/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6734

AFFECTED FILES
  hgext/git/__init__.py
  hgext/git/dirstate.py
  hgext/git/gitlog.py
  hgext/git/gitutil.py
  hgext/git/index.py
  setup.py
  tests/test-git-interop.t

CHANGE DETAILS

diff --git a/tests/test-git-interop.t b/tests/test-git-interop.t
new file mode 100644
--- /dev/null
+++ b/tests/test-git-interop.t
@@ -0,0 +1,196 @@
+This test requires pygit2:
+  > python -c 'import pygit2' || exit 80
+
+Setup:
+  > GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
+  > GIT_AUTHOR_EMAIL='test at example.org'; export GIT_AUTHOR_EMAIL
+  > GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE
+  > GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME
+  > GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL
+  > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE
+
+  > count=10
+  > gitcommit() {
+  >    GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000";
+  >    GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
+  >    git commit "$@" >/dev/null 2>/dev/null || echo "git commit error"
+  >    count=`expr $count + 1`
+  >  }
+
+  > echo "[extensions]" >> $HGRCPATH
+  > echo "git=" >> $HGRCPATH
+
+Make a new repo with git:
+  $ mkdir foo
+  $ cd foo
+  $ git init
+  Initialized empty Git repository in $TESTTMP/foo/.git/
+Ignore the .hg directory within git:
+  $ echo .hg >> .git/info/exclude
+  $ echo alpha > alpha
+  $ git add alpha
+  $ gitcommit -am 'Add alpha'
+  $ echo beta > beta
+  $ git add beta
+  $ gitcommit -am 'Add beta'
+  $ echo gamma > gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+Without creating the .hg, hg status fails:
+  $ hg status
+  abort: no repository found in '$TESTTMP/foo' (.hg not found)!
+  [255]
+But if you run hg init --git, it works:
+  $ hg init --git
+  $ hg id --traceback
+  3d9be8deba43 tip master
+  $ hg status
+  ? gamma
+Log works too:
+  $ hg log
+  changeset:   1:3d9be8deba43
+  bookmark:    master
+  tag:         tip
+  user:        test <test at example.org>
+  date:        Mon Jan 01 00:00:11 2007 +0000
+  summary:     Add beta
+  
+  changeset:   0:c5864c9d16fb
+  user:        test <test at example.org>
+  date:        Mon Jan 01 00:00:10 2007 +0000
+  summary:     Add alpha
+  
+
+
+and bookmarks:
+  $ hg bookmarks
+   * master                    1:3d9be8deba43
+
+diff even works transparently in both systems:
+  $ echo blah >> alpha
+  $ git diff
+  diff --git a/alpha b/alpha
+  index 4a58007..faed1b7 100644
+  --- a/alpha
+  +++ b/alpha
+  @@ -1* +1,2 @@ (glob)
+   alpha
+  +blah
+  $ hg diff --git
+  diff --git a/alpha b/alpha
+  --- a/alpha
+  +++ b/alpha
+  @@ -1,1 +1,2 @@
+   alpha
+  +blah
+
+Remove a file, it shows as such:
+  $ rm alpha
+  $ hg status
+  ! alpha
+  ? gamma
+
+Revert works:
+  $ hg revert alpha --traceback
+  $ hg status
+  ? gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+Add shows sanely in both:
+  $ hg add gamma
+  $ hg status
+  A gamma
+  $ hg files
+  alpha
+  beta
+  gamma
+  $ git ls-files
+  alpha
+  beta
+  gamma
+  $ git status
+  On branch master
+  Changes to be committed:
+    (use "git restore --staged <file>..." to unstage)
+  	new file:   gamma
+  
+
+forget does what it should as well:
+  $ hg forget gamma
+  $ hg status
+  ? gamma
+  $ git status
+  On branch master
+  Untracked files:
+    (use "git add <file>..." to include in what will be committed)
+  	gamma
+  
+  nothing added to commit but untracked files present (use "git add" to track)
+
+hg log FILE
+
+  $ echo a >> alpha
+  $ hg ci -m 'more alpha' --traceback
+  $ echo b >> beta
+  $ hg ci -m 'more beta'
+  $ echo a >> alpha
+  $ hg ci -m 'even more alpha'
+  $ hg log -G alpha
+  @  changeset:   4:bd975ddde71c
+  :  bookmark:    master
+  :  tag:         tip
+  :  user:        test <test>
+  :  date:        Thu Jan 01 00:00:00 1970 +0000
+  :  summary:     even more alpha
+  :
+  o  changeset:   2:77f597222800
+  :  user:        test <test>
+  :  date:        Thu Jan 01 00:00:00 1970 +0000
+  :  summary:     more alpha
+  :
+  o  changeset:   0:c5864c9d16fb
+     user:        test <test at example.org>
+     date:        Mon Jan 01 00:00:10 2007 +0000
+     summary:     Add alpha
+  
+  $ hg log -G beta
+  o  changeset:   3:b40d4fed5e27
+  :  user:        test <test>
+  :  date:        Thu Jan 01 00:00:00 1970 +0000
+  :  summary:     more beta
+  :
+  o  changeset:   1:3d9be8deba43
+  |  user:        test <test at example.org>
+  ~  date:        Mon Jan 01 00:00:11 2007 +0000
+     summary:     Add beta
+  
+
+hg annotate
+
+  $ hg annotate alpha
+  0: alpha
+  2: a
+  4: a
+  $ hg annotate beta
+  1: beta
+  3: b
+
+
+Files in subdirectories. TODO: case-folding support, make this `A`
+instead of `a`.
+
+  $ mkdir a
+  $ echo "This is file mu." > a/mu
+  $ hg ci -A -m 'Introduce file a/mu'
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1212,6 +1212,7 @@
     'hgext.fsmonitor',
     'hgext.fastannotate',
     'hgext.fsmonitor.pywatchman',
+    'hgext.git',
     'hgext.highlight',
     'hgext.infinitepush',
     'hgext.largefiles',
diff --git a/hgext/git/index.py b/hgext/git/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/index.py
@@ -0,0 +1,346 @@
+from __future__ import absolute_import
+
+import collections
+import os
+import sqlite3
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+    encoding,
+    error,
+    node as nodemod,
+    pycompat,
+)
+
+from . import gitutil
+
+
+_CURRENT_SCHEMA_VERSION = 1
+_SCHEMA = (
+    """
+CREATE TABLE refs (
+  -- node and name are unique together. There may be more than one name for
+  -- a given node, and there may be no name at all for a given node (in the
+  -- case of an anonymous hg head).
+  node TEXT NOT NULL,
+  name TEXT
+);
+
+-- The "possible heads" of the repository, which we use to figure out
+-- if we need to re-walk the changelog.
+CREATE TABLE possible_heads (
+  node TEXT NOT NULL
+);
+
+-- The topological heads of the changelog, which hg depends on.
+CREATE TABLE heads (
+  node TEXT NOT NULL
+);
+
+-- A total ordering of the changelog
+CREATE TABLE changelog (
+  rev INTEGER NOT NULL PRIMARY KEY,
+  node TEXT NOT NULL,
+  p1 TEXT,
+  p2 TEXT
+);
+
+CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
+CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
+
+-- Changed files for each commit, which lets us dynamically build
+-- filelogs.
+CREATE TABLE changedfiles (
+  node TEXT NOT NULL,
+  filename TEXT NOT NULL,
+  -- 40 zeroes for deletions
+  filenode TEXT NOT NULL,
+-- to handle filelog parentage:
+  p1node TEXT,
+  p1filenode TEXT,
+  p2node TEXT,
+  p2filenode TEXT
+);
+
+CREATE INDEX changedfiles_nodes_idx
+  ON changedfiles(node);
+
+PRAGMA user_version=%d
+"""
+    % _CURRENT_SCHEMA_VERSION
+)
+
+
+def _createdb(path):
+    # Open the sqlite database at ``path`` and return the connection. A
+    # user_version of 0 is treated as a new database and gets _SCHEMA
+    # applied; any version other than the current one aborts.
+    db = sqlite3.connect(encoding.strfromlocal(path))
+    db.text_factory = bytes
+
+    res = db.execute('PRAGMA user_version').fetchone()[0]
+
+    # New database.
+    if res == 0:
+        for statement in _SCHEMA.split(';'):
+            db.execute(statement.strip())
+
+        db.commit()
+
+    elif res == _CURRENT_SCHEMA_VERSION:
+        pass
+
+    else:
+        raise error.Abort(_(b'sqlite database has unrecognized version'))
+
+    db.execute('PRAGMA journal_mode=WAL')
+
+    return db
+
+
+_OUR_ORDER = (
+    pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_REVERSE
+)
+
+_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
+
+
+def _find_nearest_ancestor_introducing_node(
+    db, gitrepo, file_path, walk_start, filenode
+):
+    """Find the nearest ancestor that introduces a file node.
+
+    Args:
+      db: a handle to our sqlite database.
+      gitrepo: A pygit2.Repository instance.
+      file_path: the path of a file in the repo
+      walk_start: a pygit2.Oid that is a commit where we should start walking
+                  for our nearest ancestor.
+
+    Returns:
+      A hexlified SHA that is the commit ID of the next-nearest parent.
+    """
+    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
+        file_path
+    )
+    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
+        filenode
+    )
+    parent_options = {
+        row[0].decode('ascii')
+        for row in db.execute(
+            'SELECT node FROM changedfiles '
+            'WHERE filename = ? AND filenode = ?',
+            (file_path, filenode),
+        )
+    }
+    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
+    for w in inner_walker:
+        if w.id.hex in parent_options:
+            return w.id.hex
+    raise error.ProgrammingError(
+        'Unable to find introducing commit for %s node %s from %s',
+        (file_path, filenode, walk_start),
+    )
+
+
+def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
+    """Given a starting commit and path, fill in a filelog's parent pointers.
+
+    Args:
+      gitrepo: a pygit2.Repository
+      db: a handle to our sqlite database
+      startcommit: a hexlified node id for the commit to start at
+      path: the path of the file whose parent pointers we should fill in.
+      startfilenode: the hexlified node id of the file at startcommit
+
+    TODO: make startfilenode optional
+    """
+    assert isinstance(
+        startcommit, str
+    ), 'startcommit must be str, got %r' % type(startcommit)
+    assert isinstance(
+        startfilenode, str
+    ), 'startfilenode must be str, got %r' % type(startfilenode)
+    visit = collections.deque([(startcommit, startfilenode)])
+    while visit:
+        cnode, filenode = visit.popleft()
+        commit = gitrepo[cnode]
+        parents = []
+        for parent in commit.parents:
+            t = parent.tree
+            for comp in path.split('/'):
+                try:
+                    t = gitrepo[t[comp].id]
+                except KeyError:
+                    break
+            else:
+                introducer = _find_nearest_ancestor_introducing_node(
+                    db, gitrepo, path, parent.id, t.id.hex
+                )
+                parents.append((introducer, t.id.hex))
+        p1node = p1fnode = p2node = p2fnode = gitutil.nullgit
+        for par, parfnode in parents:
+            found = int(
+                db.execute(
+                    'SELECT COUNT(*) FROM changedfiles WHERE '
+                    'node = ? AND filename = ? AND filenode = ? AND '
+                    'p1node NOT NULL',
+                    (par, path, parfnode),
+                ).fetchone()[0]
+            )
+            if found == 0:
+                assert par is not None
+                visit.append((par, parfnode))
+        if parents:
+            p1node, p1fnode = parents[0]
+        if len(parents) == 2:
+            p2node, p2fnode = parents[1]
+        if len(parents) > 2:
+            raise error.ProgrammingError(
+                b"git support can't handle octopus merges"
+            )
+        db.execute(
+            'UPDATE changedfiles SET '
+            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
+            'WHERE node = ? AND filename = ? AND filenode = ?',
+            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
+        )
+    db.commit()
+
+
+def _index_repo(gitrepo, db, progress_factory=lambda *args, **kwargs: None):
+    # Identify all references so we can tell the walker to visit all of them.
+    all_refs = gitrepo.listall_references()
+    possible_heads = set()
+    prog = progress_factory(b'refs')
+    for pos, ref in enumerate(all_refs):
+        if prog is not None:
+            prog.update(pos)
+        if not (
+            ref.startswith('refs/heads/')  # local branch
+            or ref.startswith('refs/tags/')  # tag
+            or ref.startswith('refs/remotes/')  # remote branch
+            or ref.startswith('refs/hg/')  # from this extension
+        ):
+            continue
+        try:
+            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
+        except ValueError:
+            # No commit to be found, so we don't care for hg's purposes.
+            continue
+        possible_heads.add(start.id)
+    # Optimization: if the list of heads hasn't changed, don't
+    # reindex, the changelog. This doesn't matter on small
+    # repositories, but on even moderately deep histories (eg cpython)
+    # this is a very important performance win.
+    #
+    # TODO: we should figure out how to incrementally index history
+    # (preferably by detecting rewinds!) so that we don't have to do a
+    # full changelog walk every time a new commit is created.
+    cache_heads = {x[0] for x in db.execute('SELECT node FROM possible_heads')}
+    walker = None
+    cur_cache_heads = {h.hex for h in possible_heads}
+    if cur_cache_heads == cache_heads:
+        return
+    for start in possible_heads:
+        if walker is None:
+            walker = gitrepo.walk(start, _OUR_ORDER)
+        else:
+            walker.push(start)
+
+    # Empty out the existing changelog. Even for large-ish histories
+    # we can do the top-level "walk all the commits" dance very
+    # quickly as long as we don't need to figure out the changed files
+    # list.
+    db.execute('DELETE FROM changelog')
+    if prog is not None:
+        prog.complete()
+    prog = progress_factory(b'commits')
+    # This walker is sure to visit all the revisions in history, but
+    # only once.
+    for pos, commit in enumerate(walker):
+        if prog is not None:
+            prog.update(pos)
+        p1 = p2 = nodemod.nullhex
+        if len(commit.parents) > 2:
+            raise error.ProgrammingError(
+                (
+                    b"git support can't handle octopus merges, "
+                    b"found a commit with %d parents :("
+                )
+                % len(commit.parents)
+            )
+        if commit.parents:
+            p1 = commit.parents[0].id.hex
+        if len(commit.parents) == 2:
+            p2 = commit.parents[1].id.hex
+        db.execute(
+            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
+            (pos, commit.id.hex, p1, p2),
+        )
+
+        num_changedfiles = db.execute(
+            "SELECT COUNT(*) from changedfiles WHERE node = ?",
+            (commit.id.hex,),
+        ).fetchone()[0]
+        if not num_changedfiles:
+            files = {}
+            # I *think* we only need to check p1 for changed files
+            # (and therefore linkrevs), because any node that would
+            # actually have this commit as a linkrev would be
+            # completely new in this rev.
+            p1 = commit.parents[0].id.hex if commit.parents else None
+            if p1 is not None:
+                patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS)
+            else:
+                patchgen = commit.tree.diff_to_tree(
+                    swap=True, flags=_DIFF_FLAGS
+                )
+            new_files = (p.delta.new_file for p in patchgen)
+            files = {
+                nf.path: nf.id.hex
+                for nf in new_files
+                if nf.id.raw != nodemod.nullid
+            }
+            for p, n in files.items():
+                # We intentionally set NULLs for any file parentage
+                # information so it'll get demand-computed later. We
+                # used to do it right here, and it was _very_ slow.
+                db.execute(
+                    'INSERT INTO changedfiles ('
+                    'node, filename, filenode, p1node, p1filenode, p2node, '
+                    'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
+                    (commit.id.hex, p, n, None, None, None, None),
+                )
+    db.execute('DELETE FROM heads')
+    db.execute('DELETE FROM possible_heads')
+    for hid in possible_heads:
+        h = hid.hex
+        db.execute('INSERT INTO possible_heads (node) VALUES(?)', (h,))
+        haschild = db.execute(
+            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?', (h, h)
+        ).fetchone()[0]
+        if not haschild:
+            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))
+
+    db.commit()
+    if prog is not None:
+        prog.complete()
+
+
+def get_index(gitrepo, progress_factory=lambda *args, **kwargs: None):
+    cachepath = os.path.join(
+        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
+    )
+    if not os.path.exists(cachepath):
+        os.makedirs(cachepath)
+    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
+    db = _createdb(dbpath)
+    # TODO check against gitrepo heads before doing a full index
+    # TODO thread a ui.progress call into this layer
+    _index_repo(gitrepo, db, progress_factory)
+    return db
diff --git a/hgext/git/gitutil.py b/hgext/git/gitutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitutil.py
@@ -0,0 +1,28 @@
+"""utilities to assist in working with pygit2"""
+from __future__ import absolute_import
+
+from mercurial.node import bin, hex, nullid
+
+from mercurial import pycompat
+
+
+def togitnode(n):
+    """Wrapper to convert a Mercurial binary node to a unicode hexlified node.
+
+    pygit2 and sqlite both need nodes as strings, not bytes.
+    """
+    assert len(n) == 20
+    if pycompat.ispy3:
+        return hex(n).decode('ascii')
+    return hex(n)
+
+
+def fromgitnode(n):
+    """Opposite of togitnode."""
+    assert len(n) == 40
+    if pycompat.ispy3:
+        return bin(n.encode('ascii'))
+    return bin(n)
+
+
+nullgit = togitnode(nullid)
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitlog.py
@@ -0,0 +1,567 @@
+from __future__ import absolute_import
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+    ancestor,
+    changelog as hgchangelog,
+    dagop,
+    encoding,
+    error,
+    manifest,
+    match as matchmod,
+    node as nodemod,
+    pycompat,
+)
+from mercurial.interfaces import (
+    repository,
+    util as interfaceutil,
+)
+from mercurial.utils import stringutil
+from . import (
+    gitutil,
+    index,
+)
+
+
+class baselog(object):  # revlog.revlog):
+    """Common implementations between changelog and manifestlog."""
+
+    def __init__(self, gr, db):
+        self.gitrepo = gr
+        self._db = db
+
+    def __len__(self):
+        return int(
+            self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
+        )
+
+    def rev(self, n):
+        if n == nodemod.nullid:
+            return -1
+        t = self._db.execute(
+            'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
+        ).fetchone()
+        if t is None:
+            raise error.LookupError(n, b'00changelog.i', _(b'no node %d'))
+        return t[0]
+
+    def node(self, r):
+        if r == nodemod.nullrev:
+            return nodemod.nullid
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE rev = ?', (r,)
+        ).fetchone()
+        if t is None:
+            raise error.LookupError(r, b'00changelog.i', _(b'no node'))
+        return nodemod.bin(t[0])
+
+    def hasnode(self, n):
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE node = ?', (n,)
+        ).fetchone()
+        return t is not None
+
+
+class baselogindex(object):
+    def __init__(self, log):
+        self._log = log
+
+    def has_node(self, n):
+        return self._log.rev(n) != -1
+
+    def __len__(self):
+        return len(self._log)
+
+    def __getitem__(self, idx):
+        p1rev, p2rev = self._log.parentrevs(idx)
+        # TODO: it's messy that the index leaks so far out of the
+        # storage layer that we have to implement things like reading
+        # this raw tuple, which exposes revlog internals.
+        return (
+            # Pretend offset is just the index, since we don't really care.
+            idx,
+            # Same with lengths
+            idx,  # length
+            idx,  # rawsize
+            -1,  # delta base
+            idx,  # linkrev TODO is this right?
+            p1rev,
+            p2rev,
+            self._log.node(idx),
+        )
+
+
+# TODO: an interface for the changelog type?
+class changelog(baselog):
+    def __contains__(self, rev):
+        try:
+            self.node(rev)
+            return True
+        except error.LookupError:
+            return False
+
+    @property
+    def filteredrevs(self):
+        # TODO: we should probably add a refs/hg/ namespace for hidden
+        # heads etc, but that's an idea for later.
+        return set()
+
+    @property
+    def index(self):
+        return baselogindex(self)
+
+    @property
+    def nodemap(self):
+        r = {
+            nodemod.bin(v[0]): v[1]
+            for v in self._db.execute('SELECT node, rev FROM changelog')
+        }
+        r[nodemod.nullid] = nodemod.nullrev
+        return r
+
+    def tip(self):
+        t = self._db.execute(
+            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
+        ).fetchone()
+        if t:
+            return nodemod.bin(t[0])
+        return nodemod.nullid
+
+    def revs(self, start=0, stop=None):
+        if stop is None:
+            stop = self.tip()
+        t = self._db.execute(
+            'SELECT rev FROM changelog '
+            'WHERE rev >= ? AND rev <= ? '
+            'ORDER BY REV ASC',
+            (start, stop),
+        )
+        return (int(r[0]) for r in t)
+
+    def _partialmatch(self, id):
+        if nodemod.wdirhex.startswith(id):
+            raise error.WdirUnsupported
+        candidates = [
+            nodemod.bin(x[0])
+            for x in self._db.execute(
+                'SELECT node FROM changelog WHERE node LIKE ?', (id + b'%',)
+            )
+        ]
+        if nodemod.nullhex.startswith(id):
+            candidates.append(nodemod.nullid)
+        if len(candidates) > 1:
+            raise error.AmbiguousPrefixLookupError(
+                id, b'00changelog.i', _(b'ambiguous identifier')
+            )
+        if candidates:
+            return candidates[0]
+        return None
+
+    def flags(self, rev):
+        return 0
+
+    def shortest(self, node, minlength=1):
+        nodehex = nodemod.hex(node)
+        for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
+            candidate = nodehex[:attempt]
+            matches = int(
+                self._db.execute(
+                    'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
+                    (nodehex + b'%',),
+                ).fetchone()[0]
+            )
+            if matches == 1:
+                return candidate
+        return nodehex
+
+    def headrevs(self, revs=None):
+        realheads = [
+            int(x[0])
+            for x in self._db.execute(
+                'SELECT rev FROM changelog '
+                'INNER JOIN heads ON changelog.node = heads.node'
+            )
+        ]
+        if revs:
+            return sorted([r for r in revs if r in realheads])
+        return sorted(realheads)
+
+    def changelogrevision(self, nodeorrev):
+        # Ensure we have a node id
+        if isinstance(nodeorrev, int):
+            n = self.node(nodeorrev)
+        else:
+            n = nodeorrev
+        # handle looking up nullid
+        if n == nodemod.nullid:
+            return hgchangelog._changelogrevision(extra={})
+        hn = gitutil.togitnode(n)
+        # We've got a real commit!
+        files = [
+            r[0]
+            for r in self._db.execute(
+                'SELECT filename FROM changedfiles '
+                'WHERE node = ? and filenode != ?',
+                (hn, gitutil.nullgit),
+            )
+        ]
+        filesremoved = [
+            r[0]
+            for r in self._db.execute(
+                'SELECT filename FROM changedfiles '
+                'WHERE node = ? and filenode = ?',
+                (hn, nodemod.nullhex),
+            )
+        ]
+        c = self.gitrepo[hn]
+        return hgchangelog._changelogrevision(
+            manifest=n,  # pretend manifest the same as the commit node
+            user=b'%s <%s>'
+            % (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
+            # TODO: a fuzzy memory from hg-git hacking says this should be -offset
+            date=(c.author.time, c.author.offset),
+            files=files,
+            # TODO filesadded in the index
+            filesremoved=filesremoved,
+            description=c.message.encode('utf8'),
+            # TODO do we want to handle extra? how?
+            extra={b'branch': b'default'},
+        )
+
+    def ancestors(self, revs, stoprev=0, inclusive=False):
+        revs = list(revs)
+        tip = self.rev(self.tip())
+        for r in revs:
+            if r > tip:
+                raise IndexError(b'Invalid rev %r' % r)
+        return ancestor.lazyancestors(
+            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
+        )
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def descendants(self, revs):
+        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
+
+    def reachableroots(self, minroot, heads, roots, includepath=False):
+        return dagop._reachablerootspure(
+            self.parentrevs, minroot, roots, heads, includepath
+        )
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestor(self, a, b):
+        a, b = self.rev(a), self.rev(b)
+        return self.isancestorrev(a, b)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestorrev(self, a, b):
+        if a == nodemod.nullrev:
+            return True
+        elif a == b:
+            return True
+        elif a > b:
+            return False
+        return bool(self.reachableroots(a, [b], [a], includepath=False))
+
+    def parentrevs(self, rev):
+        n = self.node(rev)
+        hn = gitutil.togitnode(n)
+        c = self.gitrepo[hn]
+        p1 = p2 = nodemod.nullrev
+        if c.parents:
+            p1 = self.rev(c.parents[0].id.raw)
+            if len(c.parents) > 2:
+                raise error.Abort(b'TODO octopus merge handling')
+            if len(c.parents) == 2:
+                p2 = self.rev(c.parents[0].id.raw)
+        return p1, p2
+
+    # Private method is used at least by the tags code.
+    _uncheckedparentrevs = parentrevs
+
+    def commonancestorsheads(self, a, b):
+        # TODO the revlog verson of this has a C path, so we probably
+        # need to optimize this...
+        a, b = self.rev(a), self.rev(b)
+        return [
+            self.node(n)
+            for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
+        ]
+
+    def branchinfo(self, rev):
+        """Git doesn't do named branches, so just put everything on default."""
+        return b'default', False
+
+    def delayupdate(self, tr):
+        # TODO: I think we can elide this because we're just dropping
+        # an object in the git repo?
+        pass
+
+    def add(
+        self,
+        manifest,
+        files,
+        desc,
+        transaction,
+        p1,
+        p2,
+        user,
+        date=None,
+        extra=None,
+        p1copies=None,
+        p2copies=None,
+        filesadded=None,
+        filesremoved=None,
+    ):
+        parents = []
+        hp1, hp2 = gitutil.togitnode(p1), gitutil.togitnode(p2)
+        if p1 != nodemod.nullid:
+            parents.append(hp1)
+        if p2 and p2 != nodemod.nullid:
+            parents.append(hp2)
+        assert date is not None
+        timestamp, tz = date
+        sig = pygit2.Signature(
+            encoding.unifromlocal(stringutil.person(user)),
+            encoding.unifromlocal(stringutil.email(user)),
+            timestamp,
+            tz,
+        )
+        oid = self.gitrepo.create_commit(
+            None, sig, sig, desc, gitutil.togitnode(manifest), parents
+        )
+        # Set up an internal reference to force the commit into the
+        # changelog. Hypothetically, we could even use this refs/hg/
+        # namespace to allow for anonymous heads on git repos, which
+        # would be neat.
+        self.gitrepo.references.create(
+            'refs/hg/internal/latest-commit', oid, force=True
+        )
+        # Reindex now to pick up changes. We omit the progress
+        # callback because this will be very quick.
+        index._index_repo(self.gitrepo, self._db)
+        return oid.raw
+
+
+# TODO: Make a split between mutable and immutable manifest types here.
+class gittreemanifest(object):
+    def __init__(self, gt, builderfn):
+        self._builderfn = builderfn
+        self._tree = gt
+        self._builder = None
+
+    def __contains__(self, k):
+        k = pycompat.fsdecode(k)
+        if self._builder:
+            return self._builder.get(k) is not None
+        return k in self._tree
+
+    def __getitem__(self, k):
+        if self._builder:
+            match = self._builder.get(k)
+            if match is None:
+                raise error.LookupError(
+                    b'File %r not found in tree %r' % (k, self._tree.id.hex)
+                )
+            return match
+        try:
+            return self._tree[k].id.raw
+        except ValueError:
+            raise error.LookupError(
+                b'File %r not found in tree %r' % (k, self._tree.id.hex)
+            )
+
+    def __setitem__(self, k, v):
+        if self._builder is None:
+            self._builder = self._builderfn()
+        self._builder.insert(
+            pycompat.fsdecode(k), gitutil.togitnode(v), pygit2.GIT_FILEMODE_BLOB
+        )
+
+    def setflag(self, p, flag):
+        up = pycompat.fsdecode(p)
+        oid = self._builder.get(up).id
+        if not flag:
+            self._builder.insert(up, oid, pygit2.GIT_FILEMODE_BLOB)
+        elif flag == b'x':
+            self._builder.insert(up, oid, pygit2.GIT_FILEMODE_BLOB_EXECUTABLE)
+        elif flag == b'l':
+            self._builder.insert(up, oid, pygit2.GIT_FILEMODE_LINK)
+        else:
+            raise ValueError(b'Illegal flag value %r on path %r' % flag, p)
+
+    def flags(self, k):
+        # TODO flags handling
+        return b''
+
+    def _walkonetree(self, tree, match, subdir):
+        for te in tree:
+            # TODO: can we prune dir walks with the matcher?
+            realname = subdir + pycompat.fsencode(te.name)
+            if te.type == r'tree':
+                for inner in self._walkonetree(
+                    self.gitrepo[te.id], match, realname + b'/'
+                ):
+                    yield inner
+            if not match(realname):
+                continue
+            yield pycompat.fsencode(realname)
+
+    def walk(self, match):
+        return self._walkonetree(self._tree, match, b'')
+
+    def get(self, fname, default=None):
+        if fname in self:
+            return self[fname]
+        return default
+
+
+ at interfaceutil.implementer(repository.imanifestrevisionstored)
+class gittreemanifestctx(object):
+    def __init__(self, repo, gittree):
+        self._repo = repo
+        self._tree = gittree
+        self._builder = None
+
+    def _getbuilder(self):
+        if self._builder is None:
+            self._builder = self._repo.TreeBuilder(self._tree)
+        return self._builder
+
+    def read(self):
+        return gittreemanifest(self._tree, self._getbuilder)
+
+    def find(self, path):
+        return self.read()[path]
+
+    def copy(self):
+        return gittreemanifestctx(self._repo, self._tree)
+
+    def write(self, transaction, link, p1, p2, added, removed, match=None):
+        # We're not (for now, anyway) going to audit filenames, so we
+        # can ignore added and removed.
+
+        # TODO what does this match argument get used for? hopefully
+        # just narrow?
+        assert not match or isinstance(match, matchmod.alwaysmatcher)
+        return self._getbuilder().write().raw
+
+
+class manifestlog(baselog):
+    def __getitem__(self, node):
+        return self.get(b'', node)
+
+    def get(self, relpath, node):
+        if node == nodemod.nullid:
+            return manifest.memtreemanifestctx(self, relpath)
+        commit = self.gitrepo[gitutil.togitnode(node)]
+        t = commit.tree
+        if relpath:
+            parts = relpath.split(b'/')
+            for p in parts:
+                te = t[p]
+                t = self.gitrepo[te.id]
+        return gittreemanifestctx(self.gitrepo, t)
+
+
+@interfaceutil.implementer(repository.ifilestorage)
+class filelog(baselog):
+    def __init__(self, gr, db, path):
+        super(filelog, self).__init__(gr, db)
+        assert isinstance(path, bytes)
+        self.path = path
+
+    def read(self, node):
+        if node == nodemod.nullid:
+            return b''
+        return self.gitrepo[gitutil.togitnode(node)].data
+
+    def lookup(self, node):
+        if len(node) not in (20, 40):
+            node = int(node)
+        if isinstance(node, int):
+            assert False, b'todo revnums for nodes'
+        if len(node) == 40:
+            node = nodemod.bin(node)
+        hnode = gitutil.togitnode(node)
+        if hnode in self.gitrepo:
+            return node
+        raise error.LookupError(self.path, node, _(b'no match found'))
+
+    def cmp(self, node, text):
+        """Returns True if text is different than content at `node`."""
+        return self.read(node) != text
+
+    def add(self, text, meta, transaction, link, p1=None, p2=None):
+        assert not meta  # Should we even try to handle this?
+        return self.gitrepo.create_blob(text).raw
+
+    def __iter__(self):
+        for clrev in self._db.execute(
+            '''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
+        ''',
+            (pycompat.fsdecode(self.path), gitutil.nullgit),
+        ):
+            yield clrev[0]
+
+    def linkrev(self, fr):
+        return fr
+
+    def rev(self, node):
+        row = self._db.execute(
+            '''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
+            (pycompat.fsdecode(self.path), gitutil.togitnode(node)),
+        ).fetchone()
+        if row is None:
+            raise error.LookupError(self.path, node, _(b'no such node'))
+        return int(row[0])
+
+    def node(self, rev):
+        maybe = self._db.execute(
+            '''SELECT filenode FROM changedfiles
+INNER JOIN changelog ON changelog.node = changedfiles.node
+WHERE changelog.rev = ? AND filename = ?
+''',
+            (rev, pycompat.fsdecode(self.path)),
+        ).fetchone()
+        if maybe is None:
+            raise IndexError('gitlog %r out of range %d' % (self.path, rev))
+        return nodemod.bin(maybe[0])
+
+    def parents(self, node):
+        gn = gitutil.togitnode(node)
+        gp = pycompat.fsdecode(self.path)
+        ps = []
+        for p in self._db.execute(
+            '''SELECT p1filenode, p2filenode FROM changedfiles
+WHERE filenode = ? AND filename = ?
+''',
+            (gn, gp),
+        ).fetchone():
+            if p is None:
+                commit = self._db.execute(
+                    "SELECT node FROM changedfiles "
+                    "WHERE filenode = ? AND filename = ?",
+                    (gn, gp),
+                ).fetchone()[0]
+                # This filelog is missing some data. Build the
+                # filelog, then recurse (which will always find data).
+                if pycompat.ispy3:
+                    commit = commit.decode('ascii')
+                index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
+                return self.parents(node)
+            else:
+                ps.append(nodemod.bin(p))
+        return ps
+
+    def renamed(self, node):
+        # TODO: renames/copies
+        return False
diff --git a/hgext/git/dirstate.py b/hgext/git/dirstate.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/dirstate.py
@@ -0,0 +1,283 @@
+from __future__ import absolute_import
+
+import contextlib
+import errno
+import os
+
+import pygit2
+
+from mercurial import (
+    error,
+    extensions,
+    match as matchmod,
+    node as nodemod,
+    pycompat,
+    scmutil,
+    util,
+)
+from mercurial.interfaces import (
+    dirstate as intdirstate,
+    util as interfaceutil,
+)
+
+from . import gitutil
+
+
+def readpatternfile(orig, filepath, warn, sourceinfo=False):
+    if not (b'info/exclude' in filepath or filepath.endswith(b'.gitignore')):
+        return orig(filepath, warn, sourceinfo=sourceinfo)
+    result = []
+    warnings = []
+    with open(filepath, 'rb') as fp:
+        for l in fp:
+            l = l.strip()
+            if not l or l.startswith(b'#'):
+                continue
+            if l.startswith(b'!'):
+                # on reflection, I think /foo is just glob:
+                warnings.append(b'unsupported ignore pattern %s' % l)
+                continue
+            if l.startswith(b'/'):
+                result.append(b'glob:' + l[1:])
+            else:
+                result.append(b'relglob:' + l)
+    return result, warnings
+
+
+extensions.wrapfunction(matchmod, b'readpatternfile', readpatternfile)
+
+
+_STATUS_MAP = {
+    pygit2.GIT_STATUS_CONFLICTED: b'm',
+    pygit2.GIT_STATUS_CURRENT: b'n',
+    pygit2.GIT_STATUS_IGNORED: b'?',
+    pygit2.GIT_STATUS_INDEX_DELETED: b'r',
+    pygit2.GIT_STATUS_INDEX_MODIFIED: b'n',
+    pygit2.GIT_STATUS_INDEX_NEW: b'a',
+    pygit2.GIT_STATUS_INDEX_RENAMED: b'a',
+    pygit2.GIT_STATUS_INDEX_TYPECHANGE: b'n',
+    pygit2.GIT_STATUS_WT_DELETED: b'r',
+    pygit2.GIT_STATUS_WT_MODIFIED: b'n',
+    pygit2.GIT_STATUS_WT_NEW: b'?',
+    pygit2.GIT_STATUS_WT_RENAMED: b'a',
+    pygit2.GIT_STATUS_WT_TYPECHANGE: b'n',
+    pygit2.GIT_STATUS_WT_UNREADABLE: b'?',
+    pygit2.GIT_STATUS_INDEX_MODIFIED | pygit2.GIT_STATUS_WT_MODIFIED: b'm',
+}
+
+
+@interfaceutil.implementer(intdirstate.idirstate)
+class gitdirstate(object):
+    def __init__(self, ui, root, gitrepo):
+        self._ui = ui
+        self._root = os.path.dirname(root)
+        self.git = gitrepo
+
+    def p1(self):
+        return self.git.head.peel().id.raw
+
+    def p2(self):
+        # TODO: MERGE_HEAD? something like that, right?
+        return nodemod.nullid
+
+    def setparents(self, p1, p2=nodemod.nullid):
+        assert p2 == nodemod.nullid, b'TODO merging support'
+        self.git.head.set_target(gitutil.togitnode(p1))
+
+    @util.propertycache
+    def identity(self):
+        return util.filestat.frompath(
+            os.path.join(self._root, b'.git', b'index')
+        )
+
+    def branch(self):
+        return b'default'
+
+    def parents(self):
+        # TODO how on earth do we find p2 if a merge is in flight?
+        return self.p1(), nodemod.nullid
+
+    def __iter__(self):
+        return (pycompat.fsencode(f.path) for f in self.git.index)
+
+    def items(self):
+        for ie in self.git.index:
+            yield pycompat.fsencode(ie.path), None  # should be a dirstatetuple
+
+    # py2,3 compat forward
+    iteritems = items
+
+    def __getitem__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+        except KeyError:
+            return b'?'
+        return _STATUS_MAP[gs]
+
+    def __contains__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+            return _STATUS_MAP[gs] != b'?'
+        except KeyError:
+            return False
+
+    def status(self, match, subrepos, ignored, clean, unknown):
+        # TODO handling of clean files - can we get that from git.status()?
+        modified, added, removed, deleted, unknown, ignored, clean = (
+            [],
+            [],
+            [],
+            [],
+            [],
+            [],
+            [],
+        )
+        gstatus = self.git.status()
+        for path, status in gstatus.items():
+            path = pycompat.fsencode(path)
+            if status == pygit2.GIT_STATUS_IGNORED:
+                if path.endswith(b'/'):
+                    continue
+                ignored.append(path)
+            elif status in (
+                pygit2.GIT_STATUS_WT_MODIFIED,
+                pygit2.GIT_STATUS_INDEX_MODIFIED,
+                pygit2.GIT_STATUS_WT_MODIFIED
+                | pygit2.GIT_STATUS_INDEX_MODIFIED,
+            ):
+                modified.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_NEW:
+                added.append(path)
+            elif status == pygit2.GIT_STATUS_WT_NEW:
+                unknown.append(path)
+            elif status == pygit2.GIT_STATUS_WT_DELETED:
+                deleted.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_DELETED:
+                removed.append(path)
+            else:
+                raise error.Abort(
+                    b'unhandled case: status for %r is %r' % (path, status)
+                )
+
+        # TODO are we really always sure of status here?
+        return (
+            False,
+            scmutil.status(
+                modified, added, removed, deleted, unknown, ignored, clean
+            ),
+        )
+
+    def flagfunc(self, buildfallback):
+        # TODO we can do better
+        return buildfallback()
+
+    def getcwd(self):
+        # TODO is this a good way to do this?
+        return os.path.dirname(
+            os.path.dirname(pycompat.fsencode(self.git.path))
+        )
+
+    def normalize(self, path):
+        normed = util.normcase(path)
+        assert normed == path, b"TODO handling of case folding: %s != %s" % (
+            normed,
+            path,
+        )
+        return path
+
+    @property
+    def _checklink(self):
+        return util.checklink(os.path.dirname(pycompat.fsencode(self.git.path)))
+
+    def copies(self):
+        # TODO support copies?
+        return {}
+
+    # # TODO what the heck is this
+    _filecache = set()
+
+    def pendingparentchange(self):
+        # TODO: we need to implement the context manager bits and
+        # correctly stage/revert index edits.
+        return False
+
+    def write(self, tr):
+
+        if tr:
+
+            def writeinner(category):
+                self.git.index.write()
+
+            tr.addpending(b'gitdirstate', writeinner)
+        else:
+            self.git.index.write()
+
+    def pathto(self, f, cwd=None):
+        if cwd is None:
+            cwd = self.getcwd()
+        # TODO core dirstate does something about slashes here
+        assert isinstance(f, bytes)
+        r = util.pathto(self._root, cwd, f)
+        return r
+
+    def matches(self, match):
+        for x in self.git.index:
+            p = pycompat.fsencode(x.path)
+            if match(p):
+                yield p
+
+    def normal(self, f, parentfiledata=None):
+        """Mark a file normal and clean."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def normallookup(self, f):
+        """Mark a file normal, but possibly dirty."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def walk(self, match, subrepos, unknown, ignored, full=True):
+        # TODO: we need to use .status() and not iterate the index,
+        # because the index doesn't force a re-walk and so `hg add` of
+        # a new file without an intervening call to status will
+        # silently do nothing.
+        r = {}
+        cwd = self.getcwd()
+        for path, status in self.git.status().items():
+            if path.startswith('.hg/'):
+                continue
+            path = pycompat.fsencode(path)
+            if not match(path):
+                continue
+            # TODO construct the stat info from the status object?
+            try:
+                s = os.stat(os.path.join(cwd, path))
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                continue
+            r[path] = s
+        return r
+
+    def savebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def restorebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def add(self, f):
+        self.git.index.add(pycompat.fsdecode(f))
+
+    def drop(self, f):
+        self.git.index.remove(pycompat.fsdecode(f))
+
+    def copied(self, path):
+        # TODO: track copies?
+        return None
+
+    @contextlib.contextmanager
+    def parentchange(self):
+        # TODO: track this maybe?
+        yield
diff --git a/hgext/git/__init__.py b/hgext/git/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/__init__.py
@@ -0,0 +1,251 @@
+"""grant Mercurial the ability to operate on Git repositories. (EXPERIMENTAL)
+
+This is currently super experimental. It probably will consume your
+firstborn a la Rumpelstiltskin, etc.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+    commands,
+    error,
+    extensions,
+    localrepo,
+    pycompat,
+    store,
+)
+
+from . import (
+    dirstate,
+    gitlog,
+    gitutil,
+    index,
+)
+
+
+# TODO: extract an interface for this in core
+class gitstore(object):  # store.basicstore):
+    def __init__(self, path, vfstype):
+        self.vfs = vfstype(path)
+        self.path = self.vfs.base
+        self.createmode = store._calcmode(self.vfs)
+        # above lines should go away in favor of:
+        # super(gitstore, self).__init__(path, vfstype)
+
+        self.git = pygit2.Repository(
+            os.path.normpath(os.path.join(path, b'..', b'.git'))
+        )
+        self._progress_factory = lambda *args, **kwargs: None
+        self._db_handle = None
+
+    @property
+    def _db(self):
+        # We lazy-create the database because we want to thread a
+        # progress callback down to the indexing process if it's
+        # required, and we don't have a ui handle in makestore().
+        if self._db_handle is None:
+            self._db_handle = index.get_index(self.git, self._progress_factory)
+        return self._db_handle
+
+    def join(self, f):
+        """Fake store.join method for git repositories.
+
+        For the most part, store.join is used for @storecache
+        decorators to invalidate caches when various files
+        change. We'll map the ones we care about, and ignore the rest.
+        """
+        if f in (b'00changelog.i', b'00manifest.i'):
+            # This is close enough: in order for the changelog cache
+            # to be invalidated, HEAD will have to change.
+            return os.path.join(self.path, b'HEAD')
+        elif f == b'lock':
+            # TODO: we probably want to map this to a git lock, I
+            # suspect index.lock. We should figure out what the
+            # most-alike file is in git-land. For now we're risking
+            # bad concurrency errors if another git client is used.
+            return os.path.join(self.path, b'hgit-bogus-lock')
+        elif f in (b'obsstore', b'phaseroots', b'narrowspec', b'bookmarks'):
+            return os.path.join(self.path, b'..', b'.hg', f)
+        raise NotImplementedError(b'Need to pick file for %s.' % f)
+
+    def changelog(self, trypending):
+        # TODO we don't have a plan for trypending in hg's git support yet
+        return gitlog.changelog(self.git, self._db)
+
+    def manifestlog(self, repo, storenarrowmatch):
+        # TODO handle storenarrowmatch and figure out if we need the repo arg
+        return gitlog.manifestlog(self.git, self._db)
+
+    def invalidatecaches(self):
+        pass
+
+    def write(self, tr=None):
+        # normally this handles things like fncache writes, which we don't have
+        pass
+
+
+def _makestore(orig, requirements, storebasepath, vfstype):
+    if os.path.exists(
+        os.path.join(storebasepath, b'this-is-git')
+    ) and os.path.exists(os.path.join(storebasepath, b'..', b'.git')):
+        return gitstore(storebasepath, vfstype)
+    return orig(requirements, storebasepath, vfstype)
+
+
+class gitfilestorage(object):
+    def file(self, path):
+        if path[0:1] == b'/':
+            path = path[1:]
+        return gitlog.filelog(self.store.git, self.store._db, path)
+
+
+def _makefilestorage(orig, requirements, features, **kwargs):
+    store = kwargs['store']
+    if isinstance(store, gitstore):
+        return gitfilestorage
+    return orig(requirements, features, **kwargs)
+
+
+def _setupdothg(ui, path):
+    dothg = os.path.join(path, b'.hg')
+    if os.path.exists(dothg):
+        ui.warn(_(b'git repo already initialized for hg\n'))
+    else:
+        os.mkdir(os.path.join(path, b'.hg'))
+        # TODO is it ok to extend .git/info/exclude like this?
+        with open(
+            os.path.join(path, b'.git', b'info', b'exclude'), 'ab'
+        ) as exclude:
+            exclude.write(b'\n.hg\n')
+    with open(os.path.join(dothg, b'this-is-git'), 'wb') as f:
+        pass
+    with open(os.path.join(dothg, b'requirements'), 'wb') as f:
+        f.write(b'git\n')
+
+
+_BMS_PREFIX = 'refs/heads/'
+
+
+class gitbmstore(object):
+    def __init__(self, gitrepo):
+        self.gitrepo = gitrepo
+
+    def __contains__(self, name):
+        return (
+            _BMS_PREFIX + pycompat.fsdecode(name)
+        ) in self.gitrepo.references
+
+    def __iter__(self):
+        for r in self.gitrepo.listall_references():
+            if r.startswith(_BMS_PREFIX):
+                yield pycompat.fsencode(r[len(_BMS_PREFIX) :])
+
+    def __getitem__(self, k):
+        return (
+            self.gitrepo.references[_BMS_PREFIX + pycompat.fsdecode(k)]
+            .peel()
+            .id.raw
+        )
+
+    def get(self, k, default=None):
+        try:
+            if k in self:
+                return self[k]
+            return default
+        except pygit2.InvalidSpecError:
+            return default
+
+    @property
+    def active(self):
+        h = self.gitrepo.references['HEAD']
+        if not isinstance(h.target, str) or not h.target.startswith(
+            _BMS_PREFIX
+        ):
+            return None
+        return pycompat.fsencode(h.target[len(_BMS_PREFIX) :])
+
+    @active.setter
+    def active(self, mark):
+        raise NotImplementedError
+
+    def names(self, node):
+        r = []
+        for ref in self.gitrepo.listall_references():
+            if not ref.startswith(_BMS_PREFIX):
+                continue
+            if self.gitrepo.references[ref].peel().id.raw != node:
+                continue
+            r.append(pycompat.fsencode(ref[len(_BMS_PREFIX) :]))
+        return r
+
+    # Cleanup opportunity: this is *identical* to core's bookmarks store.
+    def expandname(self, bname):
+        if bname == b'.':
+            if self.active:
+                return self.active
+            raise error.RepoLookupError(_(b"no active bookmark"))
+        return bname
+
+    def applychanges(self, repo, tr, changes):
+        """Apply a list of changes to bookmarks
+        """
+        # TODO: this should respect transactions, but that's going to
+        # require enlarging the gitbmstore to know how to do in-memory
+        # temporary writes and read those back prior to transaction
+        # finalization.
+        for name, node in changes:
+            if node is None:
+                self.gitrepo.references.delete(
+                    _BMS_PREFIX + pycompat.fsdecode(name)
+                )
+            else:
+                self.gitrepo.references.create(
+                    _BMS_PREFIX + pycompat.fsdecode(name),
+                    gitutil.togitnode(node),
+                    force=True,
+                )
+
+
+def init(orig, ui, dest=b'.', **opts):
+    if opts.get('git', False):
+        path = os.path.abspath(dest)
+        # TODO: walk up looking for the git repo
+        _setupdothg(ui, path)
+        return 0
+    return orig(ui, dest=dest, **opts)
+
+
+def reposetup(ui, repo):
+    if isinstance(repo.store, gitstore):
+        orig = repo.__class__
+        repo.store._progress_factory = repo.ui.makeprogress
+
+        class gitlocalrepo(orig):
+            def _makedirstate(self):
+                # TODO narrow support here
+                return dirstate.gitdirstate(
+                    self.ui, self.vfs.base, self.store.git
+                )
+
+            @property
+            def _bookmarks(self):
+                return gitbmstore(self.store.git)
+
+        repo.__class__ = gitlocalrepo
+    return repo
+
+
+def extsetup(ui):
+    extensions.wrapfunction(localrepo, b'makestore', _makestore)
+    extensions.wrapfunction(localrepo, b'makefilestorage', _makefilestorage)
+    # Inject --git flag for `hg init`
+    entry = extensions.wrapcommand(commands.table, b'init', init)
+    entry[1].extend(
+        [(b'', b'git', None, b'set up a git repository instead of hg')]
+    )



To: durin42, #hg-reviewers
Cc: sheehan, rom1dep, JordiGH, hollisb, mjpieters, mercurial-devel


More information about the Mercurial-devel mailing list