[PATCH remotenames-ext] pull: add option for taking over head discovery

Durham Goode durham at fb.com
Tue Jun 27 23:36:36 UTC 2017


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1498606517 25200
#      Tue Jun 27 16:35:17 2017 -0700
# Node ID 5b14e873f26326c0659727869b02d598c1db3099
# Parent  67ef597998876ab259d87558776b68b391bd2364
pull: add option for taking over head discovery

In core Mercurial, setdiscovery.findcommonheads is responsible for figuring out
which local commits are also on the server. With remotenames, we already record
which commits are on the server each time we pull, so that data can be used to
answer the question more efficiently.
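
Conceptually, the fast path boils down to the following (a condensed
illustration only; the real implementation is in the diff below, and the
helpers storedremotenodes/legacyfindcommonheads are placeholders for the
remotenames bookkeeping and the stock discovery):

    def fastcommonheads(ui, local, remote):
        # nodes recorded for this remote at the last pull
        nodes = storedremotenodes(local, remote)
        if not nodes:
            return legacyfindcommonheads(ui, local, remote)
        # ask the server which of them it still has, plus its current heads
        srvheads = remote.heads()
        common = [n for n, known in zip(nodes, remote.known(nodes)) if known]
        if not common:
            return legacyfindcommonheads(ui, local, remote)
        # same return convention as findcommonheads:
        # (common, anyincoming, remoteheads)
        return common, True, srvheads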

The fast head discovery is on by default. In the worst case, if the local
remote names are stale, the server may send more data than necessary, which is
a no-op on clients that already have that data.
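
If the fast path ever causes problems, it can be turned off via the new config
knob, for example (illustrative hgrc snippet, not part of the patch):

    [remotenames]
    fastheaddiscovery = False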

On large repos with lots of commits, this can shave 5-20 seconds off certain
pulls.

diff --git a/remotenames.py b/remotenames.py
--- a/remotenames.py
+++ b/remotenames.py
@@ -36,6 +36,7 @@ from mercurial import repair
 from mercurial import repoview
 from mercurial import revset
 from mercurial import scmutil
+from mercurial import setdiscovery
 from mercurial import templatekw
 from mercurial import url
 from mercurial import util
@@ -173,6 +174,72 @@ def expull(orig, repo, remote, *args, **
                 f.write('enabled') # content doesn't matter
     return res
 
+def exfindcommonheads(orig, ui, local, remote, **kwargs):
+    '''Return a tuple (common, anyincoming, remoteheads) used to identify
+    missing nodes from or in remote.
+    '''
+    # The normal findcommonheads implementation tries to find the exact boundary
+    # between what the client has and what the server has. With remotenames, we
+    # have pretty good knowledge about what local commits already exist on the
+    # server, so we can short circuit all the discovery logic by just assuming
+    # the current remotenames are representative of what's on the server. In the
+    # worst case the data might be slightly out of sync and the server sends us
+    # more data than necessary, but this should be rare.
+    if not ui.configbool('remotenames', 'fastheaddiscovery', True):
+        return orig(ui, local, remote, **kwargs)
+
+    cl = local.changelog
+
+    remotepath = activepath(local.ui, remote)
+    remotenodes = []
+    for node, nametype, remotename, rname in readremotenames(local):
+        # Note: It's important that this excludes hidden commits (by doing
+        # node in local), since the callers assume all nodes in common are
+        # visible.
+        node = bin(node)
+        if remotename == remotepath and node in local:
+            remotenodes.append(node)
+
+    # If we have no remotenames, fallback to normal discovery.
+    if not remotenodes:
+        return orig(ui, local, remote, **kwargs)
+
+    remotenodes = set(remotenodes)
+
+    # Check which remote nodes still exist on the server
+    ui.status(_("searching for changes\n"))
+    batch = remote.iterbatch()
+    batch.heads()
+    batch.known(remotenodes)
+    batch.submit()
+    srvheadhashes, yesno = batch.results()
+    common = list(n for i, n in enumerate(remotenodes) if yesno[i])
+
+    # If we don't know of any server commits, fall back to legacy discovery
+    if not common:
+        # If this path is hit, it will print "searching for changes" twice,
+        # which is weird. This should be very rare though, since it only happens
+        # if the client has remote names, but none of those names exist on the
+        # server (i.e. the server has been completely replaced, or stripped).
+        ui.status(_("server has changed since last pull - falling back to the "
+                    "default search strategy\n"))
+        return orig(ui, local, remote, **kwargs)
+
+    if cl.tip() == nullid:
+        if srvheadhashes != [nullid]:
+            return [nullid], True, srvheadhashes
+        return ([nullid], False, [])
+
+    # early exit if we know all the specified remote heads already
+    clrev = cl.rev
+    clcontains = cl.__contains__
+    srvheads = list(clrev(n) for n in srvheadhashes if clcontains(n))
+    if len(srvheads) == len(srvheadhashes):
+        ui.debug("all remote heads known locally\n")
+        return (srvheadhashes, False, srvheadhashes)
+
+    return (common, True, srvheadhashes)
+
 def pullremotenames(repo, remote, bookmarks):
     path = activepath(repo.ui, remote)
     if path:
@@ -646,6 +713,7 @@ def extsetup(ui):
     extensions.wrapfunction(exchange.pushoperation, '__init__', expushop)
     extensions.wrapfunction(exchange, 'push', expush)
     extensions.wrapfunction(exchange, 'pull', expull)
+    extensions.wrapfunction(setdiscovery, 'findcommonheads', exfindcommonheads)
     # _getdynamicblockers was renamed to pinnedrevs in 4.3
     blockername = 'pinnedrevs'
     if not util.safehasattr(repoview, blockername):
diff --git a/tests/test-pushto.t b/tests/test-pushto.t
--- a/tests/test-pushto.t
+++ b/tests/test-pushto.t
@@ -215,6 +215,8 @@ Test that rebasing and pushing works as 
   $ hg pull
   pulling from $TESTTMP/repo1 (glob)
   searching for changes
+  server has changed since last pull - falling back to the default search strategy
+  searching for changes
   adding changesets
   adding manifests
   adding file changes
