[PATCH] discovery: avoid discovery when local graph is a subset of remote

Peter Arrenbrecht peter.arrenbrecht at gmail.com
Mon Mar 14 04:06:18 CDT 2011


# HG changeset patch
# User Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
# Date 1300093192 -3600
discovery: avoid discovery when local graph is a subset of remote

The idea is to send local's heads to the server up front to check whether the server
knows them all. If it does, local is a subset of remote, so we can skip the regular
discovery and request a changegroup right away. This required the introduction of two
new wireproto calls:

  known([Node]) -> [1/0]
    Returns 1/0 for each node, indicating whether it's known by the server.

  changegroupdiff(common, heads, ...)
    Returns the changegroup for everything that is an ancestor of heads, but not an
    ancestor of common.

Both are actually necessary building blocks for the further tweaks we have planned for
discovery. They are both guarded by the new capability 'changegroupdiff'.

Interesting test output changes are:

-  added 1 changesets with 0 changes to 1 files (+1 heads)
+  added 1 changesets with 0 changes to 0 files (+1 heads)

-> Compared to changegroupsubset(), the new changegroupdiff() fixes a bug:
it no longer returns unnecessary files when file revs are reused.

warning: repository is unrelated
+  requesting all changes

-> The new use of common instead of bases correctly indicates that an unrelated pull
gets all changes from the server.

diff --git a/hgext/transplant.py b/hgext/transplant.py
--- a/hgext/transplant.py
+++ b/hgext/transplant.py
@@ -548,8 +548,8 @@
     if source:
         sourcerepo = ui.expandpath(source)
         source = hg.repository(ui, sourcerepo)
-        source, incoming, bundle = bundlerepo.getremotechanges(ui, repo, source,
-                                    force=True)
+        source, common, incoming, bundle = bundlerepo.getremotechanges(ui, repo,
+                                            source, force=True)
     else:
         source = repo
 
diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -286,15 +286,17 @@
         repopath, bundlename = parentpath, path
     return bundlerepository(ui, repopath, bundlename)
 
-def getremotechanges(ui, repo, other, revs=None, bundlename=None, force=False):
-    tmp = discovery.findcommonincoming(repo, other, heads=revs, force=force)
+def getremotechanges(ui, repo, other, revs=None, bundlename=None,
+                     force=False, usecommon=False):
+    tmp = discovery.findcommonincoming(repo, other, heads=revs, force=force,
+                                       commononly=usecommon)
     common, incoming, rheads = tmp
     if not incoming:
         try:
             os.unlink(bundlename)
         except:
             pass
-        return other, None, None
+        return other, None, None, None
 
     bundle = None
     if bundlename or not other.local():
@@ -303,7 +305,9 @@
         if revs is None and other.capable('changegroupsubset'):
             revs = rheads
 
-        if revs is None:
+        if usecommon:
+            cg = other.changegroupdiff(common, revs, 'incoming')
+        elif revs is None:
             cg = other.changegroup(incoming, "incoming")
         else:
             cg = other.changegroupsubset(incoming, revs, 'incoming')
@@ -315,5 +319,5 @@
         if not other.local():
             # use the created uncompressed bundlerepo
             other = bundlerepository(ui, repo.root, fname)
-    return (other, incoming, bundle)
+    return (other, common, incoming, bundle)
 
diff --git a/mercurial/discovery.py b/mercurial/discovery.py
--- a/mercurial/discovery.py
+++ b/mercurial/discovery.py
@@ -9,9 +9,10 @@
 from i18n import _
 import util, error
 
-def findcommonincoming(repo, remote, heads=None, force=False):
-    """Return a tuple (common, missing roots, heads) used to identify
-    missing nodes from remote.
+def findcommonincoming(repo, remote, heads=None, force=False, commononly=False):
+    """Return a tuple (common, missing, heads) used to identify missing nodes
+    from remote. "missing" is either a boolean indicating if any nodes are missing
+    (when commononly=True), or else a list of the root nodes of the missing set.
 
     If a list of heads is specified, return only nodes which are heads
     or ancestors of these heads.
@@ -36,6 +37,13 @@
     # and start by examining the heads
     repo.ui.status(_("searching for changes\n"))
 
+    if commononly:
+        myheads = repo.heads()
+        known = remote.known(myheads)
+        if util.all(known):
+            hasincoming = set(heads).difference(set(myheads)) and True
+            return myheads, hasincoming, heads
+
     unknown = []
     for h in heads:
         if h not in m:
diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -436,14 +436,19 @@
 
     if revs:
         revs = [other.lookup(rev) for rev in revs]
-    other, incoming, bundle = bundlerepo.getremotechanges(ui, repo, other, revs,
-                                opts["bundle"], opts["force"])
-    if incoming is None:
+    usecommon = other.capable('changegroupdiff')
+    other, common, incoming, bundle = bundlerepo.getremotechanges(ui, repo, other,
+                                       revs, opts["bundle"], opts["force"],
+                                       usecommon=usecommon)
+    if not incoming:
         ui.status(_("no changes found\n"))
         return subreporecurse()
 
     try:
-        chlist = other.changelog.nodesbetween(incoming, revs)[0]
+        if usecommon:
+            chlist = other.changelog.findmissing(common, revs)
+        else:
+            chlist = other.changelog.nodesbetween(incoming, revs)[0]
         displayer = cmdutil.show_changeset(ui, other, opts, buffered)
 
         # XXX once graphlog extension makes it into core,
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -17,6 +17,7 @@
 perms = {
     'changegroup': 'pull',
     'changegroupsubset': 'pull',
+    'changegroupdiff': 'pull',
     'stream_out': 'pull',
     'listkeys': 'pull',
     'unbundle': 'push',
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -20,7 +20,8 @@
 propertycache = util.propertycache
 
 class localrepository(repo.repository):
-    capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey'))
+    capabilities = set(('lookup', 'changegroupsubset', 'branchmap', 'pushkey',
+                        'changegroupdiff'))
     supportedformats = set(('revlogv1', 'parentdelta'))
     supported = supportedformats | set(('store', 'fncache', 'shared',
                                         'dotencode'))
@@ -558,6 +559,10 @@
         repo = (remote and remote.local()) and remote or self
         return repo[key].branch()
 
+    def known(self, nodes):
+        nm = self.changelog.nodemap
+        return [(n in nm) for n in nodes]
+
     def local(self):
         return True
 
@@ -1320,20 +1325,23 @@
     def pull(self, remote, heads=None, force=False):
         lock = self.lock()
         try:
+            usecommon = remote.capable('changegroupdiff')
             tmp = discovery.findcommonincoming(self, remote, heads=heads,
-                                               force=force)
+                                               force=force, commononly=usecommon)
             common, fetch, rheads = tmp
             if not fetch:
                 self.ui.status(_("no changes found\n"))
                 result = 0
             else:
-                if heads is None and fetch == [nullid]:
+                if heads is None and list(common) == [nullid]:
                     self.ui.status(_("requesting all changes\n"))
                 elif heads is None and remote.capable('changegroupsubset'):
                     # issue1320, avoid a race if remote changed after discovery
                     heads = rheads
 
-                if heads is None:
+                if usecommon:
+                    cg = remote.changegroupdiff(common, heads or rheads, 'pull')
+                elif heads is None:
                     cg = remote.changegroup(fetch, 'pull')
                 elif not remote.capable('changegroupsubset'):
                     raise util.Abort(_("partial pull cannot be done because "
@@ -1466,6 +1474,34 @@
         node and linknode is the changelog node that should be transmitted as
         the linkrev.
         """
+        cl = self.changelog
+        if not bases:
+            bases = [nullid]
+        missing, bases, heads = cl.nodesbetween(bases, heads)
+        # We assume that all ancestors of bases are known
+        common = set(cl.ancestors(*[cl.rev(n) for n in bases]))
+        return self._changegroupsubset(common, missing, heads, source, extranodes)
+
+    def changegroupdiff(self, common, heads, source, extranodes=None):
+        """Like changegroupsubset, but returns the set difference between the
+        ancestors of heads and the ancestors of common.
+
+        The nodes in common might not all be known locally due to the way the
+        current discovery protocol works.
+        """
+        cl = self.changelog
+        if common:
+            nm = cl.nodemap
+            common = [n for n in common if n in nm]
+        else:
+            common = [nullid]
+        if not heads:
+            heads = cl.heads()
+        common, missing = cl.findcommonmissing(common, heads)
+        return self._changegroupsubset(common, missing, heads, source, extranodes)
+
+    def _changegroupsubset(self, commonrevs, msng_cl_lst, heads, source,
+                           extranodes):
 
         # Set up some initial variables
         # Make it easy to refer to self.changelog
@@ -1474,9 +1510,6 @@
         # Some bases may turn out to be superfluous, and some heads may be
         # too.  nodesbetween will return the minimal set of bases and heads
         # necessary to re-create the changegroup.
-        if not bases:
-            bases = [nullid]
-        msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
 
         if extranodes is None:
             # can we go through the fast path ?
@@ -1491,9 +1524,6 @@
 
         self.changegroupinfo(msng_cl_lst, source)
 
-        # We assume that all ancestors of bases are known
-        commonrevs = set(cl.ancestors(*[cl.rev(n) for n in bases]))
-
         # Make it easy to refer to self.manifest
         mnfst = self.manifest
         # We don't know which manifests are missing yet
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -399,11 +399,12 @@
                     yield i
                     break
 
-    def findmissing(self, common=None, heads=None):
-        """Return the ancestors of heads that are not ancestors of common.
+    def findcommonmissing(self, common=None, heads=None):
+        """Return a tuple of the ancestors of common and the ancestors of heads
+        that are not ancestors of common.
 
-        More specifically, return a list of nodes N such that every N
-        satisfies the following constraints:
+        More specifically, the second element is a list of nodes N such that
+        every N satisfies the following constraints:
 
           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'
@@ -441,7 +442,25 @@
                         visit.append(p)
         missing = list(missing)
         missing.sort()
-        return [self.node(r) for r in missing]
+        return has, [self.node(r) for r in missing]
+
+    def findmissing(self, common=None, heads=None):
+        """Return the ancestors of heads that are not ancestors of common.
+
+        More specifically, return a list of nodes N such that every N
+        satisfies the following constraints:
+
+          1. N is an ancestor of some node in 'heads'
+          2. N is not an ancestor of any node in 'common'
+
+        The list is sorted by revision number, meaning it is
+        topologically sorted.
+
+        'heads' and 'common' are both lists of node IDs.  If heads is
+        not supplied, uses all of the revlog's heads.  If common is not
+        supplied, uses nullid."""
+        _common, missing = self.findcommonmissing(common, heads)
+        return missing
 
     def nodesbetween(self, roots=None, heads=None):
         """Return a topological path from 'roots' to 'heads'.
diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py
--- a/mercurial/wireproto.py
+++ b/mercurial/wireproto.py
@@ -38,6 +38,14 @@
         except:
             self._abort(error.ResponseError(_("unexpected response:"), d))
 
+    def known(self, nodes):
+        n = encodelist(nodes)
+        d = self._call("known", nodes=n)
+        try:
+            return [bool(int(f)) for f in d]
+        except:
+            self._abort(error.ResponseError(_("unexpected response:"), d))
+
     def branchmap(self):
         d = self._call("branchmap")
         try:
@@ -113,6 +121,14 @@
                              bases=bases, heads=heads)
         return changegroupmod.unbundle10(self._decompress(f), 'UN')
 
+    def changegroupdiff(self, common, heads, kind):
+        self.requirecap('changegroupdiff', _('look up remote changes'))
+        common = encodelist(common)
+        heads = encodelist(heads)
+        f = self._callstream("changegroupdiff",
+                             common=common, heads=heads)
+        return changegroupmod.unbundle10(self._decompress(f), 'UN')
+
     def unbundle(self, cg, heads, source):
         '''Send cg (a readable file-like object representing the
         changegroup to push, typically a chunkbuffer object) to the
@@ -176,7 +192,7 @@
     return "".join(r)
 
 def capabilities(repo, proto):
-    caps = 'lookup changegroupsubset branchmap pushkey'.split()
+    caps = 'lookup changegroupsubset changegroupdiff branchmap pushkey'.split()
     if _allowstream(repo.ui):
         requiredformats = repo.requirements & repo.supportedformats
         # if our local revlogs are just revlogv1, add 'stream' cap
@@ -199,6 +215,12 @@
     cg = repo.changegroupsubset(bases, heads, 'serve')
     return streamres(proto.groupchunks(cg))
 
+def changegroupdiff(repo, proto, common, heads):
+    common = decodelist(common)
+    heads = decodelist(heads)
+    cg = repo.changegroupdiff(common, heads, 'serve')
+    return streamres(proto.groupchunks(cg))
+
 def heads(repo, proto):
     h = repo.heads()
     return encodelist(h) + "\n"
@@ -228,6 +250,9 @@
         success = 0
     return "%s %s\n" % (success, r)
 
+def known(repo, proto, nodes):
+    return ''.join(b and "1" or "0" for b in repo.known(decodelist(nodes)))
+
 def pushkey(repo, proto, namespace, key, old, new):
     # compatibility with pre-1.8 clients which were accidentally
     # sending raw binary nodes rather than utf-8-encoded hex
@@ -343,8 +368,10 @@
     'capabilities': (capabilities, ''),
     'changegroup': (changegroup, 'roots'),
     'changegroupsubset': (changegroupsubset, 'bases heads'),
+    'changegroupdiff': (changegroupdiff, 'common heads'),
     'heads': (heads, ''),
     'hello': (hello, ''),
+    'known': (known, 'nodes'),
     'listkeys': (listkeys, 'namespace'),
     'lookup': (lookup, 'key'),
     'pushkey': (pushkey, 'namespace key old new'),
diff --git a/tests/test-586.t b/tests/test-586.t
--- a/tests/test-586.t
+++ b/tests/test-586.t
@@ -17,6 +17,7 @@
   pulling from ../a
   searching for changes
   warning: repository is unrelated
+  requesting all changes
   adding changesets
   adding manifests
   adding file changes
@@ -66,6 +67,7 @@
   pulling from ../repob
   searching for changes
   warning: repository is unrelated
+  requesting all changes
   adding changesets
   adding manifests
   adding file changes
diff --git a/tests/test-globalopts.t b/tests/test-globalopts.t
--- a/tests/test-globalopts.t
+++ b/tests/test-globalopts.t
@@ -28,6 +28,7 @@
   pulling from ../b
   searching for changes
   warning: repository is unrelated
+  requesting all changes
   adding changesets
   adding manifests
   adding file changes
diff --git a/tests/test-hgweb-commands.t b/tests/test-hgweb-commands.t
--- a/tests/test-hgweb-commands.t
+++ b/tests/test-hgweb-commands.t
@@ -905,7 +905,7 @@
   $ "$TESTDIR/get-with-headers.py" 127.0.0.1:$HGPORT '?cmd=capabilities'; echo
   200 Script output follows
   
-  lookup changegroupsubset branchmap pushkey unbundle=HG10GZ,HG10BZ,HG10UN
+  lookup changegroupsubset changegroupdiff branchmap pushkey unbundle=HG10GZ,HG10BZ,HG10UN
 
 heads
 
diff --git a/tests/test-http-clone-r.t b/tests/test-http-clone-r.t
--- a/tests/test-http-clone-r.t
+++ b/tests/test-http-clone-r.t
@@ -214,7 +214,7 @@
   adding changesets
   adding manifests
   adding file changes
-  added 1 changesets with 0 changes to 1 files (+1 heads)
+  added 1 changesets with 0 changes to 0 files (+1 heads)
   (run 'hg heads' to see heads, 'hg merge' to merge)
   $ hg verify
   checking changesets
@@ -238,7 +238,7 @@
   adding changesets
   adding manifests
   adding file changes
-  added 2 changesets with 0 changes to 1 files (+1 heads)
+  added 2 changesets with 0 changes to 0 files (+1 heads)
   (run 'hg heads' to see heads, 'hg merge' to merge)
   $ hg verify
   checking changesets
diff --git a/tests/test-http-proxy.t b/tests/test-http-proxy.t
--- a/tests/test-http-proxy.t
+++ b/tests/test-http-proxy.t
@@ -103,22 +103,22 @@
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroup&roots=0000000000000000000000000000000000000000 HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroupdiff&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroup&roots=0000000000000000000000000000000000000000 HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroupdiff&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroup&roots=0000000000000000000000000000000000000000 HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroupdiff&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=capabilities HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=heads HTTP/1.1" - - (glob)
-  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroup&roots=0000000000000000000000000000000000000000 HTTP/1.1" - - (glob)
+  * - - [*] "GET http://localhost:$HGPORT/?cmd=changegroupdiff&common=0000000000000000000000000000000000000000&heads=83180e7845de420a1bb46896fd5fe05294f8d629 HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
   * - - [*] "GET http://localhost:$HGPORT/?cmd=listkeys&namespace=bookmarks HTTP/1.1" - - (glob)
 
diff --git a/tests/test-schemes.t b/tests/test-schemes.t
--- a/tests/test-schemes.t
+++ b/tests/test-schemes.t
@@ -29,6 +29,7 @@
   comparing with parts://localhost
   sending heads command
   searching for changes
+  sending known command
   no changes found
   [1]
 
diff --git a/tests/test-ssh-clone-r.t b/tests/test-ssh-clone-r.t
--- a/tests/test-ssh-clone-r.t
+++ b/tests/test-ssh-clone-r.t
@@ -232,7 +232,7 @@
   adding changesets
   adding manifests
   adding file changes
-  added 1 changesets with 0 changes to 1 files (+1 heads)
+  added 1 changesets with 0 changes to 0 files (+1 heads)
   (run 'hg heads' to see heads, 'hg merge' to merge)
   $ hg verify
   checking changesets
@@ -256,7 +256,7 @@
   adding changesets
   adding manifests
   adding file changes
-  added 2 changesets with 0 changes to 1 files (+1 heads)
+  added 2 changesets with 0 changes to 0 files (+1 heads)
   (run 'hg heads' to see heads, 'hg merge' to merge)
   $ hg verify
   checking changesets
diff --git a/tests/test-unrelated-pull.t b/tests/test-unrelated-pull.t
--- a/tests/test-unrelated-pull.t
+++ b/tests/test-unrelated-pull.t
@@ -23,6 +23,7 @@
   pulling from ../a
   searching for changes
   warning: repository is unrelated
+  requesting all changes
   adding changesets
   adding manifests
   adding file changes


More information about the Mercurial-devel mailing list