D4489: exchangev2: fetch manifest revisions

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Sep 12 13:13:03 EDT 2018


indygreg updated this revision to Diff 10968.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4489?vs=10802&id=10968

REVISION DETAIL
  https://phab.mercurial-scm.org/D4489

AFFECTED FILES
  mercurial/exchangev2.py
  tests/test-wireproto-exchangev2.t

CHANGE DETAILS

diff --git a/tests/test-wireproto-exchangev2.t b/tests/test-wireproto-exchangev2.t
--- a/tests/test-wireproto-exchangev2.t
+++ b/tests/test-wireproto-exchangev2.t
@@ -76,6 +76,24 @@
   add changeset e96ae20f4188
   add changeset caa2a465451d
   checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8',
+      '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+      '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+      '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+    ],
+    'tree': ''
+  }
+  received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+  received frame(size=922; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
   updating the branch cache
   new changesets 3390ef850073:caa2a465451d (3 drafts)
 
@@ -101,6 +119,16 @@
   o  0 3390ef850073fbc2f0dfff2244342c8e9229013a public
   
 
+All manifests should have been transferred
+
+  $ hg -R client-simple debugindex -m
+     rev linkrev nodeid       p1           p2
+       0       0 992f4779029a 000000000000 000000000000
+       1       1 a988fb43583e 992f4779029a 000000000000
+       2       2 ec804e488c20 a988fb43583e 000000000000
+       3       3 045c7f3927da 992f4779029a 000000000000
+       4       4 379cb0c2e664 045c7f3927da 000000000000
+
 Cloning only a specific revision works
 
   $ hg --debug clone -U -r 4432d83626e8 http://localhost:$HGPORT client-singlehead
@@ -146,6 +174,21 @@
   add changeset 3390ef850073
   add changeset 4432d83626e8
   checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8'
+    ],
+    'tree': ''
+  }
+  received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+  received frame(size=376; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
   updating the branch cache
   new changesets 3390ef850073:4432d83626e8
 
@@ -157,6 +200,11 @@
   o  0 3390ef850073fbc2f0dfff2244342c8e9229013a public
   
 
+  $ hg debugindex -m
+     rev linkrev nodeid       p1           p2
+       0       0 992f4779029a 000000000000 000000000000
+       1       1 a988fb43583e 992f4779029a 000000000000
+
 Incremental pull works
 
   $ hg --debug pull
@@ -204,6 +252,22 @@
   add changeset e96ae20f4188
   add changeset caa2a465451d
   checking for updated bookmarks
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'nodes': [
+      '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+      '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+      '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+    ],
+    'tree': ''
+  }
+  received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+  received frame(size=559; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
   updating the branch cache
   new changesets cd2534766bec:caa2a465451d (3 drafts)
   (run 'hg update' to get a working copy)
@@ -220,6 +284,14 @@
   o  0 3390ef850073fbc2f0dfff2244342c8e9229013a public
   
 
+  $ hg debugindex -m
+     rev linkrev nodeid       p1           p2
+       0       0 992f4779029a 000000000000 000000000000
+       1       1 a988fb43583e 992f4779029a 000000000000
+       2       2 ec804e488c20 a988fb43583e 000000000000
+       3       3 045c7f3927da 992f4779029a 000000000000
+       4       4 379cb0c2e664 045c7f3927da 000000000000
+
 Phase-only update works
 
   $ hg -R ../server-simple phase --public -r caa2a465451dd
@@ -331,6 +403,24 @@
   checking for updated bookmarks
   adding remote bookmark book-1
   adding remote bookmark book-2
+  sending 1 commands
+  sending command manifestdata: {
+    'fields': set([
+      'parents',
+      'revision'
+    ]),
+    'nodes': [
+      '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+      '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8',
+      '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+      '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+      '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+    ],
+    'tree': ''
+  }
+  received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+  received frame(size=922; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+  received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
   updating the branch cache
   new changesets 3390ef850073:caa2a465451d (1 drafts)
 
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -16,6 +16,7 @@
 )
 from . import (
     bookmarks,
+    error,
     mdiff,
     phases,
     pycompat,
@@ -57,6 +58,8 @@
                                remote.url(), pullop.gettransaction,
                                explicit=pullop.explicitbookmarks)
 
+    _fetchmanifests(repo, tr, remote, csetres['manifestnodes'])
+
 def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
     """Determine which changesets need to be pulled."""
 
@@ -121,14 +124,22 @@
                                     unit=_('chunks'),
                                     total=meta.get(b'totalitems'))
 
+    manifestnodes = {}
+
     def linkrev(node):
         repo.ui.debug('add changeset %s\n' % short(node))
         # Linkrev for changelog is always self.
         return len(cl)
 
     def onchangeset(cl, node):
         progress.increment()
 
+        revision = cl.changelogrevision(node)
+
+        # We need to preserve the mapping of changelog revision to manifest
+        # node so we can set the linkrev accordingly when manifests are added.
+        manifestnodes[cl.rev(node)] = revision.manifest
+
     nodesbyphase = {phase: set() for phase in phases.phasenames}
     remotebookmarks = {}
 
@@ -178,4 +189,106 @@
         'added': added,
         'nodesbyphase': nodesbyphase,
         'bookmarks': remotebookmarks,
+        'manifestnodes': manifestnodes,
     }
+
+def _fetchmanifests(repo, tr, remote, manifestnodes):
+    rootmanifest = repo.manifestlog.getstorage(b'')
+
+    # Some manifests can be shared between changesets. Filter out revisions
+    # we already know about.
+    fetchnodes = []
+    linkrevs = {}
+    seen = set()
+
+    for clrev, node in sorted(manifestnodes.iteritems()):
+        if node in seen:
+            continue
+
+        try:
+            rootmanifest.rev(node)
+        except error.LookupError:
+            fetchnodes.append(node)
+            linkrevs[node] = clrev
+
+        seen.add(node)
+
+    # TODO handle tree manifests
+
+    # addgroup() expects a 7-tuple describing each revision. This normalizes
+    # the wire data to that format.
+    def iterrevisions(objs, progress):
+        for manifest in objs:
+            node = manifest[b'node']
+
+            if b'deltasize' in manifest:
+                basenode = manifest[b'deltabasenode']
+                delta = next(objs)
+            elif b'revisionsize' in manifest:
+                basenode = nullid
+                revision = next(objs)
+                delta = mdiff.trivialdiffheader(len(revision)) + revision
+            else:
+                continue
+
+            yield (
+                node,
+                manifest[b'parents'][0],
+                manifest[b'parents'][1],
+                # This value is forwarded to the lookup function given to
+                # addgroup(). We already have a map of manifest node to
+                # changelog revision number. So we just pass in the
+                # manifest node here and use linkrevs.__getitem__ as the
+                # resolution function.
+                node,
+                basenode,
+                delta,
+                # Flags not yet supported.
+                0
+            )
+
+            progress.increment()
+
+    progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
+                                    total=len(fetchnodes))
+
+    # Fetch manifests 10,000 per command.
+    # TODO have server advertise preferences?
+    # TODO make size configurable on client?
+    batchsize = 10000
+
+    # We send commands 1 at a time to the remote. This is not the most
+    # efficient because we incur a round trip at the end of each batch.
+    # However, the existing frame-based reactor keeps consuming server
+    # data in the background. And this results in response data buffering
+    # in memory. This can consume gigabytes of memory.
+    # TODO send multiple commands in a request once background buffering
+    # issues are resolved.
+
+    added = []
+
+    for i in pycompat.xrange(0, len(fetchnodes), batchsize):
+        batch = [node for node in fetchnodes[i:i + batchsize]]
+        if not batch:
+            continue
+
+        with remote.commandexecutor() as e:
+            objs = e.callcommand(b'manifestdata', {
+                b'tree': b'',
+                b'nodes': batch,
+                b'fields': {b'parents', b'revision'},
+            }).result()
+
+            # Chomp off header object.
+            next(objs)
+
+            added.extend(rootmanifest.addgroup(
+                iterrevisions(objs, progress),
+                linkrevs.__getitem__,
+                weakref.proxy(tr)))
+
+    progress.complete()
+
+    return {
+        'added': added,
+    }



To: indygreg, #hg-reviewers, durin42
Cc: mercurial-devel


More information about the Mercurial-devel mailing list