D4489: exchangev2: fetch manifest revisions
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Wed Sep 12 13:13:03 EDT 2018
indygreg updated this revision to Diff 10968.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D4489?vs=10802&id=10968
REVISION DETAIL
https://phab.mercurial-scm.org/D4489
AFFECTED FILES
mercurial/exchangev2.py
tests/test-wireproto-exchangev2.t
CHANGE DETAILS
diff --git a/tests/test-wireproto-exchangev2.t b/tests/test-wireproto-exchangev2.t
--- a/tests/test-wireproto-exchangev2.t
+++ b/tests/test-wireproto-exchangev2.t
@@ -76,6 +76,24 @@
add changeset e96ae20f4188
add changeset caa2a465451d
checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8',
+ '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+ '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+ '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+ ],
+ 'tree': ''
+ }
+ received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+ received frame(size=922; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
updating the branch cache
new changesets 3390ef850073:caa2a465451d (3 drafts)
@@ -101,6 +119,16 @@
o 0 3390ef850073fbc2f0dfff2244342c8e9229013a public
+All manifests should have been transferred
+
+ $ hg -R client-simple debugindex -m
+ rev linkrev nodeid p1 p2
+ 0 0 992f4779029a 000000000000 000000000000
+ 1 1 a988fb43583e 992f4779029a 000000000000
+ 2 2 ec804e488c20 a988fb43583e 000000000000
+ 3 3 045c7f3927da 992f4779029a 000000000000
+ 4 4 379cb0c2e664 045c7f3927da 000000000000
+
Cloning only a specific revision works
$ hg --debug clone -U -r 4432d83626e8 http://localhost:$HGPORT client-singlehead
@@ -146,6 +174,21 @@
add changeset 3390ef850073
add changeset 4432d83626e8
checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8'
+ ],
+ 'tree': ''
+ }
+ received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+ received frame(size=376; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
updating the branch cache
new changesets 3390ef850073:4432d83626e8
@@ -157,6 +200,11 @@
o 0 3390ef850073fbc2f0dfff2244342c8e9229013a public
+ $ hg debugindex -m
+ rev linkrev nodeid p1 p2
+ 0 0 992f4779029a 000000000000 000000000000
+ 1 1 a988fb43583e 992f4779029a 000000000000
+
Incremental pull works
$ hg --debug pull
@@ -204,6 +252,22 @@
add changeset e96ae20f4188
add changeset caa2a465451d
checking for updated bookmarks
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'nodes': [
+ '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+ '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+ '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+ ],
+ 'tree': ''
+ }
+ received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+ received frame(size=559; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
updating the branch cache
new changesets cd2534766bec:caa2a465451d (3 drafts)
(run 'hg update' to get a working copy)
@@ -220,6 +284,14 @@
o 0 3390ef850073fbc2f0dfff2244342c8e9229013a public
+ $ hg debugindex -m
+ rev linkrev nodeid p1 p2
+ 0 0 992f4779029a 000000000000 000000000000
+ 1 1 a988fb43583e 992f4779029a 000000000000
+ 2 2 ec804e488c20 a988fb43583e 000000000000
+ 3 3 045c7f3927da 992f4779029a 000000000000
+ 4 4 379cb0c2e664 045c7f3927da 000000000000
+
Phase-only update works
$ hg -R ../server-simple phase --public -r caa2a465451dd
@@ -331,6 +403,24 @@
checking for updated bookmarks
adding remote bookmark book-1
adding remote bookmark book-2
+ sending 1 commands
+ sending command manifestdata: {
+ 'fields': set([
+ 'parents',
+ 'revision'
+ ]),
+ 'nodes': [
+ '\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A',
+ '\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8',
+ '\xec\x80NH\x8c \x88\xc25\t\x9a\x10 u\x13\xbe\xcd\xc3\xdd\xa5',
+ '\x04\\\x7f9\'\xda\x13\xe7Z\xf8\xf0\xe4\xf0HI\xe4a\xa9x\x0f',
+ '7\x9c\xb0\xc2\xe6d\\y\xdd\xc5\x9a\x1dG\'\xa9\xfb\x83\n\xeb&'
+ ],
+ 'tree': ''
+ }
+ received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation)
+ received frame(size=922; request=1; stream=2; streamflags=; type=command-response; flags=continuation)
+ received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos)
updating the branch cache
new changesets 3390ef850073:caa2a465451d (1 drafts)
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -16,6 +16,7 @@
)
from . import (
bookmarks,
+ error,
mdiff,
phases,
pycompat,
@@ -57,6 +58,8 @@
remote.url(), pullop.gettransaction,
explicit=pullop.explicitbookmarks)
+ _fetchmanifests(repo, tr, remote, csetres['manifestnodes'])
+
def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
"""Determine which changesets need to be pulled."""
@@ -121,14 +124,22 @@
unit=_('chunks'),
total=meta.get(b'totalitems'))
+ manifestnodes = {}
+
def linkrev(node):
repo.ui.debug('add changeset %s\n' % short(node))
# Linkrev for changelog is always self.
return len(cl)
def onchangeset(cl, node):
progress.increment()
+ revision = cl.changelogrevision(node)
+
+ # We need to preserve the mapping of changelog revision to node
+ # so we can set the linkrev accordingly when manifests are added.
+ manifestnodes[cl.rev(node)] = revision.manifest
+
nodesbyphase = {phase: set() for phase in phases.phasenames}
remotebookmarks = {}
@@ -178,4 +189,106 @@
'added': added,
'nodesbyphase': nodesbyphase,
'bookmarks': remotebookmarks,
+ 'manifestnodes': manifestnodes,
}
+
+def _fetchmanifests(repo, tr, remote, manifestnodes):
+ rootmanifest = repo.manifestlog.getstorage(b'')
+
+ # Some manifests can be shared between changesets. Filter out revisions
+ # we already know about.
+ fetchnodes = []
+ linkrevs = {}
+ seen = set()
+
+ for clrev, node in sorted(manifestnodes.iteritems()):
+ if node in seen:
+ continue
+
+ try:
+ rootmanifest.rev(node)
+ except error.LookupError:
+ fetchnodes.append(node)
+ linkrevs[node] = clrev
+
+ seen.add(node)
+
+ # TODO handle tree manifests
+
+ # addgroup() expects 7-tuple describing revisions. This normalizes
+ # the wire data to that format.
+ def iterrevisions(objs, progress):
+ for manifest in objs:
+ node = manifest[b'node']
+
+ if b'deltasize' in manifest:
+ basenode = manifest[b'deltabasenode']
+ delta = next(objs)
+ elif b'revisionsize' in manifest:
+ basenode = nullid
+ revision = next(objs)
+ delta = mdiff.trivialdiffheader(len(revision)) + revision
+ else:
+ continue
+
+ yield (
+ node,
+ manifest[b'parents'][0],
+ manifest[b'parents'][1],
+ # The value passed in is passed to the lookup function passed
+ # to addgroup(). We already have a map of manifest node to
+ # changelog revision number. So we just pass in the
+ # manifest node here and use linkrevs.__getitem__ as the
+ # resolution function.
+ node,
+ basenode,
+ delta,
+ # Flags not yet supported.
+ 0
+ )
+
+ progress.increment()
+
+ progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
+ total=len(fetchnodes))
+
+ # Fetch manifests 10,000 per command.
+ # TODO have server advertise preferences?
+ # TODO make size configurable on client?
+ batchsize = 10000
+
+ # We send commands 1 at a time to the remote. This is not the most
+ # efficient because we incur a round trip at the end of each batch.
+ # However, the existing frame-based reactor keeps consuming server
+ # data in the background. And this results in response data buffering
+ # in memory. This can consume gigabytes of memory.
+ # TODO send multiple commands in a request once background buffering
+ # issues are resolved.
+
+ added = []
+
+ for i in pycompat.xrange(0, len(fetchnodes), batchsize):
+ batch = [node for node in fetchnodes[i:i + batchsize]]
+ if not batch:
+ continue
+
+ with remote.commandexecutor() as e:
+ objs = e.callcommand(b'manifestdata', {
+ b'tree': b'',
+ b'nodes': batch,
+ b'fields': {b'parents', b'revision'},
+ }).result()
+
+ # Chomp off header object.
+ next(objs)
+
+ added.extend(rootmanifest.addgroup(
+ iterrevisions(objs, progress),
+ linkrevs.__getitem__,
+ weakref.proxy(tr)))
+
+ progress.complete()
+
+ return {
+ 'added': added,
+ }
To: indygreg, #hg-reviewers, durin42
Cc: mercurial-devel
More information about the Mercurial-devel mailing list