[PATCH 1 of 6 cg3 v3] changegroup: avoid iterating the whole manifest

Augie Fackler raf at durin42.com
Fri Dec 4 19:38:50 UTC 2015


# HG changeset patch
# User Augie Fackler <augie at google.com>
# Date 1449243298 18000
#      Fri Dec 04 10:34:58 2015 -0500
# Node ID bfdd624c667b6f72f2069a3537becad05e162c08
# Parent  30a20167ae29e1874163b59a489de0cb1d859565
# EXP-Topic cg3
changegroup: avoid iterating the whole manifest

The old code gathered the list of all files that changed anywhere in
history and then gathered changed file nodes by walking the entirety
of each manifest to be sent in order to gather changed file
nodes. That's going to be unfortunate for narrowhg, and it's probably
already inefficient for medium-to-large repositories.

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -613,7 +613,8 @@ class cg1packer(object):
         clrevorder = {}
         mfs = {} # needed manifests
         fnodes = {} # needed file nodes
-        changedfiles = set()
+        # maps manifest node id -> set(changed files)
+        mfchangedfiles = {}
 
         # Callback for the changelog, used to collect changed files and manifest
         # nodes.
@@ -621,9 +622,12 @@ class cg1packer(object):
         def lookupcl(x):
             c = cl.read(x)
             clrevorder[x] = len(clrevorder)
-            changedfiles.update(c[3])
+            n = c[0]
             # record the first changeset introducing this manifest version
-            mfs.setdefault(c[0], x)
+            mfs.setdefault(n, x)
+            # Record a complete list of potentially-changed files in
+            # this manifest.
+            mfchangedfiles.setdefault(n, set()).update(c[3])
             return x
 
         self._verbosenote(_('uncompressed size of bundle content:\n'))
@@ -668,8 +672,12 @@ class cg1packer(object):
             clnode = mfs[x]
             if not fastpathlinkrev:
                 mdata = ml.readfast(x)
-                for f, n in mdata.iteritems():
-                    if f in changedfiles:
+                for f in mfchangedfiles[x]:
+                    if True:
+                        try:
+                            n = mdata[f]
+                        except KeyError:
+                            continue
                         # record the first changeset introducing this filelog
                         # version
                         fclnodes = fnodes.setdefault(f, {})
@@ -696,6 +704,9 @@ class cg1packer(object):
                 return dict(genfilenodes())
             return fnodes.get(fname, {})
 
+        changedfiles = set()
+        for x in mfchangedfiles.itervalues():
+            changedfiles.update(x)
         for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                         source):
             yield chunk


More information about the Mercurial-devel mailing list