D4133: changegroup: factor changelog chunk generation into own function

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Thu Aug 9 14:15:35 EDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rHGf7228c907ef4: changegroup: factor changelog chunk generation into own function (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4133?vs=10004&id=10140

REVISION DETAIL
  https://phab.mercurial-scm.org/D4133

AFFECTED FILES
  mercurial/changegroup.py

CHANGE DETAILS

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -715,14 +715,100 @@
             yield chunk
 
     def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
-        '''yield a sequence of changegroup chunks (strings)'''
+        """Yield a sequence of changegroup byte chunks."""
+
         repo = self._repo
         cl = repo.changelog
 
+        self._verbosenote(_('uncompressed size of bundle content:\n'))
+        size = 0
+
+        clstate, chunks = self._generatechangelog(cl, clnodes)
+        for chunk in chunks:
+            size += len(chunk)
+            yield chunk
+
+        self._verbosenote(_('%8.i (changelog)\n') % size)
+
+        clrevorder = clstate['clrevorder']
+        mfs = clstate['mfs']
+        changedfiles = clstate['changedfiles']
+
+        # We need to make sure that the linkrev in the changegroup refers to
+        # the first changeset that introduced the manifest or file revision.
+        # The fastpath is usually safer than the slowpath, because the filelogs
+        # are walked in revlog order.
+        #
+        # When taking the slowpath with reorder=None and the manifest revlog
+        # uses generaldelta, the manifest may be walked in the "wrong" order.
+        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
+        # cc0ff93d0c0c).
+        #
+        # When taking the fastpath, we are only vulnerable to reordering
+        # of the changelog itself. The changelog never uses generaldelta, so
+        # it is only reordered when reorder=True. To handle this case, we
+        # simply take the slowpath, which already has the 'clrevorder' logic.
+        # This was also fixed in cc0ff93d0c0c.
+        fastpathlinkrev = fastpathlinkrev and not self._reorder
+        # Treemanifests don't work correctly with fastpathlinkrev
+        # either, because we don't discover which directory nodes to
+        # send along with files. This could probably be fixed.
+        fastpathlinkrev = fastpathlinkrev and (
+            'treemanifest' not in repo.requirements)
+
+        fnodes = {}  # needed file nodes
+
+        for chunk in self.generatemanifests(commonrevs, clrevorder,
+                fastpathlinkrev, mfs, fnodes, source):
+            yield chunk
+
+        if self._ellipses:
+            mfdicts = None
+            if self._isshallow:
+                mfdicts = [(self._repo.manifestlog[n].read(), lr)
+                           for (n, lr) in mfs.iteritems()]
+
+        mfs.clear()
+        clrevs = set(cl.rev(x) for x in clnodes)
+
+        if not fastpathlinkrev:
+            def linknodes(unused, fname):
+                return fnodes.get(fname, {})
+        else:
+            cln = cl.node
+            def linknodes(filerevlog, fname):
+                llr = filerevlog.linkrev
+                fln = filerevlog.node
+                revs = ((r, llr(r)) for r in filerevlog)
+                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
+
+        if self._ellipses:
+            # We need to pass the mfdicts variable down into
+            # generatefiles(), but more than one command might have
+            # wrapped generatefiles so we can't modify the function
+            # signature. Instead, we pass the data to ourselves using an
+            # instance attribute. I'm sorry.
+            self._mfdicts = mfdicts
+
+        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
+                                        source):
+            yield chunk
+
+        yield self._close()
+
+        if clnodes:
+            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
+
+    def _generatechangelog(self, cl, nodes):
+        """Generate data for changelog chunks.
+
+        Returns a 2-tuple of a dict containing state and an iterable of
+        byte chunks. The state will not be fully populated until the
+        chunk stream has been fully consumed.
+        """
         clrevorder = {}
         mfs = {} # needed manifests
-        fnodes = {} # needed file nodes
-        mfl = repo.manifestlog
+        mfl = self._repo.manifestlog
         # TODO violates storage abstraction.
         mfrevlog = mfl._revlog
         changedfiles = set()
@@ -768,75 +854,15 @@
 
             return x
 
-        self._verbosenote(_('uncompressed size of bundle content:\n'))
-        size = 0
-        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
-            size += len(chunk)
-            yield chunk
-        self._verbosenote(_('%8.i (changelog)\n') % size)
-
-        # We need to make sure that the linkrev in the changegroup refers to
-        # the first changeset that introduced the manifest or file revision.
-        # The fastpath is usually safer than the slowpath, because the filelogs
-        # are walked in revlog order.
-        #
-        # When taking the slowpath with reorder=None and the manifest revlog
-        # uses generaldelta, the manifest may be walked in the "wrong" order.
-        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
-        # cc0ff93d0c0c).
-        #
-        # When taking the fastpath, we are only vulnerable to reordering
-        # of the changelog itself. The changelog never uses generaldelta, so
-        # it is only reordered when reorder=True. To handle this case, we
-        # simply take the slowpath, which already has the 'clrevorder' logic.
-        # This was also fixed in cc0ff93d0c0c.
-        fastpathlinkrev = fastpathlinkrev and not self._reorder
-        # Treemanifests don't work correctly with fastpathlinkrev
-        # either, because we don't discover which directory nodes to
-        # send along with files. This could probably be fixed.
-        fastpathlinkrev = fastpathlinkrev and (
-            'treemanifest' not in repo.requirements)
-
-        for chunk in self.generatemanifests(commonrevs, clrevorder,
-                fastpathlinkrev, mfs, fnodes, source):
-            yield chunk
+        state = {
+            'clrevorder': clrevorder,
+            'mfs': mfs,
+            'changedfiles': changedfiles,
+        }
 
-        if self._ellipses:
-            mfdicts = None
-            if self._isshallow:
-                mfdicts = [(self._repo.manifestlog[n].read(), lr)
-                           for (n, lr) in mfs.iteritems()]
-
-        mfs.clear()
-        clrevs = set(cl.rev(x) for x in clnodes)
+        gen = self.group(nodes, cl, lookupcl, units=_('changesets'))
 
-        if not fastpathlinkrev:
-            def linknodes(unused, fname):
-                return fnodes.get(fname, {})
-        else:
-            cln = cl.node
-            def linknodes(filerevlog, fname):
-                llr = filerevlog.linkrev
-                fln = filerevlog.node
-                revs = ((r, llr(r)) for r in filerevlog)
-                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
-
-        if self._ellipses:
-            # We need to pass the mfdicts variable down into
-            # generatefiles(), but more than one command might have
-            # wrapped generatefiles so we can't modify the function
-            # signature. Instead, we pass the data to ourselves using an
-            # instance attribute. I'm sorry.
-            self._mfdicts = mfdicts
-
-        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
-                                        source):
-            yield chunk
-
-        yield self._close()
-
-        if clnodes:
-            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
+        return state, gen
 
     def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                           fnodes, source):



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list