D4133: changegroup: factor changelog chunk generation into own function

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Mon Aug 6 19:51:26 UTC 2018


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We have separate functions for generating manifests and filelogs.
  Let's split changelog into its own function so things are consistent.
  
  As part of this, we refactor the code slightly. Before, the
  changelog linknode callback was updating state on variables
  inherited via a closure. Since the closure is now separate from
  generate(), we need to a way pass state between generate() and
  _generatechangelog(). The return value of _generatechangelog()
  is a 2-tuple where the first item is a dict containing accumulated
  state. We then alias some of its members into the scope of
  generate() to reduce code churn.
  
  I will be converting other functions to a similar pattern in future
  commits.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4133

AFFECTED FILES
  mercurial/changegroup.py

CHANGE DETAILS

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -715,14 +715,100 @@
             yield chunk
 
     def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
-        '''yield a sequence of changegroup chunks (strings)'''
+        """Yield a sequence of changegroup byte chunks."""
+
         repo = self._repo
         cl = repo.changelog
 
+        self._verbosenote(_('uncompressed size of bundle content:\n'))
+        size = 0
+
+        clstate, chunks = self._generatechangelog(cl, clnodes)
+        for chunk in chunks:
+            size += len(chunk)
+            yield chunk
+
+        self._verbosenote(_('%8.i (changelog)\n') % size)
+
+        clrevorder = clstate['clrevorder']
+        mfs = clstate['mfs']
+        changedfiles = clstate['changedfiles']
+
+        # We need to make sure that the linkrev in the changegroup refers to
+        # the first changeset that introduced the manifest or file revision.
+        # The fastpath is usually safer than the slowpath, because the filelogs
+        # are walked in revlog order.
+        #
+        # When taking the slowpath with reorder=None and the manifest revlog
+        # uses generaldelta, the manifest may be walked in the "wrong" order.
+        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
+        # cc0ff93d0c0c).
+        #
+        # When taking the fastpath, we are only vulnerable to reordering
+        # of the changelog itself. The changelog never uses generaldelta, so
+        # it is only reordered when reorder=True. To handle this case, we
+        # simply take the slowpath, which already has the 'clrevorder' logic.
+        # This was also fixed in cc0ff93d0c0c.
+        fastpathlinkrev = fastpathlinkrev and not self._reorder
+        # Treemanifests don't work correctly with fastpathlinkrev
+        # either, because we don't discover which directory nodes to
+        # send along with files. This could probably be fixed.
+        fastpathlinkrev = fastpathlinkrev and (
+            'treemanifest' not in repo.requirements)
+
+        fnodes = {}  # needed file nodes
+
+        for chunk in self.generatemanifests(commonrevs, clrevorder,
+                fastpathlinkrev, mfs, fnodes, source):
+            yield chunk
+
+        if self._ellipses:
+            mfdicts = None
+            if self._isshallow:
+                mfdicts = [(self._repo.manifestlog[n].read(), lr)
+                           for (n, lr) in mfs.iteritems()]
+
+        mfs.clear()
+        clrevs = set(cl.rev(x) for x in clnodes)
+
+        if not fastpathlinkrev:
+            def linknodes(unused, fname):
+                return fnodes.get(fname, {})
+        else:
+            cln = cl.node
+            def linknodes(filerevlog, fname):
+                llr = filerevlog.linkrev
+                fln = filerevlog.node
+                revs = ((r, llr(r)) for r in filerevlog)
+                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
+
+        if self._ellipses:
+            # We need to pass the mfdicts variable down into
+            # generatefiles(), but more than one command might have
+            # wrapped generatefiles so we can't modify the function
+            # signature. Instead, we pass the data to ourselves using an
+            # instance attribute. I'm sorry.
+            self._mfdicts = mfdicts
+
+        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
+                                        source):
+            yield chunk
+
+        yield self._close()
+
+        if clnodes:
+            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
+
+    def _generatechangelog(self, cl, nodes):
+        """Generate data for changelog chunks.
+
+        Returns a 2-tuple of a dict containing state and an iterable of
+        byte chunks. The state will not be fully populated until the
+        chunk stream has been fully consumed.
+        """
         clrevorder = {}
         mfs = {} # needed manifests
-        fnodes = {} # needed file nodes
-        mfl = repo.manifestlog
+        mfl = self._repo.manifestlog
         # TODO violates storage abstraction.
         mfrevlog = mfl._revlog
         changedfiles = set()
@@ -768,75 +854,15 @@
 
             return x
 
-        self._verbosenote(_('uncompressed size of bundle content:\n'))
-        size = 0
-        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
-            size += len(chunk)
-            yield chunk
-        self._verbosenote(_('%8.i (changelog)\n') % size)
-
-        # We need to make sure that the linkrev in the changegroup refers to
-        # the first changeset that introduced the manifest or file revision.
-        # The fastpath is usually safer than the slowpath, because the filelogs
-        # are walked in revlog order.
-        #
-        # When taking the slowpath with reorder=None and the manifest revlog
-        # uses generaldelta, the manifest may be walked in the "wrong" order.
-        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
-        # cc0ff93d0c0c).
-        #
-        # When taking the fastpath, we are only vulnerable to reordering
-        # of the changelog itself. The changelog never uses generaldelta, so
-        # it is only reordered when reorder=True. To handle this case, we
-        # simply take the slowpath, which already has the 'clrevorder' logic.
-        # This was also fixed in cc0ff93d0c0c.
-        fastpathlinkrev = fastpathlinkrev and not self._reorder
-        # Treemanifests don't work correctly with fastpathlinkrev
-        # either, because we don't discover which directory nodes to
-        # send along with files. This could probably be fixed.
-        fastpathlinkrev = fastpathlinkrev and (
-            'treemanifest' not in repo.requirements)
-
-        for chunk in self.generatemanifests(commonrevs, clrevorder,
-                fastpathlinkrev, mfs, fnodes, source):
-            yield chunk
+        state = {
+            'clrevorder': clrevorder,
+            'mfs': mfs,
+            'changedfiles': changedfiles,
+        }
 
-        if self._ellipses:
-            mfdicts = None
-            if self._isshallow:
-                mfdicts = [(self._repo.manifestlog[n].read(), lr)
-                           for (n, lr) in mfs.iteritems()]
-
-        mfs.clear()
-        clrevs = set(cl.rev(x) for x in clnodes)
+        gen = self.group(nodes, cl, lookupcl, units=_('changesets'))
 
-        if not fastpathlinkrev:
-            def linknodes(unused, fname):
-                return fnodes.get(fname, {})
-        else:
-            cln = cl.node
-            def linknodes(filerevlog, fname):
-                llr = filerevlog.linkrev
-                fln = filerevlog.node
-                revs = ((r, llr(r)) for r in filerevlog)
-                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
-
-        if self._ellipses:
-            # We need to pass the mfdicts variable down into
-            # generatefiles(), but more than one command might have
-            # wrapped generatefiles so we can't modify the function
-            # signature. Instead, we pass the data to ourselves using an
-            # instance attribute. I'm sorry.
-            self._mfdicts = mfdicts
-
-        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
-                                        source):
-            yield chunk
-
-        yield self._close()
-
-        if clnodes:
-            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
+        return state, gen
 
     def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                           fnodes, source):



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list