[PATCH 4 of 4 V2] changegroup: refactor changegroup generation into a separate class

Durham Goode durham at fb.com
Mon May 6 22:14:50 CDT 2013


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1367430936 25200
#      Wed May 01 10:55:36 2013 -0700
# Node ID c86e8d104d87bf958827d9713d4e2f22f7535117
# Parent  dbdfdfcc9b14728cee940dd9f96398561bed0802
changegroup: refactor changegroup generation into a separate class

Previously changegroup generation was confined to _changegroup() and
_changegroupsubset(). Both were massive functions and they had lots of
duplicate code.

I've moved all their logic into the changegroup module as new classes:
ChangeGroupGen and SubsetChangeGroupGen. This breaks up all the different
logic into separate functions so extensions can modify them individually
and allows us to share most of the logic with the subset version. This
also helps make localrepo less of a god class (-156 lines from localrepo,
+157 lines to changegroup).

There should be no behavior changes introduced by this.

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-from node import nullrev
+from node import nullrev, hex
 import mdiff, util
 import struct, os, bz2, zlib, tempfile
 
@@ -254,3 +254,160 @@
     def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
         # do nothing with basenode, it is implicitly the previous one in HG10
         return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
+
+_bundling = _('bundling')
+_changesets = _('changesets')
+_manifests = _('manifests')
+_files = _('files')
+
+class changegroupgen(object):
+    def __init__(self, repo, csets, source, reorder):
+        self.repo = repo
+        self.cl = repo.changelog
+        self.mf = repo.manifest
+        self.csets = csets
+        self.changedfiles = set()
+        self.source = source
+        self.neededmfs = {}
+        self.reorder = reorder
+        self.bundler = bundle10(self.lookup)
+        self.fstate = ['', {}]
+        self.count = [0, 0]
+        self.progress = repo.ui.progress
+
+    def gengroup(self):
+        for chunk in self.addcommitgroups():
+            yield chunk
+        for chunk in self.addmanifestgroups():
+            yield chunk
+        for chunk in self.addfilegroups():
+            yield chunk
+
+        # Signal that no more groups are left.
+        yield self.bundler.close()
+        self.progress(_bundling, None)
+
+        if self.csets:
+            self.repo.hook('outgoing',
+                           node = hex(self.csets[0]),
+                           source = self.source)
+
+    def addcommitgroups(self):
+        # Create a changenode group generator that will call our functions
+        # back to lookup the owning changenode and collect information.
+        self.count[:] = [0, len(self.csets)]
+        for chunk in self.cl.group(self.csets,
+                                   self.bundler,
+                                   reorder=self.reorder):
+            yield chunk
+        self.progress(_bundling, None)
+
+    def addmanifestgroups(self):
+        # Create a generator for the manifestnodes that calls our lookup
+        # and data collection functions back.
+        self.count[:] = [0, len(self.neededmfs)]
+        outgoingmfs = self.outgoingmanifests(self.mf, self.neededmfs)
+
+        for chunk in self.mf.group(outgoingmfs, self.bundler,
+                                   reorder=self.reorder):
+            yield chunk
+        self.progress(_bundling, None)
+
+    def addfilegroups(self):
+        # Go through all our files in order sorted by name.
+        repo = self.repo
+        fstate = self.fstate
+        self.count[:] = [0, len(self.changedfiles)]
+        for fname in sorted(self.changedfiles):
+            filerevlog = repo.file(fname)
+
+            fstate[0] = fname
+            fstate[1] = self.outgoingfilemap(filerevlog, fname)
+
+            nodelist = fstate[1]
+            if nodelist:
+                self.count[0] += 1
+                yield self.bundler.fileheader(fname)
+                for chunk in filerevlog.group(nodelist,
+                                              self.bundler,
+                                              self.reorder):
+                    yield chunk
+
+    def outgoingmanifests(self, manifest, nodes):
+        # return the manifest nodes that are outgoing
+        rr, rl = manifest.rev, manifest.linkrev
+        clnode = self.cl.node
+        return [n for n in nodes if clnode(rl(rr(n))) in self.csets]
+
+    def outgoingfilemap(self, filerevlog, fname):
+        # map of outgoing file nodes to changelog nodes
+        mapping = {}
+        for r in filerevlog:
+            clnode = self.cl.node(filerevlog.linkrev(r))
+            if clnode in self.csets:
+                mapping[filerevlog.node(r)] = clnode
+        return mapping
+
+    def lookup(self, revlog, node):
+        if revlog == self.cl:
+            return self.lookupcommit(node)
+        elif revlog == self.mf:
+            return self.lookupmanifest(node)
+        else:
+            return self.lookupfile(revlog, node)
+
+    def lookupcommit(self, node):
+        c = self.cl.read(node)
+        self.changedfiles.update(c[3])
+        self.neededmfs.setdefault(c[0], node)
+        self.count[0] += 1
+        self.progress(_bundling, self.count[0],
+                      unit=_changesets, total=self.count[1])
+        return node
+
+    def lookupmanifest(self, node):
+        self.count[0] += 1
+        self.progress(_bundling, self.count[0],
+                      unit=_manifests, total=self.count[1])
+
+        return self.cl.node(self.mf.linkrev(self.mf.rev(node)))
+
+    def lookupfile(self, filelog, node):
+        self.progress(_bundling, self.count[0], item=self.fstate[0],
+                      unit=_files, total=self.count[1])
+        return self.fstate[1][node]
+
+class subsetchangegroupgen(changegroupgen):
+    def __init__(self, repo, csets, commonrevs, source, reorder):
+        super(subsetchangegroupgen, self).__init__(repo, csets, source, reorder)
+        self.commonrevs = commonrevs
+        self.fnodes = {}
+
+    def lookupmanifest(self, node):
+        clnode = self.neededmfs[node]
+        mdata = self.mf.readfast(node)
+        changedfiles = self.changedfiles
+        fnodes = self.fnodes
+        for f, n in mdata.iteritems():
+            if f in changedfiles:
+                nodes = fnodes.setdefault(f, {})
+                nodes.setdefault(n, clnode)
+
+        self.count[0] += 1
+        self.progress(_bundling, self.count[0],
+                      unit=_manifests, total=self.count[1])
+        return clnode
+
+    def outgoingmanifests(self, manifest, nodes):
+        rr, rl = manifest.rev, manifest.linkrev
+        return [n for n in nodes if rl(rr(n)) not in self.commonrevs]
+
+    def outgoingfilemap(self, filerevlog, fname):
+        # map of outgoing file nodes to changelog nodes
+        mapping = {}
+        for fnode, clnode in self.fnodes.pop(fname, {}).iteritems():
+            # filter any nodes that claim to be part of the known set
+            clrev = filerevlog.linkrev(filerevlog.rev(fnode))
+            if clrev not in self.commonrevs:
+                mapping[fnode] = clnode
+        return mapping
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -2022,15 +2022,6 @@
 
     @unfilteredmethod
     def _changegroupsubset(self, commonrevs, csets, heads, source):
-
-        cl = self.changelog
-        mf = self.manifest
-        mfs = {} # needed manifests
-        fnodes = {} # needed file nodes
-        changedfiles = set()
-        fstate = ['', {}]
-        count = [0, 0]
-
         # can we go through the fast path ?
         heads.sort()
         if heads == sorted(self.heads()):
@@ -2040,93 +2031,16 @@
         self.hook('preoutgoing', throw=True, source=source)
         self.changegroupinfo(csets, source)
 
-        # filter any nodes that claim to be part of the known set
-        def prune(revlog, missing):
-            rr, rl = revlog.rev, revlog.linkrev
-            return [n for n in missing
-                    if rl(rr(n)) not in commonrevs]
-
-        progress = self.ui.progress
-        _bundling = _('bundling')
-        _changesets = _('changesets')
-        _manifests = _('manifests')
-        _files = _('files')
-
-        def lookup(revlog, x):
-            if revlog == cl:
-                c = cl.read(x)
-                changedfiles.update(c[3])
-                mfs.setdefault(c[0], x)
-                count[0] += 1
-                progress(_bundling, count[0],
-                         unit=_changesets, total=count[1])
-                return x
-            elif revlog == mf:
-                clnode = mfs[x]
-                mdata = mf.readfast(x)
-                for f, n in mdata.iteritems():
-                    if f in changedfiles:
-                        fnodes[f].setdefault(n, clnode)
-                count[0] += 1
-                progress(_bundling, count[0],
-                         unit=_manifests, total=count[1])
-                return clnode
-            else:
-                progress(_bundling, count[0], item=fstate[0],
-                         unit=_files, total=count[1])
-                return fstate[1][x]
-
-        bundler = changegroup.bundle10(lookup)
         reorder = self.ui.config('bundle', 'reorder', 'auto')
         if reorder == 'auto':
             reorder = None
         else:
             reorder = util.parsebool(reorder)
 
-        def gengroup():
-            # Create a changenode group generator that will call our functions
-            # back to lookup the owning changenode and collect information.
-            count[:] = [0, len(csets)]
-            for chunk in cl.group(csets, bundler, reorder=reorder):
-                yield chunk
-            progress(_bundling, None)
+        gen = changegroup.subsetchangegroupgen(self, csets, commonrevs,
+                                               source, reorder)
 
-            # Create a generator for the manifestnodes that calls our lookup
-            # and data collection functions back.
-            for f in changedfiles:
-                fnodes[f] = {}
-            count[:] = [0, len(mfs)]
-            for chunk in mf.group(prune(mf, mfs), bundler, reorder=reorder):
-                yield chunk
-            progress(_bundling, None)
-
-            mfs.clear()
-
-            # Go through all our files in order sorted by name.
-            count[:] = [0, len(changedfiles)]
-            for fname in sorted(changedfiles):
-                filerevlog = self.file(fname)
-                if not len(filerevlog):
-                    raise util.Abort(_("empty or missing revlog for %s")
-                                     % fname)
-                fstate[0] = fname
-                fstate[1] = fnodes.pop(fname, {})
-
-                nodelist = prune(filerevlog, fstate[1])
-                if nodelist:
-                    count[0] += 1
-                    yield bundler.fileheader(fname)
-                    for chunk in filerevlog.group(nodelist, bundler, reorder):
-                        yield chunk
-
-            # Signal that no more groups are left.
-            yield bundler.close()
-            progress(_bundling, None)
-
-            if csets:
-                self.hook('outgoing', node=hex(csets[0]), source=source)
-
-        return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
+        return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
 
     def changegroup(self, basenodes, source):
         # to avoid a race we use changegroupsubset() (issue1320)
@@ -2143,88 +2057,18 @@
 
         nodes is the set of nodes to send"""
 
-        cl = self.changelog
-        mf = self.manifest
-        mfs = {}
-        changedfiles = set()
-        fstate = ['']
-        count = [0, 0]
-
         self.hook('preoutgoing', throw=True, source=source)
         self.changegroupinfo(nodes, source)
 
-        revset = set([cl.rev(n) for n in nodes])
-
-        def gennodelst(log):
-            ln, llr = log.node, log.linkrev
-            return [ln(r) for r in log if llr(r) in revset]
-
-        progress = self.ui.progress
-        _bundling = _('bundling')
-        _changesets = _('changesets')
-        _manifests = _('manifests')
-        _files = _('files')
-
-        def lookup(revlog, x):
-            if revlog == cl:
-                c = cl.read(x)
-                changedfiles.update(c[3])
-                mfs.setdefault(c[0], x)
-                count[0] += 1
-                progress(_bundling, count[0],
-                         unit=_changesets, total=count[1])
-                return x
-            elif revlog == mf:
-                count[0] += 1
-                progress(_bundling, count[0],
-                         unit=_manifests, total=count[1])
-                return cl.node(revlog.linkrev(revlog.rev(x)))
-            else:
-                progress(_bundling, count[0], item=fstate[0],
-                    total=count[1], unit=_files)
-                return cl.node(revlog.linkrev(revlog.rev(x)))
-
-        bundler = changegroup.bundle10(lookup)
         reorder = self.ui.config('bundle', 'reorder', 'auto')
         if reorder == 'auto':
             reorder = None
         else:
             reorder = util.parsebool(reorder)
 
-        def gengroup():
-            '''yield a sequence of changegroup chunks (strings)'''
-            # construct a list of all changed files
+        gen = changegroup.changegroupgen(self, nodes, source, reorder)
 
-            count[:] = [0, len(nodes)]
-            for chunk in cl.group(nodes, bundler, reorder=reorder):
-                yield chunk
-            progress(_bundling, None)
-
-            count[:] = [0, len(mfs)]
-            for chunk in mf.group(gennodelst(mf), bundler, reorder=reorder):
-                yield chunk
-            progress(_bundling, None)
-
-            count[:] = [0, len(changedfiles)]
-            for fname in sorted(changedfiles):
-                filerevlog = self.file(fname)
-                if not len(filerevlog):
-                    raise util.Abort(_("empty or missing revlog for %s")
-                                     % fname)
-                fstate[0] = fname
-                nodelist = gennodelst(filerevlog)
-                if nodelist:
-                    count[0] += 1
-                    yield bundler.fileheader(fname)
-                    for chunk in filerevlog.group(nodelist, bundler, reorder):
-                        yield chunk
-            yield bundler.close()
-            progress(_bundling, None)
-
-            if nodes:
-                self.hook('outgoing', node=hex(nodes[0]), source=source)
-
-        return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
+        return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
 
     @unfilteredmethod
     def addchangegroup(self, source, srctype, url, emptyok=False):


More information about the Mercurial-devel mailing list