[PATCH 4 of 4 V2] changegroup: refactor changegroup generation into a separate class

Augie Fackler raf at durin42.com
Tue May 7 12:15:25 CDT 2013


On Mon, May 06, 2013 at 08:14:50PM -0700, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham at fb.com>
> # Date 1367430936 25200
> #      Wed May 01 10:55:36 2013 -0700
> # Node ID c86e8d104d87bf958827d9713d4e2f22f7535117
> # Parent  dbdfdfcc9b14728cee940dd9f96398561bed0802
> changegroup: refactor changegroup generation into a separate class

Series LGTM, anyone else want a look?

>
> Previously changegroup generation was confined to _changegroup() and
> _changegroupsubset(). Both were massive functions and they had lots of
> duplicate code.
>
> I've moved all their logic into the changegroup module as new classes:
> ChangeGroupGen and SubsetChangeGroupGen. This breaks up all the different
> logic into separate functions so extensions can modify them individually
> and allows us to share most of the logic with the subset version. This
> also helps make localrepo less of a god class (-156 lines from localrepo,
> +157 lines to changegroup).

Code removed from localrepo. You get a cookie.

>
> There should be no behavior changes introduced by this.
>
>
> diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
> --- a/mercurial/changegroup.py
> +++ b/mercurial/changegroup.py
> @@ -6,7 +6,7 @@
>  # GNU General Public License version 2 or any later version.
>
>  from i18n import _
> -from node import nullrev
> +from node import nullrev, hex
>  import mdiff, util
>  import struct, os, bz2, zlib, tempfile
>
> @@ -254,3 +254,160 @@
>      def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
>          # do nothing with basenode, it is implicitly the previous one in HG10
>          return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
> +
> +_bundling = _('bundling')
> +_changesets = _('changesets')
> +_manifests = _('manifests')
> +_files = _('files')

I like this, but mpm may not be a fan of this underbar style. Matt?

> +
> +class changegroupgen(object):
> +    def __init__(self, repo, csets, source, reorder):
> +        self.repo = repo
> +        self.cl = repo.changelog
> +        self.mf = repo.manifest
> +        self.csets = csets
> +        self.changedfiles = set()
> +        self.source = source
> +        self.neededmfs = {}
> +        self.reorder = reorder
> +        self.bundler = bundle10(self.lookup)
> +        self.fstate = ['', {}]
> +        self.count = [0, 0]
> +        self.progress = repo.ui.progress
> +
> +    def gengroup(self):
> +        for chunk in self.addcommitgroups():
> +            yield chunk
> +        for chunk in self.addmanifestgroups():
> +            yield chunk
> +        for chunk in self.addfilegroups():
> +            yield chunk
> +
> +        # Signal that no more groups are left.
> +        yield self.bundler.close()
> +        self.progress(_bundling, None)
> +
> +        if self.csets:
> +            self.repo.hook('outgoing',
> +                           node = hex(self.csets[0]),
> +                           source = self.source)
> +
> +    def addcommitgroups(self):
> +        # Create a changenode group generator that will call our functions
> +        # back to lookup the owning changenode and collect information.
> +        self.count[:] = [0, len(self.csets)]
> +        for chunk in self.cl.group(self.csets,
> +                                   self.bundler,
> +                                   reorder=self.reorder):
> +            yield chunk
> +        self.progress(_bundling, None)
> +
> +    def addmanifestgroups(self):
> +        # Create a generator for the manifestnodes that calls our lookup
> +        # and data collection functions back.
> +        self.count[:] = [0, len(self.neededmfs)]
> +        outgoingmfs = self.outgoingmanifests(self.mf, self.neededmfs)
> +
> +        for chunk in self.mf.group(outgoingmfs, self.bundler,
> +                                   reorder=self.reorder):
> +            yield chunk
> +        self.progress(_bundling, None)
> +
> +    def addfilegroups(self):
> +        # Go through all our files in order sorted by name.
> +        repo = self.repo
> +        fstate = self.fstate
> +        self.count[:] = [0, len(self.changedfiles)]
> +        for fname in sorted(self.changedfiles):
> +            filerevlog = repo.file(fname)
> +
> +            fstate[0] = fname
> +            fstate[1] = self.outgoingfilemap(filerevlog, fname)
> +
> +            nodelist = fstate[1]
> +            if nodelist:
> +                self.count[0] += 1
> +                yield self.bundler.fileheader(fname)
> +                for chunk in filerevlog.group(nodelist,
> +                                              self.bundler,
> +                                              self.reorder):
> +                    yield chunk
> +
> +    def outgoingmanifests(self, manifest, nodes):
> +        # return the manifest nodes that are outgoing
> +        rr, rl = manifest.rev, manifest.linkrev
> +        clnode = self.cl.node
> +        return [n for n in nodes if clnode(rl(rr(n))) in self.csets]
> +
> +    def outgoingfilemap(self, filerevlog, fname):
> +        # map of outgoing file nodes to changelog nodes
> +        mapping = {}
> +        for r in filerevlog:
> +            clnode = self.cl.node(filerevlog.linkrev(r))
> +            if clnode in self.csets:
> +                mapping[filerevlog.node(r)] = clnode
> +        return mapping
> +
> +    def lookup(self, revlog, node):
> +        if revlog == self.cl:
> +            return self.lookupcommit(node)
> +        elif revlog == self.mf:
> +            return self.lookupmanifest(node)
> +        else:
> +            return self.lookupfile(revlog, node)
> +
> +    def lookupcommit(self, node):
> +        c = self.cl.read(node)
> +        self.changedfiles.update(c[3])
> +        self.neededmfs.setdefault(c[0], node)
> +        self.count[0] += 1
> +        self.progress(_bundling, self.count[0],
> +                      unit=_changesets, total=self.count[1])
> +        return node
> +
> +    def lookupmanifest(self, node):
> +        self.count[0] += 1
> +        self.progress(_bundling, self.count[0],
> +                      unit=_manifests, total=self.count[1])
> +
> +        return self.cl.node(self.mf.linkrev(self.mf.rev(node)))
> +
> +    def lookupfile(self, filelog, node):
> +        self.progress(_bundling, self.count[0], item=self.fstate[0],
> +                      unit=_files, total=self.count[1])
> +        return self.fstate[1][node]
> +
> +class subsetchangegroupgen(changegroupgen):
> +    def __init__(self, repo, csets, commonrevs, source, reorder):
> +        super(subsetchangegroupgen, self).__init__(repo, csets, source, reorder)
> +        self.commonrevs = commonrevs
> +        self.fnodes = {}
> +
> +    def lookupmanifest(self, node):
> +        clnode = self.neededmfs[node]
> +        mdata = self.mf.readfast(node)
> +        changedfiles = self.changedfiles
> +        fnodes = self.fnodes
> +        for f, n in mdata.iteritems():
> +            if f in changedfiles:
> +                nodes = fnodes.setdefault(f, {})
> +                nodes.setdefault(n, clnode)
> +
> +        self.count[0] += 1
> +        self.progress(_bundling, self.count[0],
> +                      unit=_manifests, total=self.count[1])
> +        return clnode
> +
> +    def outgoingmanifests(self, manifest, nodes):
> +        rr, rl = manifest.rev, manifest.linkrev
> +        return [n for n in nodes if rl(rr(n)) not in self.commonrevs]
> +
> +    def outgoingfilemap(self, filerevlog, fname):
> +        # map of outgoing file nodes to changelog nodes
> +        mapping = {}
> +        for fnode, clnode in self.fnodes.pop(fname, {}).iteritems():
> +            # filter any nodes that claim to be part of the known set
> +            clrev = filerevlog.linkrev(filerevlog.rev(fnode))
> +            if clrev not in self.commonrevs:
> +                mapping[fnode] = clnode
> +        return mapping
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -2022,15 +2022,6 @@
>
>      @unfilteredmethod
>      def _changegroupsubset(self, commonrevs, csets, heads, source):
> -
> -        cl = self.changelog
> -        mf = self.manifest
> -        mfs = {} # needed manifests
> -        fnodes = {} # needed file nodes
> -        changedfiles = set()
> -        fstate = ['', {}]
> -        count = [0, 0]
> -
>          # can we go through the fast path ?
>          heads.sort()
>          if heads == sorted(self.heads()):
> @@ -2040,93 +2031,16 @@
>          self.hook('preoutgoing', throw=True, source=source)
>          self.changegroupinfo(csets, source)
>
> -        # filter any nodes that claim to be part of the known set
> -        def prune(revlog, missing):
> -            rr, rl = revlog.rev, revlog.linkrev
> -            return [n for n in missing
> -                    if rl(rr(n)) not in commonrevs]
> -
> -        progress = self.ui.progress
> -        _bundling = _('bundling')
> -        _changesets = _('changesets')
> -        _manifests = _('manifests')
> -        _files = _('files')
> -
> -        def lookup(revlog, x):
> -            if revlog == cl:
> -                c = cl.read(x)
> -                changedfiles.update(c[3])
> -                mfs.setdefault(c[0], x)
> -                count[0] += 1
> -                progress(_bundling, count[0],
> -                         unit=_changesets, total=count[1])
> -                return x
> -            elif revlog == mf:
> -                clnode = mfs[x]
> -                mdata = mf.readfast(x)
> -                for f, n in mdata.iteritems():
> -                    if f in changedfiles:
> -                        fnodes[f].setdefault(n, clnode)
> -                count[0] += 1
> -                progress(_bundling, count[0],
> -                         unit=_manifests, total=count[1])
> -                return clnode
> -            else:
> -                progress(_bundling, count[0], item=fstate[0],
> -                         unit=_files, total=count[1])
> -                return fstate[1][x]
> -
> -        bundler = changegroup.bundle10(lookup)
>          reorder = self.ui.config('bundle', 'reorder', 'auto')
>          if reorder == 'auto':
>              reorder = None
>          else:
>              reorder = util.parsebool(reorder)
>
> -        def gengroup():
> -            # Create a changenode group generator that will call our functions
> -            # back to lookup the owning changenode and collect information.
> -            count[:] = [0, len(csets)]
> -            for chunk in cl.group(csets, bundler, reorder=reorder):
> -                yield chunk
> -            progress(_bundling, None)
> +        gen = changegroup.subsetchangegroupgen(self, csets, commonrevs,
> +                                               source, reorder)
>
> -            # Create a generator for the manifestnodes that calls our lookup
> -            # and data collection functions back.
> -            for f in changedfiles:
> -                fnodes[f] = {}
> -            count[:] = [0, len(mfs)]
> -            for chunk in mf.group(prune(mf, mfs), bundler, reorder=reorder):
> -                yield chunk
> -            progress(_bundling, None)
> -
> -            mfs.clear()
> -
> -            # Go through all our files in order sorted by name.
> -            count[:] = [0, len(changedfiles)]
> -            for fname in sorted(changedfiles):
> -                filerevlog = self.file(fname)
> -                if not len(filerevlog):
> -                    raise util.Abort(_("empty or missing revlog for %s")
> -                                     % fname)
> -                fstate[0] = fname
> -                fstate[1] = fnodes.pop(fname, {})
> -
> -                nodelist = prune(filerevlog, fstate[1])
> -                if nodelist:
> -                    count[0] += 1
> -                    yield bundler.fileheader(fname)
> -                    for chunk in filerevlog.group(nodelist, bundler, reorder):
> -                        yield chunk
> -
> -            # Signal that no more groups are left.
> -            yield bundler.close()
> -            progress(_bundling, None)
> -
> -            if csets:
> -                self.hook('outgoing', node=hex(csets[0]), source=source)
> -
> -        return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
> +        return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
>
>      def changegroup(self, basenodes, source):
>          # to avoid a race we use changegroupsubset() (issue1320)
> @@ -2143,88 +2057,18 @@
>
>          nodes is the set of nodes to send"""
>
> -        cl = self.changelog
> -        mf = self.manifest
> -        mfs = {}
> -        changedfiles = set()
> -        fstate = ['']
> -        count = [0, 0]
> -
>          self.hook('preoutgoing', throw=True, source=source)
>          self.changegroupinfo(nodes, source)
>
> -        revset = set([cl.rev(n) for n in nodes])
> -
> -        def gennodelst(log):
> -            ln, llr = log.node, log.linkrev
> -            return [ln(r) for r in log if llr(r) in revset]
> -
> -        progress = self.ui.progress
> -        _bundling = _('bundling')
> -        _changesets = _('changesets')
> -        _manifests = _('manifests')
> -        _files = _('files')
> -
> -        def lookup(revlog, x):
> -            if revlog == cl:
> -                c = cl.read(x)
> -                changedfiles.update(c[3])
> -                mfs.setdefault(c[0], x)
> -                count[0] += 1
> -                progress(_bundling, count[0],
> -                         unit=_changesets, total=count[1])
> -                return x
> -            elif revlog == mf:
> -                count[0] += 1
> -                progress(_bundling, count[0],
> -                         unit=_manifests, total=count[1])
> -                return cl.node(revlog.linkrev(revlog.rev(x)))
> -            else:
> -                progress(_bundling, count[0], item=fstate[0],
> -                    total=count[1], unit=_files)
> -                return cl.node(revlog.linkrev(revlog.rev(x)))
> -
> -        bundler = changegroup.bundle10(lookup)
>          reorder = self.ui.config('bundle', 'reorder', 'auto')
>          if reorder == 'auto':
>              reorder = None
>          else:
>              reorder = util.parsebool(reorder)
>
> -        def gengroup():
> -            '''yield a sequence of changegroup chunks (strings)'''
> -            # construct a list of all changed files
> +        gen = changegroup.changegroupgen(self, nodes, source, reorder)
>
> -            count[:] = [0, len(nodes)]
> -            for chunk in cl.group(nodes, bundler, reorder=reorder):
> -                yield chunk
> -            progress(_bundling, None)
> -
> -            count[:] = [0, len(mfs)]
> -            for chunk in mf.group(gennodelst(mf), bundler, reorder=reorder):
> -                yield chunk
> -            progress(_bundling, None)
> -
> -            count[:] = [0, len(changedfiles)]
> -            for fname in sorted(changedfiles):
> -                filerevlog = self.file(fname)
> -                if not len(filerevlog):
> -                    raise util.Abort(_("empty or missing revlog for %s")
> -                                     % fname)
> -                fstate[0] = fname
> -                nodelist = gennodelst(filerevlog)
> -                if nodelist:
> -                    count[0] += 1
> -                    yield bundler.fileheader(fname)
> -                    for chunk in filerevlog.group(nodelist, bundler, reorder):
> -                        yield chunk
> -            yield bundler.close()
> -            progress(_bundling, None)
> -
> -            if nodes:
> -                self.hook('outgoing', node=hex(nodes[0]), source=source)
> -
> -        return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
> +        return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
>
>      @unfilteredmethod
>      def addchangegroup(self, source, srctype, url, emptyok=False):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list