[PATCH 4 of 4 V2] changegroup: refactor changegroup generation into a separate class
Augie Fackler
raf at durin42.com
Tue May 7 12:15:25 CDT 2013
On Mon, May 06, 2013 at 08:14:50PM -0700, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham at fb.com>
> # Date 1367430936 25200
> # Wed May 01 10:55:36 2013 -0700
> # Node ID c86e8d104d87bf958827d9713d4e2f22f7535117
> # Parent dbdfdfcc9b14728cee940dd9f96398561bed0802
> changegroup: refactor changegroup generation into a separate class
Series LGTM, anyone else want a look?
>
> Previously changegroup generation was confined to _changegroup() and
> _changegroupsubset(). Both were massive functions and they had lots of
> duplicate code.
>
> I've moved all their logic into the changegroup module as new classes:
> ChangeGroupGen and SubsetChangeGroupGen. This breaks up all the different
> logic into separate functions so extensions can modify them individually
> and allows us to share most of the logic with the subset version. This
> also helps make localrepo less of a god class (-156 lines from localrepo,
> +157 lines to changegroup).
Code removed from localrepo. You get a cookie.
>
> There should be no behavior changes introduced by this.
>
>
> diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
> --- a/mercurial/changegroup.py
> +++ b/mercurial/changegroup.py
> @@ -6,7 +6,7 @@
> # GNU General Public License version 2 or any later version.
>
> from i18n import _
> -from node import nullrev
> +from node import nullrev, hex
> import mdiff, util
> import struct, os, bz2, zlib, tempfile
>
> @@ -254,3 +254,160 @@
> def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
> # do nothing with basenode, it is implicitly the previous one in HG10
> return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
> +
> +_bundling = _('bundling')
> +_changesets = _('changesets')
> +_manifests = _('manifests')
> +_files = _('files')
I like this, but mpm may not be a fan of this underbar style. Matt?
> +
> +class changegroupgen(object):
> + def __init__(self, repo, csets, source, reorder):
> + self.repo = repo
> + self.cl = repo.changelog
> + self.mf = repo.manifest
> + self.csets = csets
> + self.changedfiles = set()
> + self.source = source
> + self.neededmfs = {}
> + self.reorder = reorder
> + self.bundler = bundle10(self.lookup)
> + self.fstate = ['', {}]
> + self.count = [0, 0]
> + self.progress = repo.ui.progress
> +
> + def gengroup(self):
> + for chunk in self.addcommitgroups():
> + yield chunk
> + for chunk in self.addmanifestgroups():
> + yield chunk
> + for chunk in self.addfilegroups():
> + yield chunk
> +
> + # Signal that no more groups are left.
> + yield self.bundler.close()
> + self.progress(_bundling, None)
> +
> + if self.csets:
> + self.repo.hook('outgoing',
> + node = hex(self.csets[0]),
> + source = self.source)
> +
> + def addcommitgroups(self):
> + # Create a changenode group generator that will call our functions
> + # back to lookup the owning changenode and collect information.
> + self.count[:] = [0, len(self.csets)]
> + for chunk in self.cl.group(self.csets,
> + self.bundler,
> + reorder=self.reorder):
> + yield chunk
> + self.progress(_bundling, None)
> +
> + def addmanifestgroups(self):
> + # Create a generator for the manifestnodes that calls our lookup
> + # and data collection functions back.
> + self.count[:] = [0, len(self.neededmfs)]
> + outgoingmfs = self.outgoingmanifests(self.mf, self.neededmfs)
> +
> + for chunk in self.mf.group(outgoingmfs, self.bundler,
> + reorder=self.reorder):
> + yield chunk
> + self.progress(_bundling, None)
> +
> + def addfilegroups(self):
> + # Go through all our files in order sorted by name.
> + repo = self.repo
> + fstate = self.fstate
> + self.count[:] = [0, len(self.changedfiles)]
> + for fname in sorted(self.changedfiles):
> + filerevlog = repo.file(fname)
> +
> + fstate[0] = fname
> + fstate[1] = self.outgoingfilemap(filerevlog, fname)
> +
> + nodelist = fstate[1]
> + if nodelist:
> + self.count[0] += 1
> + yield self.bundler.fileheader(fname)
> + for chunk in filerevlog.group(nodelist,
> + self.bundler,
> + self.reorder):
> + yield chunk
> +
> + def outgoingmanifests(self, manifest, nodes):
> + # return the manifest nodes that are outgoing
> + rr, rl = manifest.rev, manifest.linkrev
> + clnode = self.cl.node
> + return [n for n in nodes if clnode(rl(rr(n))) in self.csets]
> +
> + def outgoingfilemap(self, filerevlog, fname):
> + # map of outgoing file nodes to changelog nodes
> + mapping = {}
> + for r in filerevlog:
> + clnode = self.cl.node(filerevlog.linkrev(r))
> + if clnode in self.csets:
> + mapping[filerevlog.node(r)] = clnode
> + return mapping
> +
> + def lookup(self, revlog, node):
> + if revlog == self.cl:
> + return self.lookupcommit(node)
> + elif revlog == self.mf:
> + return self.lookupmanifest(node)
> + else:
> + return self.lookupfile(revlog, node)
> +
> + def lookupcommit(self, node):
> + c = self.cl.read(node)
> + self.changedfiles.update(c[3])
> + self.neededmfs.setdefault(c[0], node)
> + self.count[0] += 1
> + self.progress(_bundling, self.count[0],
> + unit=_changesets, total=self.count[1])
> + return node
> +
> + def lookupmanifest(self, node):
> + self.count[0] += 1
> + self.progress(_bundling, self.count[0],
> + unit=_manifests, total=self.count[1])
> +
> + return self.cl.node(self.mf.linkrev(self.mf.rev(node)))
> +
> + def lookupfile(self, filelog, node):
> + self.progress(_bundling, self.count[0], item=self.fstate[0],
> + unit=_files, total=self.count[1])
> + return self.fstate[1][node]
> +
> +class subsetchangegroupgen(changegroupgen):
> + def __init__(self, repo, csets, commonrevs, source, reorder):
> + super(subsetchangegroupgen, self).__init__(repo, csets, source, reorder)
> + self.commonrevs = commonrevs
> + self.fnodes = {}
> +
> + def lookupmanifest(self, node):
> + clnode = self.neededmfs[node]
> + mdata = self.mf.readfast(node)
> + changedfiles = self.changedfiles
> + fnodes = self.fnodes
> + for f, n in mdata.iteritems():
> + if f in changedfiles:
> + nodes = fnodes.setdefault(f, {})
> + nodes.setdefault(n, clnode)
> +
> + self.count[0] += 1
> + self.progress(_bundling, self.count[0],
> + unit=_manifests, total=self.count[1])
> + return clnode
> +
> + def outgoingmanifests(self, manifest, nodes):
> + rr, rl = manifest.rev, manifest.linkrev
> + return [n for n in nodes if rl(rr(n)) not in self.commonrevs]
> +
> + def outgoingfilemap(self, filerevlog, fname):
> + # map of outgoing file nodes to changelog nodes
> + mapping = {}
> + for fnode, clnode in self.fnodes.pop(fname, {}).iteritems():
> + # filter any nodes that claim to be part of the known set
> + clrev = filerevlog.linkrev(filerevlog.rev(fnode))
> + if clrev not in self.commonrevs:
> + mapping[fnode] = clnode
> + return mapping
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -2022,15 +2022,6 @@
>
> @unfilteredmethod
> def _changegroupsubset(self, commonrevs, csets, heads, source):
> -
> - cl = self.changelog
> - mf = self.manifest
> - mfs = {} # needed manifests
> - fnodes = {} # needed file nodes
> - changedfiles = set()
> - fstate = ['', {}]
> - count = [0, 0]
> -
> # can we go through the fast path ?
> heads.sort()
> if heads == sorted(self.heads()):
> @@ -2040,93 +2031,16 @@
> self.hook('preoutgoing', throw=True, source=source)
> self.changegroupinfo(csets, source)
>
> - # filter any nodes that claim to be part of the known set
> - def prune(revlog, missing):
> - rr, rl = revlog.rev, revlog.linkrev
> - return [n for n in missing
> - if rl(rr(n)) not in commonrevs]
> -
> - progress = self.ui.progress
> - _bundling = _('bundling')
> - _changesets = _('changesets')
> - _manifests = _('manifests')
> - _files = _('files')
> -
> - def lookup(revlog, x):
> - if revlog == cl:
> - c = cl.read(x)
> - changedfiles.update(c[3])
> - mfs.setdefault(c[0], x)
> - count[0] += 1
> - progress(_bundling, count[0],
> - unit=_changesets, total=count[1])
> - return x
> - elif revlog == mf:
> - clnode = mfs[x]
> - mdata = mf.readfast(x)
> - for f, n in mdata.iteritems():
> - if f in changedfiles:
> - fnodes[f].setdefault(n, clnode)
> - count[0] += 1
> - progress(_bundling, count[0],
> - unit=_manifests, total=count[1])
> - return clnode
> - else:
> - progress(_bundling, count[0], item=fstate[0],
> - unit=_files, total=count[1])
> - return fstate[1][x]
> -
> - bundler = changegroup.bundle10(lookup)
> reorder = self.ui.config('bundle', 'reorder', 'auto')
> if reorder == 'auto':
> reorder = None
> else:
> reorder = util.parsebool(reorder)
>
> - def gengroup():
> - # Create a changenode group generator that will call our functions
> - # back to lookup the owning changenode and collect information.
> - count[:] = [0, len(csets)]
> - for chunk in cl.group(csets, bundler, reorder=reorder):
> - yield chunk
> - progress(_bundling, None)
> + gen = changegroup.subsetchangegroupgen(self, csets, commonrevs,
> + source, reorder)
>
> - # Create a generator for the manifestnodes that calls our lookup
> - # and data collection functions back.
> - for f in changedfiles:
> - fnodes[f] = {}
> - count[:] = [0, len(mfs)]
> - for chunk in mf.group(prune(mf, mfs), bundler, reorder=reorder):
> - yield chunk
> - progress(_bundling, None)
> -
> - mfs.clear()
> -
> - # Go through all our files in order sorted by name.
> - count[:] = [0, len(changedfiles)]
> - for fname in sorted(changedfiles):
> - filerevlog = self.file(fname)
> - if not len(filerevlog):
> - raise util.Abort(_("empty or missing revlog for %s")
> - % fname)
> - fstate[0] = fname
> - fstate[1] = fnodes.pop(fname, {})
> -
> - nodelist = prune(filerevlog, fstate[1])
> - if nodelist:
> - count[0] += 1
> - yield bundler.fileheader(fname)
> - for chunk in filerevlog.group(nodelist, bundler, reorder):
> - yield chunk
> -
> - # Signal that no more groups are left.
> - yield bundler.close()
> - progress(_bundling, None)
> -
> - if csets:
> - self.hook('outgoing', node=hex(csets[0]), source=source)
> -
> - return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
> + return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
>
> def changegroup(self, basenodes, source):
> # to avoid a race we use changegroupsubset() (issue1320)
> @@ -2143,88 +2057,18 @@
>
> nodes is the set of nodes to send"""
>
> - cl = self.changelog
> - mf = self.manifest
> - mfs = {}
> - changedfiles = set()
> - fstate = ['']
> - count = [0, 0]
> -
> self.hook('preoutgoing', throw=True, source=source)
> self.changegroupinfo(nodes, source)
>
> - revset = set([cl.rev(n) for n in nodes])
> -
> - def gennodelst(log):
> - ln, llr = log.node, log.linkrev
> - return [ln(r) for r in log if llr(r) in revset]
> -
> - progress = self.ui.progress
> - _bundling = _('bundling')
> - _changesets = _('changesets')
> - _manifests = _('manifests')
> - _files = _('files')
> -
> - def lookup(revlog, x):
> - if revlog == cl:
> - c = cl.read(x)
> - changedfiles.update(c[3])
> - mfs.setdefault(c[0], x)
> - count[0] += 1
> - progress(_bundling, count[0],
> - unit=_changesets, total=count[1])
> - return x
> - elif revlog == mf:
> - count[0] += 1
> - progress(_bundling, count[0],
> - unit=_manifests, total=count[1])
> - return cl.node(revlog.linkrev(revlog.rev(x)))
> - else:
> - progress(_bundling, count[0], item=fstate[0],
> - total=count[1], unit=_files)
> - return cl.node(revlog.linkrev(revlog.rev(x)))
> -
> - bundler = changegroup.bundle10(lookup)
> reorder = self.ui.config('bundle', 'reorder', 'auto')
> if reorder == 'auto':
> reorder = None
> else:
> reorder = util.parsebool(reorder)
>
> - def gengroup():
> - '''yield a sequence of changegroup chunks (strings)'''
> - # construct a list of all changed files
> + gen = changegroup.changegroupgen(self, nodes, source, reorder)
>
> - count[:] = [0, len(nodes)]
> - for chunk in cl.group(nodes, bundler, reorder=reorder):
> - yield chunk
> - progress(_bundling, None)
> -
> - count[:] = [0, len(mfs)]
> - for chunk in mf.group(gennodelst(mf), bundler, reorder=reorder):
> - yield chunk
> - progress(_bundling, None)
> -
> - count[:] = [0, len(changedfiles)]
> - for fname in sorted(changedfiles):
> - filerevlog = self.file(fname)
> - if not len(filerevlog):
> - raise util.Abort(_("empty or missing revlog for %s")
> - % fname)
> - fstate[0] = fname
> - nodelist = gennodelst(filerevlog)
> - if nodelist:
> - count[0] += 1
> - yield bundler.fileheader(fname)
> - for chunk in filerevlog.group(nodelist, bundler, reorder):
> - yield chunk
> - yield bundler.close()
> - progress(_bundling, None)
> -
> - if nodes:
> - self.hook('outgoing', node=hex(nodes[0]), source=source)
> -
> - return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN')
> + return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN')
>
> @unfilteredmethod
> def addchangegroup(self, source, srctype, url, emptyok=False):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list