[PATCH 7 of 9] bundle-ng: simplify bundle10.generate a bit
Sune Foldager
cryo at cyanite.org
Thu Feb 14 17:07:25 CST 2013
# HG changeset patch
# User Sune Foldager <cryo at cyanite.org>
# Date 1360509790 -3600
# Node ID 45c57eaf3b41b1493169ffe474926b793bb12db9
# Parent da249fd44d65728ccc8d1fe926e951a771cec558
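bundle-ng: simplify bundle10.generate a bit

Inline the getmfnodes(), getfiles() and getfilenodes() helper closures
into the main body, so the changelog, manifest and file groups are
emitted in straight-line order. Remove bundle10.start(); the lookup
callback is now assigned to self._lookup directly.

This changeset also deletes contrib/shrink-revlog.py, which called
bundler.start(lookup), together with its test in tests/test-contrib.t.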
diff -r da249fd44d65 -r 45c57eaf3b41 contrib/shrink-revlog.py
--- a/contrib/shrink-revlog.py Sun Feb 10 16:03:20 2013 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,295 +0,0 @@
-"""reorder a revlog (the manifest by default) to save space
-
-Specifically, this topologically sorts the revisions in the revlog so that
-revisions on the same branch are adjacent as much as possible. This is a
-workaround for the fact that Mercurial computes deltas relative to the
-previous revision rather than relative to a parent revision.
-
-This is *not* safe to run on a changelog.
-"""
-
-# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
-# as a patch to rewrite-log. Cleaned up, refactored, documented, and
-# renamed by Greg Ward <greg at gerg.ca>.
-
-# XXX would be nice to have a way to verify the repository after shrinking,
-# e.g. by comparing "before" and "after" states of random changesets
-# (maybe: export before, shrink, export after, diff).
-
-import os, errno
-from mercurial import revlog, transaction, node, util, scmutil
-from mercurial import changegroup
-from mercurial.i18n import _
-
-
-def postorder(start, edges):
- result = []
- visit = list(start)
- finished = set()
-
- while visit:
- cur = visit[-1]
- for p in edges[cur]:
- # defend against node.nullrev because it's occasionally
- # possible for a node to have parents (null, something)
- # rather than (something, null)
- if p not in finished and p != node.nullrev:
- visit.append(p)
- break
- else:
- result.append(cur)
- finished.add(cur)
- visit.pop()
-
- return result
-
-def toposort_reversepostorder(ui, rl):
- # postorder of the reverse directed graph
-
- # map rev to list of parent revs (p2 first)
- parents = {}
- heads = set()
- ui.status(_('reading revs\n'))
- try:
- for rev in rl:
- ui.progress(_('reading'), rev, total=len(rl))
- (p1, p2) = rl.parentrevs(rev)
- if p1 == p2 == node.nullrev:
- parents[rev] = () # root node
- elif p1 == p2 or p2 == node.nullrev:
- parents[rev] = (p1,) # normal node
- else:
- parents[rev] = (p2, p1) # merge node
- heads.add(rev)
- for p in parents[rev]:
- heads.discard(p)
- finally:
- ui.progress(_('reading'), None)
-
- heads = list(heads)
- heads.sort(reverse=True)
-
- ui.status(_('sorting revs\n'))
- return postorder(heads, parents)
-
-def toposort_postorderreverse(ui, rl):
- # reverse-postorder of the reverse directed graph
-
- children = {}
- roots = set()
- ui.status(_('reading revs\n'))
- try:
- for rev in rl:
- ui.progress(_('reading'), rev, total=len(rl))
- (p1, p2) = rl.parentrevs(rev)
- if p1 == p2 == node.nullrev:
- roots.add(rev)
- children[rev] = []
- if p1 != node.nullrev:
- children[p1].append(rev)
- if p2 != node.nullrev:
- children[p2].append(rev)
- finally:
- ui.progress(_('reading'), None)
-
- roots = list(roots)
- roots.sort()
-
- ui.status(_('sorting revs\n'))
- result = postorder(roots, children)
- result.reverse()
- return result
-
-def writerevs(ui, r1, r2, order, tr):
-
- ui.status(_('writing revs\n'))
-
-
- order = [r1.node(r) for r in order]
-
- # this is a bit ugly, but it works
- count = [0]
- def lookup(revl, x):
- count[0] += 1
- ui.progress(_('writing'), count[0], total=len(order))
- return "%020d" % revl.linkrev(revl.rev(x))
-
- unlookup = lambda x: int(x, 10)
-
- try:
- bundler = changegroup.bundle10()
- bundler.start(lookup)
- group = util.chunkbuffer(bundler.group(order, r1))
- group = changegroup.unbundle10(group, "UN")
- r2.addgroup(group, unlookup, tr)
- finally:
- ui.progress(_('writing'), None)
-
-def report(ui, r1, r2):
- def getsize(r):
- s = 0
- for fn in (r.indexfile, r.datafile):
- try:
- s += os.stat(fn).st_size
- except OSError, inst:
- if inst.errno != errno.ENOENT:
- raise
- return s
-
- oldsize = float(getsize(r1))
- newsize = float(getsize(r2))
-
- # argh: have to pass an int to %d, because a float >= 2^32
- # blows up under Python 2.5 or earlier
- ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
- % (int(oldsize), oldsize / 1024 / 1024))
- ui.write(_('new file size: %12d bytes (%6.1f MiB)\n')
- % (int(newsize), newsize / 1024 / 1024))
-
- shrink_percent = (oldsize - newsize) / oldsize * 100
- shrink_factor = oldsize / newsize
- ui.write(_('shrinkage: %.1f%% (%.1fx)\n')
- % (shrink_percent, shrink_factor))
-
-def shrink(ui, repo, **opts):
- """shrink a revlog by reordering revisions
-
- Rewrites all the entries in some revlog of the current repository
- (by default, the manifest log) to save space.
-
- Different sort algorithms have different performance
- characteristics. Use ``--sort`` to select a sort algorithm so you
- can determine which works best for your data.
- """
-
- if not repo.local():
- raise util.Abort(_('not a local repository: %s') % repo.root)
-
- fn = opts.get('revlog')
- if not fn:
- indexfn = repo.sjoin('00manifest.i')
- else:
- if not fn.endswith('.i'):
- raise util.Abort(_('--revlog option must specify the revlog index '
- 'file (*.i), not %s') % opts.get('revlog'))
-
- indexfn = os.path.realpath(fn)
- store = repo.sjoin('')
- if not indexfn.startswith(store):
- raise util.Abort(_('--revlog option must specify a revlog in %s, '
- 'not %s') % (store, indexfn))
-
- sortname = opts['sort']
- try:
- toposort = globals()['toposort_' + sortname]
- except KeyError:
- raise util.Abort(_('no such toposort algorithm: %s') % sortname)
-
- if not os.path.exists(indexfn):
- raise util.Abort(_('no such file: %s') % indexfn)
- if '00changelog' in indexfn:
- raise util.Abort(_('shrinking the changelog '
- 'will corrupt your repository'))
-
- ui.write(_('shrinking %s\n') % indexfn)
- tmpindexfn = util.mktempcopy(indexfn, emptyok=True)
-
- r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn)
- r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn)
-
- datafn, tmpdatafn = r1.datafile, r2.datafile
-
- oldindexfn = indexfn + '.old'
- olddatafn = datafn + '.old'
- if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
- raise util.Abort(_('one or both of\n'
- ' %s\n'
- ' %s\n'
- 'exists from a previous run; please clean up '
- 'before running again') % (oldindexfn, olddatafn))
-
- # Don't use repo.transaction(), because then things get hairy with
- # paths: some need to be relative to .hg, and some need to be
- # absolute. Doing it this way keeps things simple: everything is an
- # absolute path.
- lock = repo.lock(wait=False)
- tr = transaction.transaction(ui.warn,
- open,
- repo.sjoin('journal'))
-
- def ignoremissing(func):
- def f(*args, **kw):
- try:
- return func(*args, **kw)
- except OSError, inst:
- if inst.errno != errno.ENOENT:
- raise
- return f
-
- try:
- try:
- order = toposort(ui, r1)
-
- suboptimal = 0
- for i in xrange(1, len(order)):
- parents = [p for p in r1.parentrevs(order[i])
- if p != node.nullrev]
- if parents and order[i - 1] not in parents:
- suboptimal += 1
- ui.note(_('%d suboptimal nodes\n') % suboptimal)
-
- writerevs(ui, r1, r2, order, tr)
- report(ui, r1, r2)
- tr.close()
- except: # re-raises
- # Abort transaction first, so we truncate the files before
- # deleting them.
- tr.abort()
- for fn in (tmpindexfn, tmpdatafn):
- ignoremissing(os.unlink)(fn)
- raise
- if not opts.get('dry_run'):
- # racy, both files cannot be renamed atomically
- # copy files
- util.oslink(indexfn, oldindexfn)
- ignoremissing(util.oslink)(datafn, olddatafn)
-
- # rename
- util.rename(tmpindexfn, indexfn)
- try:
- os.chmod(tmpdatafn, os.stat(datafn).st_mode)
- util.rename(tmpdatafn, datafn)
- except OSError, inst:
- if inst.errno != errno.ENOENT:
- raise
- ignoremissing(os.unlink)(datafn)
- else:
- for fn in (tmpindexfn, tmpdatafn):
- ignoremissing(os.unlink)(fn)
- finally:
- lock.release()
-
- if not opts.get('dry_run'):
- ui.write(
- _('note: old revlog saved in:\n'
- ' %s\n'
- ' %s\n'
- '(You can delete those files when you are satisfied that your\n'
- 'repository is still sane. '
- 'Running \'hg verify\' is strongly recommended.)\n')
- % (oldindexfn, olddatafn))
-
-cmdtable = {
- 'shrink': (shrink,
- [('', 'revlog', '',
- _('the revlog to shrink (.i)')),
- ('n', 'dry-run', None,
- _('do not shrink, simulate only')),
- ('', 'sort', 'reversepostorder',
- _('name of sort algorithm to use')),
- ],
- _('hg shrink [--revlog PATH]'))
-}
-
-if __name__ == "__main__":
- print "shrink-revlog.py is now an extension (see hg help extensions)"
diff -r da249fd44d65 -r 45c57eaf3b41 mercurial/changegroup.py
--- a/mercurial/changegroup.py Sun Feb 10 16:03:20 2013 +0100
+++ b/mercurial/changegroup.py Sun Feb 10 16:23:10 2013 +0100
@@ -243,8 +243,6 @@
self._repo = repo
self._reorder = reorder
self.count = [0, 0]
- def start(self, lookup):
- self._lookup = lookup
def close(self):
return closechunk()
@@ -338,17 +336,29 @@
unit=_files, total=count[1])
return fstate[1][x]
- self.start(lookup)
+ self._lookup = lookup
- def getmfnodes():
- for f in changedfiles:
- fnodes[f] = {}
- count[:] = [0, len(mfs)]
- return prune(mf, mfs)
- def getfiles():
- mfs.clear()
- return changedfiles
- def getfilenodes(fname, filerevlog):
+ count[:] = [0, len(clnodes)]
+ for chunk in self.group(clnodes, cl, reorder=reorder):
+ yield chunk
+ progress(_bundling, None)
+
+ for f in changedfiles:
+ fnodes[f] = {}
+ count[:] = [0, len(mfs)]
+ mfnodes = prune(mf, mfs)
+ for chunk in self.group(mfnodes, mf, reorder=reorder):
+ yield chunk
+ progress(_bundling, None)
+
+ mfs.clear()
+ count[:] = [0, len(changedfiles)]
+ for fname in sorted(changedfiles):
+ filerevlog = repo.file(fname)
+ if not len(filerevlog):
+ raise util.Abort(_("empty or missing revlog for %s")
+ % fname)
+
if fastpathlinkrev:
ln, llr = filerevlog.node, filerevlog.linkrev
def genfilenodes():
@@ -359,30 +369,11 @@
fnodes[fname] = dict(genfilenodes())
fstate[0] = fname
fstate[1] = fnodes.pop(fname, {})
- return prune(filerevlog, fstate[1])
-
-
- count[:] = [0, len(clnodes)]
- for chunk in self.group(clnodes, cl, reorder=reorder):
- yield chunk
- progress(_bundling, None)
-
- for chunk in self.group(getmfnodes(), mf, reorder=reorder):
- yield chunk
- progress(_bundling, None)
-
- changedfiles = getfiles()
- count[:] = [0, len(changedfiles)]
- for fname in sorted(changedfiles):
- filerevlog = repo.file(fname)
- if not len(filerevlog):
- raise util.Abort(_("empty or missing revlog for %s")
- % fname)
- nodelist = getfilenodes(fname, filerevlog)
- if nodelist:
+ filenodes = prune(filerevlog, fstate[1])
+ if filenodes:
count[0] += 1
yield self.fileheader(fname)
- for chunk in self.group(nodelist, filerevlog, reorder):
+ for chunk in self.group(filenodes, filerevlog, reorder):
yield chunk
yield self.close()
progress(_bundling, None)
diff -r da249fd44d65 -r 45c57eaf3b41 tests/test-contrib.t
--- a/tests/test-contrib.t Sun Feb 10 16:03:20 2013 +0100
+++ b/tests/test-contrib.t Sun Feb 10 16:23:10 2013 +0100
@@ -103,34 +103,6 @@
no changes found
[1]
-
-#if hardlink
-
-Test shrink-revlog:
- $ cd repo-a
- $ hg --config extensions.shrink="$CONTRIBDIR/shrink-revlog.py" shrink
- shrinking $TESTTMP/repo-a/.hg/store/00manifest.i (glob)
- reading revs
- sorting revs
- writing revs
- old file size: 324 bytes ( 0.0 MiB)
- new file size: 324 bytes ( 0.0 MiB)
- shrinkage: 0.0% (1.0x)
- note: old revlog saved in:
- $TESTTMP/repo-a/.hg/store/00manifest.i.old (glob)
- $TESTTMP/repo-a/.hg/store/00manifest.d.old (glob)
- (You can delete those files when you are satisfied that your
- repository is still sane. Running 'hg verify' is strongly recommended.)
- $ hg verify
- checking changesets
- checking manifests
- crosschecking files in changesets and manifests
- checking files
- 1 files, 3 changesets, 3 total revisions
- $ cd ..
-
-#endif
-
Test simplemerge command:
$ cp "$CONTRIBDIR/simplemerge" .
More information about the Mercurial-devel mailing list