[PATCH] Add script to rewrite manifest to workaround lack of parent deltas

Greg Ward greg-hg at gerg.ca
Wed Aug 19 16:36:37 CDT 2009


# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1233047576 0
# Node ID d7ff31c478891a8ef2273a0d0997c99a2b05092b
# Parent  c5173a05aec8ee8e74a5bfc101e0ed7ed9f24625
Add script to rewrite manifest to work around lack of parent deltas.

Based on a patch to rewrite-log by Benoit Boissinot that I found here:
  http://article.gmane.org/gmane.comp.version-control.mercurial.general/11908

Probably not ready to push yet; I'm just sending this now for initial
feedback: is this worth having, is the name appropriate, did I get
transactions/locking right, etc.?

diff --git a/contrib/shrink-manifest.py b/contrib/shrink-manifest.py
new file mode 100755
--- /dev/null
+++ b/contrib/shrink-manifest.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+"""\
+Reorder the manifest file in the current repository to save space.
+Specifically, this topologically sorts the revisions in the manifest so that
+revisions on the same branch are adjacent as much as possible.  This is a
+workaround for the fact that Mercurial computes deltas relative to the previous
+revision rather than relative to a parent revision.
+"""
+
+# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org> as a
+# patch to rewrite-log.  Cleaned up, refactored, documented, and renamed by Greg
+# Ward <greg at gerg.ca>.
+
+# XXX would be nice to have a way to verify the repository after shrinking,
+# e.g. by comparing "before" and "after" states of random changesets (maybe:
+# export before, shrink, export after, diff).
+
+import sys, os, tempfile
+from mercurial import ui as ui_, hg, revlog, transaction, node, util
+
+def good_sort(rl):
+    write = sys.stdout.write
+
+    children = {}
+    root = []
+    # build children and roots
+    write('reading %d revs ' % len(rl))
+    #for i in revs:
+    i = 0
+    while i < len(rl):
+        children[i] = []
+        parents = [p for p in rl.parentrevs(i) if p != -1]
+        for p in parents:
+            assert p in children
+        if len(parents) == 0:
+            root.append(i)
+        else:
+            for p in parents:
+                children[p].append(i)
+
+        if i % 1000 == 0:
+            write('.')
+        i += 1
+    write('\n')
+
+    #print children, visit
+    write('sorting ...')
+    visit = root
+    ret = []
+    while visit:
+        i = visit.pop(0)
+        ret.append(i)
+        if i not in children:
+            # this only happens if some node's p1 == p2, which can happen in the
+            # manifest in certain circumstances
+            break
+        next = []
+        for c in children.pop(i):
+            parents_with_child = [p for p in rl.parentrevs(c) if p != -1 and p in children]
+            if len(parents_with_child) == 0:
+                next.append(c)
+        visit = next + visit
+    write('\n')
+    return ret
+
+def write_revs(r1, r2, order, tr):
+    write = sys.stdout.write
+    write('writing %d revs ' % len(order))
+    count = 0
+    for rev in order:
+        n = r1.node(rev)
+        p1, p2 = r1.parents(n)
+        l = r1.linkrev(rev)
+        t = r1.revision(n)
+        n2 = r2.addrevision(t, tr, l, p1, p2)
+
+        if count % 1000 == 0:
+            write('.')
+        count += 1
+    write('\n')
+    
+def report_shrinkage(olddatafn, newdatafn):
+    oldsize = float(os.stat(olddatafn).st_size)
+    newsize = float(os.stat(newdatafn).st_size)
+    sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'
+                     % (oldsize, oldsize/1024/1024))
+    sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'
+                     % (newsize, newsize/1024/1024))
+
+    shrink_percent = (oldsize - newsize) / oldsize * 100
+    shrink_factor = oldsize / newsize
+    sys.stdout.write('shrinkage: %.1f%% (%.1fx)\n'
+                     % (shrink_percent, shrink_factor))
+
+def main():
+
+    # unbuffer stdout for nice progress output
+    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
+    write = sys.stdout.write
+
+    # Open the local repository
+    ui = ui_.ui()
+    repo = hg.repository(ui)
+
+    indexfn = repo.join('store/00manifest.i')
+    datafn = indexfn[:-2] + '.d'
+    (tmpfd, tmpindexfn) = tempfile.mkstemp(
+        dir=repo.join('store'), prefix='00manifest.', suffix='.i')
+    tmpdatafn = tmpindexfn[:-2] + '.d'
+    os.close(tmpfd)
+
+    r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
+    r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
+
+    # XXX shouldn't the journal be in .hg/store?
+    # XXX shouldn't we lock the store?
+    tr = transaction.transaction(sys.stderr.write, open, "journal")
+
+    try:
+        order = good_sort(r1)
+        write_revs(r1, r2, order, tr)
+        report_shrinkage(datafn, tmpdatafn)
+        tr.close()
+    except:
+        # abort transaction first, so we truncate the files before deleting them
+        tr.abort()
+        if os.path.exists(tmpindexfn):
+            os.unlink(tmpindexfn)
+        if os.path.exists(tmpdatafn):
+            os.unlink(tmpdatafn)
+        raise
+
+    # XXX this will crash if there is no .d file ... but if that's the case,
+    # this manifest is not big enough to be worth shrinking!
+    os.rename(indexfn, indexfn + '.old')
+    os.rename(datafn, datafn + '.old')
+    os.rename(tmpindexfn, indexfn)
+    os.rename(tmpdatafn, datafn)
+
+main()


More information about the Mercurial-devel mailing list