Manifest compression

Matt Mackall mpm at selenic.com
Thu Aug 8 20:42:16 CDT 2013


On Thu, 2013-08-08 at 23:45 +0000, Wojciech Lopata wrote:
> Hello,
> 
> My name is Wojciech Lopata, and I'm an intern at Facebook. I'm going to
> spend the next weeks implementing manifest compression, based on the ideas
> described in the wiki:
> http://mercurial.selenic.com/wiki/ImprovingManifestCompressionPlan.
> Feel free to email me with any ideas and suggestions.

You'll probably be interested in this quick proof of concept I wrote in
February:

---8<----

import sys, os
from mercurial import revlog, transaction, node, util, scmutil, mdiff
import re

# Path of the revlog file to analyse, taken from the first command-line
# argument; the loop further down reads its revisions as manifest texts.
fn = sys.argv[1]

# Two openers rooted at the current working directory, both created with
# audit=False.
opener = scmutil.opener(os.getcwd(), audit=False)
o2 = scmutil.opener(os.getcwd(), audit=False)
# Only o2 is given an (empty) options dict; the generaldelta switch below is
# left commented out.
o2.options = {}
#o2.options['generaldelta'] = 1
# r1 is the existing revlog the script reads revisions from.
r1 = revlog.revlog(opener, fn)
# NOTE(review): r2 is opened with `opener`, so o2 and its options are never
# used, and r2 itself is not referenced again in this script. Presumably o2
# was intended here -- confirm before relying on r2.
r2 = revlog.revlog(opener, fn + ".packman")

class faketrans(object):
    """Do-nothing stand-in exposing ``add``, ``find`` and ``replace``.

    Presumably mimics the transaction object a revlog expects when it is
    written to, so revisions could be added without real journaling --
    TODO confirm against the revlog API. Nothing in this script uses it yet.
    """

    def add(self, file, offset, data=None):
        """Accept the arguments, ignore them and return None."""
        return None

    def find(self, file):
        """Return the constant triple ``(0, 0, 0)`` whatever *file* is."""
        return (0, 0, 0)

    def replace(self, file, offset, data):
        """Accept the arguments, ignore them and return None."""
        return None

lasttext = ''
for rev in xrange(5000, 10000):
    d = r1.revision(rev)
    nd = ''
    nh = ''
    pf = ''
    for l in d.splitlines():
        f,h = l.split('\0')
        fl = ''
        if len(h) > 40:
            fl, h = h[-1], h[:-1]
        h2 = node.bin(h)
        pos = 0
        m = min(len(pf), len(f))
        while pos < m and pf[pos] == f[pos]:
            pos += 1
        nl = h2 + fl + "\n" + chr(pos) + f[pos:] + "\n"
        #nl = h2 + fl + "\n" + f + "\n"
        nd += nl
        pf = f

    delta = mdiff.textdiff(lasttext, nd)
    delta = re.sub(r'\0(...)\0(...)\0\0\0(.)', '\1\3', delta)
    delta = r1.compress(delta)
    delta = delta[0] + delta[1]
    lasttext = nd

    prevs = r1.parentrevs(rev)
    print rev, r1.length(rev), len(delta)
    if prevs[0] != rev -1:
        print ' ', prevs[0], prevs[1]


-- 
Mathematics is the supreme nostalgia of our time.




More information about the Mercurial-devel mailing list