size of repository with many branches, vs. git
Benoit Boissinot
bboissin at gmail.com
Sun Mar 30 17:24:00 CDT 2008
On Sun, Mar 30, 2008 at 11:04 PM, Matt Mackall <mpm at selenic.com> wrote:
> Here's a quick test to see what we might stand to gain from non-linear
> deltas on your repo:
>
>
> ie we save nearly 70% of the manifest size. This is only an
> approximation, because it doesn't include insertion of full revisions at
> intervals to keep extraction time bounded.
>
>
I modified it a bit to always do a delta in the delta vs tip case and
I got (for the manifest of the kernel repo):
85000 226463816 43403672 19.17
86000 228332602 43588129 19.09
87000 230034416 43800074 19.04
88000 232135027 44029133 18.97
88247 232664773 44065644 18.94
I tried sorting the revs to be linear and do a delta vs tip:
85000 226463824 110756686 48.91
86000 228332610 111076561 48.65
87000 230034424 111751755 48.58
88000 232135035 112312502 48.38
88247 232664781 112395752 48.31
script:
#!/usr/bin/python
import sys
from mercurial import revlog
def good_sort(rl, revs):
tot = dict.fromkeys(revs)
children = {}
root = []
# build children and roots
for i in revs:
children[i] = []
parents = [p for p in rl.parentrevs(i) if p != -1]
for p in parents:
assert p in children
if len(parents) == 0:
root.append(i)
else:
for p in parents:
children[p].append(i)
#print children, visit
visit = root
ret = []
while visit:
i = visit.pop(0)
ret.append(i)
if i not in children:
# no child
break
next = []
for c in children.pop(i):
parents_with_child = [p for p in rl.parentrevs(c) if p !=
-1 and p in children]
if len(parents_with_child) == 0:
next.append(c)
visit = next + visit
assert len(ret) == len(ret)
return ret
r = revlog.revlog(open, sys.argv[1])
frags = []
total = 0
optimized = 0
map = good_sort(r, range(r.count()))
for i in range(r.count()):
n1, p1 = i, i-1
n2, p2 = map[i], map[i-1]
total += sum([len(x) for x in revlog.compress(r.revdiff(p1, n1))])
optimized += sum([len(x) for x in revlog.compress(r.revdiff(p2, n2))])
if not i % 1000:
print i, total, optimized, "%5.2f" % (100.0 * optimized/total)
print "%10d" % i, total, optimized, "%5.2f" % (100.0 * optimized/total)
More information about the Mercurial
mailing list