[PATCH 3 of 5 V2] ancestor: add a class to generate ancestors lazily
Siddharth Agarwal
sid0 at fb.com
Fri Dec 14 18:13:53 CST 2012
# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1355528919 28800
# Node ID a00028bfa9eeb2e39b2256e25792695463aa22d8
# Parent 35eb8d05863f3907b5bfd5f023a4746f91ef97d8
ancestor: add a class to generate ancestors lazily
In a linear repository with over 400,000 commits, hg perfancestorset takes
0.80 seconds no matter how far behind we're looking. With this patch, hg
perfancestorset -- X takes:
Rev X Time
-1 0.00s
-4000 0.01s
-20000 0.04s
-80000 0.17s
-200000 0.43s
-300000 0.69s
0 0.88s
Thus, for revisions close to tip, we're up to several orders of magnitude
faster. At 0 we're around 10% slower.
diff -r 35eb8d05863f -r a00028bfa9ee contrib/perf.py
--- a/contrib/perf.py Fri Dec 14 10:23:18 2012 -0800
+++ b/contrib/perf.py Fri Dec 14 15:48:39 2012 -0800
@@ -1,7 +1,7 @@
# perf.py - performance test routines
'''helper extension to measure performance'''
-from mercurial import cmdutil, scmutil, util, match, commands
+from mercurial import cmdutil, scmutil, util, match, commands, ancestor
import time, os, sys
def timer(func, title=None):
@@ -82,7 +82,7 @@
revs = repo.revs(revset)
heads = repo.changelog.headrevs()
def d():
- s = set(repo.changelog.ancestors(heads))
+ s = ancestor.lazyancestorset(repo, heads, False)
for rev in revs:
rev in s
timer(d)
diff -r 35eb8d05863f -r a00028bfa9ee mercurial/ancestor.py
--- a/mercurial/ancestor.py Fri Dec 14 10:23:18 2012 -0800
+++ b/mercurial/ancestor.py Fri Dec 14 15:48:39 2012 -0800
@@ -316,3 +316,49 @@
missing.reverse()
return missing
+
+class lazyancestorset(object):
+ def __init__(self, repo, revs, increvs, stoprev=0):
+ """Create a new set-like object generating ancestors for the given
+ revs.
+
+ This is computed lazily starting from tip and working in descending rev
+ order. The set only supports the membership operation.
+
+ revs should be an iterable. pfunc must return a list of parent revs for
+ a given rev. increvs is a boolean that indicates whether revs should be
+ included in the set. Revs lower than stoprev will not be generated."""
+ # Python's heap is a min-heap. Multiply all values by -1 to convert it
+ # into a max-heap.
+ self._parentrevs = repo.changelog.parentrevs
+ self._visit = [-rev for rev in revs]
+ heapq.heapify(self._visit)
+ if increvs:
+ self._seen = set(revs)
+ else:
+ self._seen = set()
+ self._stoprev = stoprev
+
+ def __contains__(self, target):
+ seen = self._seen
+ if target in seen:
+ return True
+
+ parentrevs = self._parentrevs
+ visit = self._visit
+ stoprev = self._stoprev
+ heappop = heapq.heappop
+ heappush = heapq.heappush
+
+ targetseen = False
+
+ while visit and -visit[0] >= target and not targetseen:
+ for parent in parentrevs(-heappop(visit)):
+ if parent in seen or parent < stoprev:
+ continue
+ heappush(visit, -parent)
+ seen.add(parent)
+ if parent == target:
+ targetseen = True
+
+ return targetseen
diff -r 35eb8d05863f -r a00028bfa9ee tests/test-ancestor.py
--- a/tests/test-ancestor.py Fri Dec 14 10:23:18 2012 -0800
+++ b/tests/test-ancestor.py Fri Dec 14 15:48:39 2012 -0800
@@ -34,6 +34,10 @@
13: [8]}
pfunc = graph.get
+class mockrepo(object):
+ class changelog(object):
+ parentrevs = graph.get
+
def runmissingancestors(revs, bases):
print "%% ancestors of %s and not of %s" % (revs, bases)
print ancestor.missingancestors(revs, bases, pfunc)
@@ -70,5 +74,32 @@
runmissingancestors([10, 11, 12], [13])
runmissingancestors([13], [10, 11, 12])
+def genlazyancestorset(revs, increvs, stoprev=0):
+ print "%% lazy ancestor set for %s, including revs = %s" % (revs, increvs)
+ return ancestor.lazyancestorset(mockrepo, revs, increvs, stoprev=stoprev)
+
+def isinlazyset(s, l):
+ print [n for n in l if n in s]
+
+def test_lazyancestorset():
+ # Empty revs
+ s = genlazyancestorset([], False)
+ isinlazyset(s, [3, 0, -1])
+
+ # Standard example
+ s = genlazyancestorset([11, 13], False)
+ isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+ # Including revs
+ s = genlazyancestorset([11, 13], True)
+ isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+ # Test with stoprev
+ s = genlazyancestorset([11, 13], False, stoprev=6)
+ isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+ s = genlazyancestorset([11, 13], True, stoprev=6)
+ isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
if __name__ == '__main__':
test_missingancestors()
+ test_lazyancestorset()
diff -r 35eb8d05863f -r a00028bfa9ee tests/test-ancestor.py.out
--- a/tests/test-ancestor.py.out Fri Dec 14 10:23:18 2012 -0800
+++ b/tests/test-ancestor.py.out Fri Dec 14 15:48:39 2012 -0800
@@ -34,3 +34,13 @@
[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
% ancestors of [13] and not of [10, 11, 12]
[8, 13]
+% lazy ancestor set for [], including revs = False
+[]
+% lazy ancestor set for [11, 13], including revs = False
+[7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], including revs = True
+[11, 13, 7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], including revs = False
+[7, 8]
+% lazy ancestor set for [11, 13], including revs = True
+[11, 13, 7, 8]
More information about the Mercurial-devel
mailing list