[PATCH 3 of 5 V2] ancestor: add a class to generate ancestors lazily

Siddharth Agarwal sid0 at fb.com
Fri Dec 14 18:13:53 CST 2012


# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1355528919 28800
# Node ID a00028bfa9eeb2e39b2256e25792695463aa22d8
# Parent  35eb8d05863f3907b5bfd5f023a4746f91ef97d8
ancestor: add a class to generate ancestors lazily

In a linear repository with over 400,000 commits, hg perfancestorset takes
0.80 seconds no matter how far behind we're looking. With this patch, hg
perfancestorset -- X takes:

    Rev X       Time
       -1      0.00s
    -4000      0.01s
   -20000      0.04s
   -80000      0.17s
  -200000      0.43s
  -300000      0.69s
        0      0.88s

Thus, for revisions close to tip, we're up to several orders of magnitude
faster. At 0 we're around 10% slower.

diff -r 35eb8d05863f -r a00028bfa9ee contrib/perf.py
--- a/contrib/perf.py	Fri Dec 14 10:23:18 2012 -0800
+++ b/contrib/perf.py	Fri Dec 14 15:48:39 2012 -0800
@@ -1,7 +1,7 @@
 # perf.py - performance test routines
 '''helper extension to measure performance'''
 
-from mercurial import cmdutil, scmutil, util, match, commands
+from mercurial import cmdutil, scmutil, util, match, commands, ancestor
 import time, os, sys
 
 def timer(func, title=None):
@@ -82,7 +82,7 @@
     revs = repo.revs(revset)
     heads = repo.changelog.headrevs()
     def d():
-        s = set(repo.changelog.ancestors(heads))
+        s = ancestor.lazyancestorset(repo, heads, False)
         for rev in revs:
             rev in s
     timer(d)
diff -r 35eb8d05863f -r a00028bfa9ee mercurial/ancestor.py
--- a/mercurial/ancestor.py	Fri Dec 14 10:23:18 2012 -0800
+++ b/mercurial/ancestor.py	Fri Dec 14 15:48:39 2012 -0800
@@ -316,3 +316,49 @@
 
     missing.reverse()
     return missing
+
+class lazyancestorset(object):
+    def __init__(self, repo, revs, increvs, stoprev=0):
+        """Create a new set-like object generating ancestors for the given
+        revs.
+
+        This is computed lazily starting from tip and working in descending rev
+        order. The set only supports the membership operation.
+
+        revs should be an iterable. pfunc must return a list of parent revs for
+        a given rev. increvs is a boolean that indicates whether revs should be
+        included in the set. Revs lower than stoprev will not be generated."""
+        # Python's heap is a min-heap. Multiply all values by -1 to convert it
+        # into a max-heap.
+        self._parentrevs = repo.changelog.parentrevs
+        self._visit = [-rev for rev in revs]
+        heapq.heapify(self._visit)
+        if increvs:
+            self._seen = set(revs)
+        else:
+            self._seen = set()
+        self._stoprev = stoprev
+
+    def __contains__(self, target):
+        seen = self._seen
+        if target in seen:
+            return True
+
+        parentrevs = self._parentrevs
+        visit = self._visit
+        stoprev = self._stoprev
+        heappop = heapq.heappop
+        heappush = heapq.heappush
+
+        targetseen = False
+
+        while visit and -visit[0] >= target and not targetseen:
+            for parent in parentrevs(-heappop(visit)):
+                if parent in seen or parent < stoprev:
+                    continue
+                heappush(visit, -parent)
+                seen.add(parent)
+                if parent == target:
+                    targetseen = True
+
+        return targetseen
diff -r 35eb8d05863f -r a00028bfa9ee tests/test-ancestor.py
--- a/tests/test-ancestor.py	Fri Dec 14 10:23:18 2012 -0800
+++ b/tests/test-ancestor.py	Fri Dec 14 15:48:39 2012 -0800
@@ -34,6 +34,10 @@
          13: [8]}
 pfunc = graph.get
 
+class mockrepo(object):
+    class changelog(object):
+        parentrevs = graph.get
+
 def runmissingancestors(revs, bases):
     print "%% ancestors of %s and not of %s" % (revs, bases)
     print ancestor.missingancestors(revs, bases, pfunc)
@@ -70,5 +74,32 @@
     runmissingancestors([10, 11, 12], [13])
     runmissingancestors([13], [10, 11, 12])
 
+def genlazyancestorset(revs, increvs, stoprev=0):
+    print "%% lazy ancestor set for %s, including revs = %s" % (revs, increvs)
+    return ancestor.lazyancestorset(mockrepo, revs, increvs, stoprev=stoprev)
+
+def isinlazyset(s, l):
+    print [n for n in l if n in s]
+
+def test_lazyancestorset():
+    # Empty revs
+    s = genlazyancestorset([], False)
+    isinlazyset(s, [3, 0, -1])
+
+    # Standard example
+    s = genlazyancestorset([11, 13], False)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+    # Including revs
+    s = genlazyancestorset([11, 13], True)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+    # Test with stoprev
+    s = genlazyancestorset([11, 13], False, stoprev=6)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+    s = genlazyancestorset([11, 13], True, stoprev=6)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
 if __name__ == '__main__':
     test_missingancestors()
+    test_lazyancestorset()
diff -r 35eb8d05863f -r a00028bfa9ee tests/test-ancestor.py.out
--- a/tests/test-ancestor.py.out	Fri Dec 14 10:23:18 2012 -0800
+++ b/tests/test-ancestor.py.out	Fri Dec 14 15:48:39 2012 -0800
@@ -34,3 +34,13 @@
 [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
 % ancestors of [13] and not of [10, 11, 12]
 [8, 13]
+% lazy ancestor set for [], including revs = False
+[]
+% lazy ancestor set for [11, 13], including revs = False
+[7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], including revs = True
+[11, 13, 7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], including revs = False
+[7, 8]
+% lazy ancestor set for [11, 13], including revs = True
+[11, 13, 7, 8]


More information about the Mercurial-devel mailing list