[PATCH 2 of 4 V3 part 2] ancestor: add lazy membership testing to lazyancestors

Siddharth Agarwal sid0 at fb.com
Mon Dec 17 23:30:11 CST 2012


# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1355808393 28800
# Node ID 379ba541bc8c1c1d0b014b073da1767a6f2fa17e
# Parent  492535c0a3b45125888c7d20d402857cecaeae00
ancestor: add lazy membership testing to lazyancestors

In a linear repository with over 400,000 commits, without this patch, hg
perfancestorset takes 0.80 seconds no matter how far behind we're looking.
With this patch, hg perfancestorset -- X takes:

    Rev X       Time
       -1      0.00s
    -4000      0.01s
   -20000      0.04s
   -80000      0.17s
  -200000      0.43s
  -300000      0.69s
        0      0.88s

Thus, for revisions close to tip, we're up to several orders of magnitude
faster. At 0 we're around 10% slower.

diff -r 492535c0a3b4 -r 379ba541bc8c contrib/perf.py
--- a/contrib/perf.py	Mon Dec 17 20:42:24 2012 -0800
+++ b/contrib/perf.py	Mon Dec 17 21:26:33 2012 -0800
@@ -82,7 +82,7 @@
     revs = repo.revs(revset)
     heads = repo.changelog.headrevs()
     def d():
-        s = set(repo.changelog.ancestors(heads))
+        s = repo.changelog.ancestors(heads)
         for rev in revs:
             rev in s
     timer(d)
diff -r 492535c0a3b4 -r 379ba541bc8c mercurial/ancestor.py
--- a/mercurial/ancestor.py	Mon Dec 17 20:42:24 2012 -0800
+++ b/mercurial/ancestor.py	Mon Dec 17 21:26:33 2012 -0800
@@ -322,8 +322,8 @@
         """Create a new object generating ancestors for the given revs. Does
         not generate revs lower than stoprev.
 
-        This is computed lazily starting from revs. The object only supports
-        iteration.
+        This is computed lazily starting from revs. The object supports
+        iteration and membership.
 
         cl should be a changelog and revs should be an iterable. inclusive is
         a boolean that indicates whether revs should be included. Revs lower
@@ -335,6 +335,19 @@
         self._stoprev = stoprev
         self._inclusive = inclusive
 
+        # Initialize data structures for __contains__.
+        # For __contains__, we use a heap rather than a deque because
+        # (a) it minimizes the number of parentrevs calls made
+        # (b) it makes the loop termination condition obvious
+        # Python's heap is a min-heap. Multiply all values by -1 to convert it
+        # into a max-heap.
+        self._containsvisit = [-rev for rev in revs]
+        heapq.heapify(self._containsvisit)
+        if inclusive:
+            self._containsseen = set(revs)
+        else:
+            self._containsseen = set()
+
     def __iter__(self):
         """Generate the ancestors of _initrevs in reverse topological order.
 
@@ -366,3 +379,33 @@
                     visit.append(parent)
                     seen.add(parent)
                     yield parent
+
+    def __contains__(self, target):
+        """Test whether target is an ancestor of self._initrevs."""
+        # Trying to do both __iter__ and __contains__ using the same visit
+        # heap and seen set is complex enough that it slows down both. Keep
+        # them separate.
+        seen = self._containsseen
+        if target in seen:
+            return True
+
+        parentrevs = self._parentrevs
+        visit = self._containsvisit
+        stoprev = self._stoprev
+        heappop = heapq.heappop
+        heappush = heapq.heappush
+
+        targetseen = False
+
+        while visit and -visit[0] > target and not targetseen:
+            for parent in parentrevs(-heappop(visit)):
+                if parent < stoprev or parent in seen:
+                    continue
+                # We need to make sure we push all parents into the heap so
+                # that we leave it in a consistent state for future calls.
+                heappush(visit, -parent)
+                seen.add(parent)
+                if parent == target:
+                    targetseen = True
+
+        return targetseen
diff -r 492535c0a3b4 -r 379ba541bc8c tests/test-ancestor.py
--- a/tests/test-ancestor.py	Mon Dec 17 20:42:24 2012 -0800
+++ b/tests/test-ancestor.py	Mon Dec 17 21:26:33 2012 -0800
@@ -34,6 +34,9 @@
          13: [8]}
 pfunc = graph.get
 
+class mockchangelog(object):
+    parentrevs = graph.get
+
 def runmissingancestors(revs, bases):
     print "%% ancestors of %s and not of %s" % (revs, bases)
     print ancestor.missingancestors(revs, bases, pfunc)
@@ -70,5 +73,33 @@
     runmissingancestors([10, 11, 12], [13])
     runmissingancestors([13], [10, 11, 12])
 
+def genlazyancestors(revs, stoprev=0, inclusive=False):
+    print "%% lazy ancestor set for %s, inclusive = %s" % (revs, inclusive)
+    return ancestor.lazyancestors(mockchangelog, revs, stoprev=stoprev,
+                                  inclusive=inclusive)
+
+def isinlazyset(s, l):
+    print [n for n in l if n in s]
+
+def test_lazyancestors():
+    # Empty revs
+    s = genlazyancestors([])
+    isinlazyset(s, [3, 0, -1])
+
+    # Standard example
+    s = genlazyancestors([11, 13])
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+    # Including revs
+    s = genlazyancestors([11, 13], inclusive=True)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
+    # Test with stoprev
+    s = genlazyancestors([11, 13], stoprev=6)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+    s = genlazyancestors([11, 13], stoprev=6, inclusive=True)
+    isinlazyset(s, [11, 13, 7, 9, 8, 3, 6, 4, 1, -1, 0])
+
 if __name__ == '__main__':
     test_missingancestors()
+    test_lazyancestors()
diff -r 492535c0a3b4 -r 379ba541bc8c tests/test-ancestor.py.out
--- a/tests/test-ancestor.py.out	Mon Dec 17 20:42:24 2012 -0800
+++ b/tests/test-ancestor.py.out	Mon Dec 17 21:26:33 2012 -0800
@@ -34,3 +34,13 @@
 [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
 % ancestors of [13] and not of [10, 11, 12]
 [8, 13]
+% lazy ancestor set for [], inclusive = False
+[]
+% lazy ancestor set for [11, 13], inclusive = False
+[7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], inclusive = True
+[11, 13, 7, 8, 3, 4, 1, 0]
+% lazy ancestor set for [11, 13], inclusive = False
+[7, 8]
+% lazy ancestor set for [11, 13], inclusive = True
+[11, 13, 7, 8]


More information about the Mercurial-devel mailing list