[PATCH 1 of 2] ancestor: a new algorithm for ancestors of a not in b that is faster when a and b are close
Siddharth Agarwal
sid0 at fb.com
Tue Nov 20 21:15:26 CST 2012
# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1353467541 28800
# Node ID 25bdb24973e3b43d84b6d4c314f035f303ada192
# Parent 536f5df951bece49b760392fd543e8082489b7d0
ancestor: a new algorithm for ancestors of a not in b that is faster when a and b are close
One of the major reasons rebase is slow in large repositories is the computation
of the detach set: the set of ancestors of the changesets to rebase not in the
destination parent. This is currently done via a revset that does two walks all
the way to the root of the DAG. Instead of doing that, to find ancestors of a
not in b we walk up the tree in reverse revision number order, maintaining sets
of nodes visited from a, b or both.
For the common case where the nodes are close both topologically and in
revision number (relative to repository size), this has been found to speed
up rebase by around 15-20%. When the nodes are farther apart and the DAG is
highly branching, it is harder to say which would win.
Here's how long computing the detach set takes in a linear repository with over
400000 changesets, rebasing near tip:
Rebasing across 4 changesets
Revset method: 2.2s
New algorithm: 0.00015s
Rebasing across 250 changesets
Revset method: 2.2s
New algorithm: 0.00069s
Rebasing across 10000 changesets
Revset method: 2.4s
New algorithm: 0.019s
diff -r 536f5df951be -r 25bdb24973e3 mercurial/ancestor.py
--- a/mercurial/ancestor.py Tue Nov 13 19:24:45 2012 -0800
+++ b/mercurial/ancestor.py Tue Nov 20 19:12:21 2012 -0800
@@ -130,6 +130,122 @@
return gca
return deepest(gca)
+def ancestorsofanotb(a, b, pfunc):
+ """
+ Return all the ancestors of a that are not ancestors of b, possibly
+ including a itself. Equivalent to the revset (::a - ::b).
+
+ pfunc must return a list of parent vertices for a given vertex.
+
+ The graph is a dict of child->parent adjacency lists
+ >>> graph = {0: [-1], 1: [0], 2: [1], 3: [1], 4: [2], 5: [4], 6: [4],
+ ... 7: [4], 8: [-1], 9: [6, 7], 10: [5], 11: [3, 7], 12: [9],
+ ... 13: [8]}
+ >>> pfunc = graph.get
+
+ Trivial case: a == b
+ >>> ancestorsofanotb(0, 0, pfunc)
+ []
+ >>> ancestorsofanotb(4, 4, pfunc)
+ []
+
+ With nullrev
+ >>> ancestorsofanotb(-1, 12, pfunc)
+ []
+ >>> ancestorsofanotb(12, -1, pfunc)
+ [12, 9, 7, 6, 4, 2, 1, 0]
+
+ 9 is a parent of 12
+ >>> ancestorsofanotb(12, 9, pfunc)
+ [12]
+ >>> ancestorsofanotb(9, 12, pfunc)
+ []
+
+ 7 is an ancestor of 12. 6 is an ancestor of 12 but not of 7
+ >>> ancestorsofanotb(12, 7, pfunc)
+ [12, 9, 6]
+ >>> ancestorsofanotb(7, 12, pfunc)
+ []
+
+ More complex cases
+ >>> ancestorsofanotb(10, 11, pfunc)
+ [10, 5]
+ >>> ancestorsofanotb(10, 12, pfunc)
+ [10, 5]
+ >>> ancestorsofanotb(10, 13, pfunc)
+ [10, 5, 4, 2, 1, 0]
+ >>> ancestorsofanotb(11, 10, pfunc)
+ [11, 7, 3]
+ >>> ancestorsofanotb(11, 12, pfunc)
+ [11, 3]
+ >>> ancestorsofanotb(11, 13, pfunc)
+ [11, 7, 4, 3, 2, 1, 0]
+ >>> ancestorsofanotb(12, 10, pfunc)
+ [12, 9, 7, 6]
+ >>> ancestorsofanotb(12, 11, pfunc)
+ [12, 9, 6]
+ >>> ancestorsofanotb(12, 13, pfunc)
+ [12, 9, 7, 6, 4, 2, 1, 0]
+ >>> ancestorsofanotb(13, 10, pfunc)
+ [13, 8]
+ >>> ancestorsofanotb(13, 11, pfunc)
+ [13, 8]
+ >>> ancestorsofanotb(13, 12, pfunc)
+ [13, 8]
+ """
+ if a == b:
+ return []
+
+ avisit = set([a])
+ bvisit = set([b])
+ both = set()
+ next = max(a, b)
+ anotb = []
+ while next > nullrev and (avisit or bvisit):
+ curr = next
+ next -= 1
+
+ if curr in both:
+ both.remove(curr)
+ # curr's parents might have made it into avisit or bvisit through
+ # another path
+ for p in pfunc(curr):
+ avisit.discard(p)
+ bvisit.discard(p)
+ both.add(p)
+ continue
+
+ # curr will never be in both avisit and bvisit, since if it were it'd
+ # have been pushed to both
+ if curr in avisit:
+ anotb.append(curr)
+ thisvisit = avisit
+ othervisit = bvisit
+ elif curr in bvisit:
+ thisvisit = bvisit
+ othervisit = avisit
+ else:
+ # not an ancestor of a or b: ignore
+ continue
+
+ thisvisit.remove(curr)
+ for p in pfunc(curr):
+ if p == nullrev:
+ continue
+ if p in othervisit or p in both:
+ # p is implicitly in thisvisit. This means p is or should be
+ # in both
+ avisit.discard(p)
+ bvisit.discard(p)
+ both.add(p)
+ continue
+
+ # visit later
+ thisvisit.add(p)
+
+ return anotb
+
+
def genericancestor(a, b, pfunc):
"""
Returns the common ancestor of a and b that is furthest from a
diff -r 536f5df951be -r 25bdb24973e3 tests/test-doctest.py
--- a/tests/test-doctest.py Tue Nov 13 19:24:45 2012 -0800
+++ b/tests/test-doctest.py Tue Nov 20 19:12:21 2012 -0800
@@ -42,3 +42,6 @@
import mercurial.templatefilters
doctest.testmod(mercurial.templatefilters)
+
+import mercurial.ancestor
+doctest.testmod(mercurial.ancestor)
More information about the Mercurial-devel
mailing list