[PATCH 2 of 3 V3] strip: add faster revlog strip computation

Durham Goode durham at fb.com
Sat Nov 16 14:29:45 CST 2013


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1384216969 28800
#      Mon Nov 11 16:42:49 2013 -0800
# Node ID 91e01e4b76015511c29b4dbbcc2a73eeba2440b2
# Parent  e243df7f91498e4423c5854c3f38322854304760
strip: add faster revlog strip computation

The previous revlog strip computation would walk every rev in the revlog, from
the bottom to the top.  Since we're usually stripping only the top few revs of
the revlog, this was needlessly expensive on large repos.

The new algorithm walks the exact number of revs that will be stripped, thus
making the operation not dependent on the number of revs in the repo.

This makes amend on a large repo go from 8.7 seconds to 6 seconds.

diff --git a/mercurial/repair.py b/mercurial/repair.py
--- a/mercurial/repair.py
+++ b/mercurial/repair.py
@@ -38,16 +38,8 @@
     """return the changesets which will be broken by the truncation"""
     s = set()
     def collectone(revlog):
-        linkgen = (revlog.linkrev(i) for i in revlog)
-        # find the truncation point of the revlog
-        for lrev in linkgen:
-            if lrev >= striprev:
-                break
-        # see if any revision after this point has a linkrev
-        # less than striprev (those will be broken by strip)
-        for lrev in linkgen:
-            if lrev < striprev:
-                s.add(lrev)
+        _, brokenset = revlog.getstrippoint(striprev)
+        s.update([revlog.linkrev(r) for r in brokenset])
 
     collectone(repo.manifest)
     for fname in files:
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1285,6 +1285,46 @@
 
         return content
 
+    def getstrippoint(self, minlink):
+        """find the minimum rev that must be stripped to strip the linkrev
+
+        Returns a tuple containing the minimum rev and a set of all revs that
+        have linkrevs that will be broken by this strip.
+        """
+        brokenrevs = set()
+        strippoint = len(self)
+
+        heads = {}
+        futurelargelinkrevs = set()
+        for head in self.headrevs():
+            headlinkrev = self.linkrev(head)
+            heads[head] = headlinkrev
+            if headlinkrev >= minlink:
+                futurelargelinkrevs.add(headlinkrev)
+
+        # This algorithm involves walking down the rev graph, starting at the
+        # heads. Since the revs are topologically sorted according to linkrev,
+        # once all head linkrevs are below the minlink, we know there are
+        # no more revs that could have a linkrev greater than minlink.
+        # So we can stop walking.
+        while futurelargelinkrevs:
+            strippoint -= 1
+            linkrev = heads.pop(strippoint)
+
+            if linkrev < minlink:
+                brokenrevs.add(strippoint)
+            else:
+                futurelargelinkrevs.remove(linkrev)
+
+            for p in self.parentrevs(strippoint):
+                if p != nullrev:
+                    plinkrev = self.linkrev(p)
+                    heads[p] = plinkrev
+                    if plinkrev >= minlink:
+                        futurelargelinkrevs.add(plinkrev)
+
+        return strippoint, brokenrevs
+
     def strip(self, minlink, transaction):
         """truncate the revlog on the first revision with a linkrev >= minlink
 
@@ -1302,10 +1342,8 @@
         if len(self) == 0:
             return
 
-        for rev in self:
-            if self.index[rev][4] >= minlink:
-                break
-        else:
+        rev, _ = self.getstrippoint(minlink)
+        if rev == len(self):
             return
 
         # first truncate the files on disk


More information about the Mercurial-devel mailing list