[PATCH v4] strip: incrementally update the branchheads cache after a strip

Joshua Redstone joshua.redstone at fb.com
Wed May 23 16:15:43 CDT 2012


# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1337370347 25200
# Node ID cb504dab484a34f21656fc594f0a58f65a653a23
# Parent  d0b9ebba41e9a1733294d5fa1b497ada5eda93c8
strip: incrementally update the branchheads cache after a strip

This function augments strip to incrementally update the branchheads cache
rather than recompute it from scratch.  This speeds up the performance of strip
and rebase on repos with long history.  The performance optimization only
happens if the revisions stripped are all on the same branch and the parents of
the stripped revisions are also on that same branch.

This also fixes a small performance bug in localrepo._updatebranchcache.  That
function would iterate over new candidate heads in order from oldest to newest,
in contrast to the comment that observes that iterating from newest to oldest
results in fewer pases over the set of reachable nodes.

This adds a few test cases, particularly one that reproduces the extra heads
that mpm observed.

diff -r d0b9ebba41e9 -r cb504dab484a hgext/mq.py
--- a/hgext/mq.py	Sun May 20 14:40:36 2012 -0500
+++ b/hgext/mq.py	Fri May 18 12:45:47 2012 -0700
@@ -3449,7 +3449,7 @@
             if start < qbase:
                 # update the cache (excluding the patches) and save it
                 ctxgen = (self[r] for r in xrange(lrev + 1, qbase))
-                self._updatebranchcache(partial, ctxgen)
+                self._updatebranchcache(partial, ctxgen, ctxisnew=True)
                 self._writebranchcache(partial, cl.node(qbase - 1), qbase - 1)
                 start = qbase
             # if start = qbase, the cache is as updated as it should be.
@@ -3458,7 +3458,7 @@
 
             # update the cache up to the tip
             ctxgen = (self[r] for r in xrange(start, len(cl)))
-            self._updatebranchcache(partial, ctxgen)
+            self._updatebranchcache(partial, ctxgen, ctxisnew=True)
 
             return partial
 
diff -r d0b9ebba41e9 -r cb504dab484a mercurial/discovery.py
--- a/mercurial/discovery.py	Sun May 20 14:40:36 2012 -0500
+++ b/mercurial/discovery.py	Fri May 18 12:45:47 2012 -0700
@@ -201,7 +201,7 @@
         # 4. Update newmap with outgoing changes.
         # This will possibly add new heads and remove existing ones.
         ctxgen = (repo[n] for n in outgoing.missing)
-        repo._updatebranchcache(newmap, ctxgen)
+        repo._updatebranchcache(newmap, ctxgen, ctxisnew=True)
 
     else:
         # 1-4b. old servers: Check for new topological heads.
diff -r d0b9ebba41e9 -r cb504dab484a mercurial/localrepo.py
--- a/mercurial/localrepo.py	Sun May 20 14:40:36 2012 -0500
+++ b/mercurial/localrepo.py	Fri May 18 12:45:47 2012 -0700
@@ -480,7 +480,7 @@
         tiprev = len(self) - 1
         if lrev != tiprev:
             ctxgen = (self[r] for r in xrange(lrev + 1, tiprev + 1))
-            self._updatebranchcache(partial, ctxgen)
+            self._updatebranchcache(partial, ctxgen, ctxisnew=True)
             self._writebranchcache(partial, self.changelog.tip(), tiprev)
 
         return partial
@@ -550,6 +550,9 @@
                     continue
                 node, label = l.split(" ", 1)
                 label = encoding.tolocal(label.strip())
+                if not node in self:
+                    raise ValueError('invalidating branch cache because node '+
+                                     '%s does not exist' % node)
                 partial.setdefault(label, []).append(bin(node))
         except KeyboardInterrupt:
             raise
@@ -570,7 +573,14 @@
         except (IOError, OSError):
             pass
 
-    def _updatebranchcache(self, partial, ctxgen):
+    def _updatebranchcache(self, partial, ctxgen, ctxisnew):
+        """Given a branchhead cache, partial, that may have extra nodes or be
+        missing heads, and a generator of nodes that are at least a superset of
+        heads missing, this function updates partial to be correct.
+
+        ctxisnew is a bool specifying whether the nodes produced by ctxgen are
+        guaranteed to not be ancestors of any existing heads.
+        """
         # collect new branch entries
         newbranches = {}
         for c in ctxgen:
@@ -581,21 +591,55 @@
         for branch, newnodes in newbranches.iteritems():
             bheads = partial.setdefault(branch, [])
             bheads.extend(newnodes)
-            if len(bheads) <= 1:
-                continue
-            bheads = sorted(bheads, key=lambda x: self[x].rev())
-            # starting from tip means fewer passes over reachable
-            while newnodes:
-                latest = newnodes.pop()
-                if latest not in bheads:
-                    continue
-                minbhnode = self[bheads[0]].node()
-                reachable = self.changelog.reachable(latest, minbhnode)
-                reachable.remove(latest)
-                if reachable:
-                    bheads = [b for b in bheads if b not in reachable]
+            # Remove duplicates - nodes that are in newnodes and are already in
+            # bheads.  This can happen if you strip a node whose parent was
+            # already a head (because they're on different branches).
+            bheads = set(bheads)
+
+            # Remove candidate heads that no longer are in the repo (e.g., as
+            # the result of a strip that just happened).  Avoid using 'bhead in
+            # self' here because that dives down into branchcache code somewhat
+            # recrusively.
+            bheads = [bhead for bhead in bheads \
+                          if self.changelog.hasnode(bhead)]
+            if len(bheads) > 1:
+                bheads = sorted(bheads, key=lambda x: self[x].rev())
+                # starting from tip means fewer passes over reachable.
+                #
+                # If we know the new candidates are not ancestors of existing
+                # heads, we don't have to examine ancestors of existing heads
+                if ctxisnew:
+                    iternodes = sorted(newnodes, key=lambda x: self[x].rev(),
+                                       reverse=True)
+                else:
+                    iternodes = sorted(bheads, key=lambda x: self[x].rev(),
+                                       reverse=True)
+
+                # This loop prunes out two kinds of heads - heads that are
+                # superceded by a head in newnodes, and newnodes that are not
+                # heads because an existing head is their descendant.
+                while iternodes:
+                    latest = iternodes.pop()
+                    if latest not in bheads:
+                        continue
+                    minbhnode = self[bheads[0]].node()
+                    reachable = self.changelog.reachable(latest, minbhnode)
+                    reachable.remove(latest)
+                    if reachable:
+                        bheads = [b for b in bheads if b not in reachable]
             partial[branch] = bheads
 
+        # There may be branches that cease to exist when the last commit in the
+        # branch was stripped.  This code filters them out.  Note that the
+        # branch that ceased to exist may not be in newbranches because
+        # newbranches is the set of candidate heads, which when you strip the
+        # last commit in a branch will be the parent branch.
+        for branch in partial.keys():
+            nodes = [head for head in partial[branch] \
+                         if self.changelog.hasnode(head)]
+            if len(nodes) < 1:
+                del partial[branch]
+
     def lookup(self, key):
         return self[key].node()
 
@@ -858,6 +902,9 @@
             else:
                 ui.status(_('working directory now based on '
                             'revision %d\n') % parents)
+        # TODO: if we know which new heads may result from this rollback, pass
+        # them to destroy(), which will prevent the branchhead cache from being
+        # invalidated.
         self.destroyed()
         return 0
 
@@ -1301,12 +1348,28 @@
                 tr.release()
             lock.release()
 
-    def destroyed(self):
+    def destroyed(self, newheadrevs=None):
         '''Inform the repository that nodes have been destroyed.
         Intended for use by strip and rollback, so there's a common
-        place for anything that has to be done after destroying history.'''
-        # XXX it might be nice if we could take the list of destroyed
-        # nodes, but I don't see an easy way for rollback() to do that
+        place for anything that has to be done after destroying history.
+
+        If you know the branchheadcache was uptodate before nodes were removed
+        and you also know the set of candidate new heads that may have resulted
+        from the destruction, you can set newheadrevs.  This will enable the
+        code to update the branchheads cache, rather than having future code
+        decide it's invalid and regenrating it from scratch.
+        '''
+        if newheadrevs:
+            tiprev = len(self) - 1
+            ctxgen = (self[rev] for rev in newheadrevs)
+            self._updatebranchcache(self._branchcache, ctxgen,
+                                    ctxisnew=False)
+            self._writebranchcache(self._branchcache, self.changelog.tip(),
+                                   tiprev)
+        else:
+            # No info to update the cache.  If nodes were destroyed, the cache
+            # is stale and this will be caught the next time it is read.
+            pass
 
         # Ensure the persistent tag cache is updated.  Doing it now
         # means that the tag cache only has to worry about destroyed
diff -r d0b9ebba41e9 -r cb504dab484a mercurial/repair.py
--- a/mercurial/repair.py	Sun May 20 14:40:36 2012 -0500
+++ b/mercurial/repair.py	Fri May 18 12:45:47 2012 -0700
@@ -56,6 +56,11 @@
     return s
 
 def strip(ui, repo, nodelist, backup="all", topic='backup'):
+    # It simplifies the logic around updating the branchheads cache if we only
+    # have to consider the effect of the stripped revisions and not revisions
+    # missing because the cache is out-of-date.
+    repo.updatebranchcache()
+
     cl = repo.changelog
     # TODO handle undo of merge sets
     if isinstance(nodelist, str):
@@ -63,6 +68,20 @@
     striplist = [cl.rev(node) for node in nodelist]
     striprev = min(striplist)
 
+    # Generate set of branches who will have nodes stripped.
+    stringstriplist = [str(rev) for rev in striplist]
+    striprevs = set(repo.revs("%lr::", stringstriplist,
+                                stringstriplist))
+    stripbranches = set([repo[rev].branch() for rev in striprevs])
+
+    # Set of potential new heads resulting from the strip.  The parents of any
+    # node removed could be a new head because the node to be removed could have
+    # been the only child of the parent.
+    # Save as a set to remove duplicates.
+    newheadrevs = set(repo.revs("parents(%lr::) - %lr::", stringstriplist,
+                                stringstriplist))
+    newheadbranches = set([repo[rev].branch() for rev in newheadrevs])
+
     keeppartialbundle = backup == 'strip'
 
     # Some revisions with rev > striprev may not be descendants of striprev.
@@ -169,4 +188,11 @@
                     % chgrpfile)
         raise
 
-    repo.destroyed()
+    if len(stripbranches) == 1 and len(newheadbranches) == 1 \
+            and stripbranches == newheadbranches:
+        repo.destroyed(newheadrevs)
+    else:
+        # Multiple branches involved in strip. Will allow branchcache to become
+        # invalid and later on rebuilt from scratch
+        repo.destroyed()
+
diff -r d0b9ebba41e9 -r cb504dab484a tests/test-mq-caches.t
--- a/tests/test-mq-caches.t	Sun May 20 14:40:36 2012 -0500
+++ b/tests/test-mq-caches.t	Fri May 18 12:45:47 2012 -0700
@@ -36,7 +36,8 @@
   $ hg qrefresh -m 'patch 1'
   $ show_branch_cache
   tip: 0
-  No branch cache
+  d986d5caac23a7d44a46efc0ddaf5eb9665844cf 0
+  d986d5caac23a7d44a46efc0ddaf5eb9665844cf default
 
 some regular revisions
 
@@ -75,8 +76,8 @@
   $ hg qrefresh -m 'patch 2'
   $ show_branch_cache 1
   tip: 3
-  c229711f16da3d7591f89b1b8d963b79bda22714 1
-  c229711f16da3d7591f89b1b8d963b79bda22714 bar
+  982611f6955f9c48d3365decea203217c945ef0d 2
+  982611f6955f9c48d3365decea203217c945ef0d bar
   dc25e3827021582e979f600811852e36cbe57341 foo
   branch foo: 3
   branch bar: 2
diff -r d0b9ebba41e9 -r cb504dab484a tests/test-rebase-cache.t
--- a/tests/test-rebase-cache.t	Sun May 20 14:40:36 2012 -0500
+++ b/tests/test-rebase-cache.t	Fri May 18 12:45:47 2012 -0700
@@ -2,6 +2,7 @@
   > [extensions]
   > graphlog=
   > rebase=
+  > mq=
   > 
   > [phases]
   > publish=False
@@ -262,3 +263,75 @@
   
   $ hg verify -q
 
+Stripping multiple branches in one go bypasses the fast-case code to
+update the branch cache.
+
+  $ hg strip 2
+  0 files updated, 0 files merged, 4 files removed, 0 files unresolved
+  saved backup bundle to $TESTTMP/a3/.hg/strip-backup/*-backup.hg (glob)
+
+  $ hg tglog
+  o  3: 'C' branch2
+  |
+  o  2: 'branch2' branch2
+  |
+  | @  1: 'branch1' branch1
+  |/
+  o  0: 'A'
+  
+
+  $ hg branches
+  branch2                        3:e4fdb121d036
+  branch1                        1:63379ac49655
+  default                        0:1994f17a630e (inactive)
+
+  $ hg theads
+  3: 'C' branch2
+  1: 'branch1' branch1
+  0: 'A' 
+
+Fast path branchcache code should not be invoked if branches stripped is not 
+the same as branches remaining.
+
+  $ hg init b
+  $ cd b
+
+  $ hg branch branch1
+  marked working directory as branch branch1
+  (branches are permanent and global, did you want a bookmark?)
+  $ hg ci -m 'branch1'
+
+  $ hg branch branch2
+  marked working directory as branch branch2
+  (branches are permanent and global, did you want a bookmark?)
+  $ hg ci -m 'branch2'
+
+  $ hg branch -f branch1
+  marked working directory as branch branch1
+  (branches are permanent and global, did you want a bookmark?)
+
+  $ echo a > A
+  $ hg ci -Am A
+  adding A
+  created new head
+
+  $ hg tglog
+  @  2: 'A' branch1
+  |
+  o  1: 'branch2' branch2
+  |
+  o  0: 'branch1' branch1
+  
+
+  $ hg theads
+  2: 'A' branch1
+  1: 'branch2' branch2
+
+  $ hg strip 2
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  saved backup bundle to $TESTTMP/a3/b/.hg/strip-backup/*-backup.hg (glob)
+
+  $ hg theads
+  1: 'branch2' branch2
+  0: 'branch1' branch1
+
diff -r d0b9ebba41e9 -r cb504dab484a tests/test-rebase-collapse.t
--- a/tests/test-rebase-collapse.t	Sun May 20 14:40:36 2012 -0500
+++ b/tests/test-rebase-collapse.t	Fri May 18 12:45:47 2012 -0700
@@ -2,6 +2,7 @@
   > [extensions]
   > graphlog=
   > rebase=
+  > mq=
   > 
   > [phases]
   > publish=False
@@ -260,6 +261,44 @@
   $ cd ..
 
 
+
+
+Test that branchheads cache is updated correctly when doing a strip in which
+the parent of the ancestor node to be stripped does not become a head and
+also, the parent of a node that is a child of the node stripped becomes a head
+(node 3).
+
+  $ hg clone -q -u . b b2
+  $ cd b2
+
+  $ hg heads --template="{rev}:{node} {branch}\n"
+  7:c65502d4178782309ce0574c5ae6ee9485a9bafa default
+  6:c772a8b2dc17629cec88a19d09c926c4814b12c7 default
+
+  $ cat $TESTTMP/b2/.hg/cache/branchheads
+  c65502d4178782309ce0574c5ae6ee9485a9bafa 7
+  c772a8b2dc17629cec88a19d09c926c4814b12c7 default
+  c65502d4178782309ce0574c5ae6ee9485a9bafa default
+
+  $ hg strip 4
+  saved backup bundle to $TESTTMP/b2/.hg/strip-backup/8a5212ebc852-backup.hg
+
+  $ cat $TESTTMP/b2/.hg/cache/branchheads
+  c65502d4178782309ce0574c5ae6ee9485a9bafa 4
+  2870ad076e541e714f3c2bc32826b5c6a6e5b040 default
+  c65502d4178782309ce0574c5ae6ee9485a9bafa default
+
+  $ hg heads --template="{rev}:{node} {branch}\n"
+  4:c65502d4178782309ce0574c5ae6ee9485a9bafa default
+  3:2870ad076e541e714f3c2bc32826b5c6a6e5b040 default
+
+  $ cd ..
+
+
+
+
+
+
 Create repo c:
 
   $ hg init c
@@ -630,3 +669,56 @@
   b
   b
   $ cd ..
+
+
+Test stripping a revision with another child
+
+  $ hg init f
+  $ cd f
+
+  $ echo A > A
+  $ hg ci -Am A
+  adding A
+  $ echo B > B
+  $ hg ci -Am B
+  adding B
+
+  $ hg up -q 0
+
+  $ echo C > C
+  $ hg ci -Am C
+  adding C
+  created new head
+
+  $ hg tglog
+  @  2: 'C'
+  |
+  | o  1: 'B'
+  |/
+  o  0: 'A'
+  
+
+
+  $ hg heads --template="{rev}:{node} {branch}: {desc}\n"
+  2:c5cefa58fd557f84b72b87f970135984337acbc5 default: C
+  1:27547f69f25460a52fff66ad004e58da7ad3fb56 default: B
+
+  $ hg strip 2
+  0 files updated, 0 files merged, 1 files removed, 0 files unresolved
+  saved backup bundle to $TESTTMP/f/.hg/strip-backup/*-backup.hg (glob)
+
+  $ hg tglog
+  o  1: 'B'
+  |
+  @  0: 'A'
+  
+
+
+  $ hg heads --template="{rev}:{node} {branch}: {desc}\n"
+  1:27547f69f25460a52fff66ad004e58da7ad3fb56 default: B
+
+  $ cd ..
+
+
+
+


More information about the Mercurial-devel mailing list