[PATCH 2 of 2 v5] revset: use localrepo persistent branch name caching

Mads Kiilerich mads at kiilerich.com
Thu Oct 16 09:49:34 CDT 2014


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1413470216 -7200
#      Thu Oct 16 16:36:56 2014 +0200
# Node ID afd86cc6fd978743e49547abab31a4b7dafb95d0
# Parent  efe19367c4809045dece68abded2a3297c16a7a8
revset: use localrepo persistent branch name caching

Branch name filtering in revsets was expensive. For every rev it created a
changectx and called .branch(). (Using changelog.branchinfo() would make it a
bit faster.)

Instead, use the new localrepo caching branch lookup method.

On the small hg repo:
  hg log --time -r 'branch(stable) & branch(default)'
Before:
  time: real 1.910 secs (user 1.890+0.000 sys 0.020+0.000)
After:
  time: real 1.240 secs (user 1.230+0.000 sys 0.010+0.000)
  time: real 0.120 secs (user 0.110+0.000 sys 0.000+0.000)

On mozilla-central with 210557:a280a03c9f3c :
  hg --time log -r 'branch(mobile)' -T.
Before:
  time: real 10.450 secs (user 10.390+0.000 sys 0.060+0.000)
After:
  time: real 7.640 secs (user 7.480+0.000 sys 0.140+0.000)
  time: real 0.520 secs (user 0.490+0.000 sys 0.030+0.000)

First run is 35%/27% faster (primarily because the new code path uses
changelog.branchinfo instead of changectx.branch and we avoid messing with
localrepo). Following runs will use the cache and are 16x/20x faster.

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -478,6 +478,7 @@ def branch(repo, subset, x):
     a regular expression. To match a branch that actually starts with `re:`,
     use the prefix `literal:`.
     """
+    branch = repo.revbranchcache.branch
     try:
         b = getstring(x, '')
     except error.ParseError:
@@ -489,16 +490,16 @@ def branch(repo, subset, x):
             # note: falls through to the revspec case if no branch with
             # this name exists
             if pattern in repo.branchmap():
-                return subset.filter(lambda r: matcher(repo[r].branch()))
+                return subset.filter(lambda r: matcher(branch(r)))
         else:
-            return subset.filter(lambda r: matcher(repo[r].branch()))
+            return subset.filter(lambda r: matcher(branch(r)))
 
     s = getset(repo, spanset(repo), x)
     b = set()
     for r in s:
-        b.add(repo[r].branch())
+        b.add(branch(r))
     c = s.__contains__
-    return subset.filter(lambda r: c(r) or repo[r].branch() in b)
+    return subset.filter(lambda r: c(r) or branch(r) in b)
 
 def bumped(repo, subset, x):
     """``bumped()``
@@ -1431,7 +1432,7 @@ def matching(repo, subset, x):
     getfieldfuncs = []
     _funcs = {
         'user': lambda r: repo[r].user(),
-        'branch': lambda r: repo[r].branch(),
+        'branch': repo.revbranchcache.branch,
         'date': lambda r: repo[r].date(),
         'description': lambda r: repo[r].description(),
         'files': lambda r: repo[r].files(),
@@ -1532,9 +1533,9 @@ def sort(repo, subset, x):
             elif k == '-rev':
                 e.append(-r)
             elif k == 'branch':
-                e.append(c.branch())
+                e.append(repo.revbranchcache.branch(r))
             elif k == '-branch':
-                e.append(invert(c.branch()))
+                e.append(invert(repo.revbranchcache.branch(r)))
             elif k == 'desc':
                 e.append(c.description())
             elif k == '-desc':
diff --git a/tests/test-branches.t b/tests/test-branches.t
--- a/tests/test-branches.t
+++ b/tests/test-branches.t
@@ -516,4 +516,71 @@ template output:
    }
   ]
 
+revision branch name caching implementation
+
+cache creation
+  $ rm .hg/cache/branchnames
+  $ hg debugrevspec 'max(branch("b"))'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  2c243ef8a7c2503d205bc6116f04d423  .hg/cache/branchnames
+recovery from invalid cache file
+  $ echo > .hg/cache/branchnames
+  $ hg debugrevspec 'max(branch("b"))'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  2c243ef8a7c2503d205bc6116f04d423  .hg/cache/branchnames
+recovery from other corruption - extra trailing data
+  $ echo >> .hg/cache/branchnames
+  $ hg debugrevspec 'max(branch("b"))'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  2c243ef8a7c2503d205bc6116f04d423  .hg/cache/branchnames
+lazy update after commit
+  $ hg tag tag
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  2c243ef8a7c2503d205bc6116f04d423  .hg/cache/branchnames
+  $ hg debugrevspec 'max(branch("b"))'
+  15
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  e0f1a43e584afd743115a8eeaa38b6b4  .hg/cache/branchnames
+update after rollback
+  $ hg up -qr '.^'
+  $ hg rollback -qf
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  e0f1a43e584afd743115a8eeaa38b6b4  .hg/cache/branchnames
+  $ hg debugrevspec 'max(branch("b"))'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  2c243ef8a7c2503d205bc6116f04d423  .hg/cache/branchnames
+handle history mutations that don't change the tip node - this is a problem
+with the cache invalidation scheme used by branchmap
+  $ hg log -r tip -T'{rev}:{node|short}\n'
+  14:f894c25619d3
+  $ hg bundle -q --all bu.hg
+  $ hg --config extensions.strip= strip --no-b -qr -1:
+  $ hg up -q tip
+  $ hg branch -q hacked
+  $ hg ci --amend -qm 'hacked'
+  $ hg pull -q bu.hg -r f894c25619d3
+  $ hg log -r tip -T'{rev}:{node|short}\n'
+  14:f894c25619d3
+  $ hg debugrevspec 'branch("hacked")'
+  13
+  $ hg debugrevspec 'max(branch("b"))'
+  12
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  a067ccd62fcf15722b9515842b3ed4d3  .hg/cache/branchnames
+  $ hg --config extensions.strip= strip --no-b -qr -2:
+  $ hg pull -q bu.hg
+  $ rm bu.hg
+  $ hg up -qr tip
+  $ hg log -r tip -T'{rev}:{node|short}\n'
+  14:f894c25619d3
+  $ hg debugrevspec 'max(branch("b"))'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/branchnames
+  55227148664f02d086bc3bf07c23180b  .hg/cache/branchnames
+(the cache file still contains the now unused 'hacked' branch name)
+
   $ cd ..


More information about the Mercurial-devel mailing list