[PATCH 3 of 3 v2] revset: use localrepo revbranchcache for branch name filtering

Mads Kiilerich mads at kiilerich.com
Wed Jan 7 22:23:55 CST 2015


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1420671663 -3600
#      Thu Jan 08 00:01:03 2015 +0100
# Node ID 3104a961c2930f09bdd8db9318f4d89668d0ab3c
# Parent  1e999fcce9f77d0bfd641061086069846a1b2864
revset: use localrepo revbranchcache for branch name filtering

Branch name filtering in revsets was expensive. For every rev it created a
changectx and called .branch().

Instead, use the revbranchcache.

The revbranchcache is used read-only. The revset callbacks make it hard to
figure out when we are done using/updating the cache, and it would be 'tricky'
to lock the repo for writing from within a revset execution. In any case, the
branchmap update will usually make sure that the cache is always up to date
before any revset can be run.

perfrevset 'branch(mobile)' on mozilla-central.
Before:
! wall 10.989637 comb 10.970000 user 10.940000 sys 0.030000 (best of 3)
After, no cache:
! wall 7.440318 comb 7.430000 user 7.400000 sys 0.030000 (best of 3)
After, with cache:
! wall 0.382129 comb 0.380000 user 0.380000 sys 0.000000 (best of 26)

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -456,6 +456,11 @@ def branch(repo, subset, x):
     a regular expression. To match a branch that actually starts with `re:`,
     use the prefix `literal:`.
     """
+    import branchmap
+    urepo = repo.unfiltered()
+    cl = urepo.changelog
+    getbi = branchmap.revbranchcache(urepo).branchinfo
+
     try:
         b = getstring(x, '')
     except error.ParseError:
@@ -467,16 +472,16 @@ def branch(repo, subset, x):
             # note: falls through to the revspec case if no branch with
             # this name exists
             if pattern in repo.branchmap():
-                return subset.filter(lambda r: matcher(repo[r].branch()))
+                return subset.filter(lambda r: matcher(getbi(cl, r)[0]))
         else:
-            return subset.filter(lambda r: matcher(repo[r].branch()))
+            return subset.filter(lambda r: matcher(getbi(cl, r)[0]))
 
     s = getset(repo, spanset(repo), x)
     b = set()
     for r in s:
-        b.add(repo[r].branch())
+        b.add(getbi(cl, r)[0])
     c = s.__contains__
-    return subset.filter(lambda r: c(r) or repo[r].branch() in b)
+    return subset.filter(lambda r: c(r) or getbi(cl, r)[0] in b)
 
 def bumped(repo, subset, x):
     """``bumped()``
diff --git a/tests/test-branches.t b/tests/test-branches.t
--- a/tests/test-branches.t
+++ b/tests/test-branches.t
@@ -520,4 +520,79 @@ template output:
    }
   ]
 
+revision branch name caching implementation
+
+cache creation
+  $ rm .hg/cache/rbcrevs-v1
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+  $ [ -f .hg/cache/rbcrevs-v1 ] || echo no file
+  no file
+recovery from invalid cache file
+  $ echo > .hg/cache/rbcrevs-v1
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+cache update NOT fully written from revset
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  68b329da9893e34099c7d8ad5cb9c940  .hg/cache/rbcrevs-v1
+recovery from other corruption - extra trailing data
+  $ echo >> .hg/cache/rbcrevs-v1
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+cache update NOT fully written from revset
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  e1c06d85ae7b8b032bef47e42e4c08f9  .hg/cache/rbcrevs-v1
+lazy update after commit
+  $ hg tag tag
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  0bd281bcf98663164c034a908b7c49b1  .hg/cache/rbcrevs-v1
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  0bd281bcf98663164c034a908b7c49b1  .hg/cache/rbcrevs-v1
+update after rollback - cache keeps stripped revs until written for other reasons
+  $ hg up -qr '.^'
+  $ hg rollback -qf
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  870abdb609030b84603f964b04dccfaa  .hg/cache/rbcrevs-v1
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  870abdb609030b84603f964b04dccfaa  .hg/cache/rbcrevs-v1
+handle history mutations that don't change the tip node - this is a problem
+with the cache invalidation scheme used by branchmap
+  $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n'
+  14:f894c25619d3 c
+  13:e23b5505d1ad b
+  $ hg bundle -q --all bu.hg
+  $ hg --config extensions.strip= strip --no-b -qr -1:
+  $ hg up -q tip
+  $ hg branch
+  b
+  $ hg branch -q hacked
+  $ hg ci --amend -qm 'hacked'
+  $ hg pull -q bu.hg -r f894c25619d3
+  $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n'
+  14:f894c25619d3 c
+  12:e3d49c0575d8 b
+  $ hg debugrevspec 'branch("hacked")'
+  13
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  c8eb67c076ce6d58b7502944e373fc8c  .hg/cache/rbcrevs-v1
+cleanup, restore old state
+  $ hg --config extensions.strip= strip --no-b -qr -2:
+  $ hg pull -q bu.hg
+  $ rm bu.hg
+  $ hg up -qr tip
+  $ hg log -r tip -T'{rev}:{node|short}\n'
+  14:f894c25619d3
+the cache file does not go back to the old state (it still contains the
+now unused 'hacked' branch name)
+  $ hg debugrevspec 'branch("re:a ")'
+  7
+  $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+  870abdb609030b84603f964b04dccfaa  .hg/cache/rbcrevs-v1
+  $ cat .hg/cache/rbcnames-v1
+  default\x00a\x00b\x00c\x00a branch name much longer than the default justification used by branches\x00hacked (no-eol) (esc)
+
   $ cd ..


More information about the Mercurial-devel mailing list