[PATCH 3 of 3 v2] revset: use localrepo revbranchcache for branch name filtering
Mads Kiilerich
mads at kiilerich.com
Wed Jan 7 22:23:55 CST 2015
# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1420671663 -3600
# Thu Jan 08 00:01:03 2015 +0100
# Node ID 3104a961c2930f09bdd8db9318f4d89668d0ab3c
# Parent 1e999fcce9f77d0bfd641061086069846a1b2864
revset: use localrepo revbranchcache for branch name filtering
Branch name filtering in revsets was expensive. For every rev it created a
changectx and called .branch().
Instead, use the revbranchcache.
The revbranchcache is used read-only. The revset callbacks make it hard to
figure out when we are done using/updating the cache and it would be 'tricky'
to lock the repo for writing from within a revset execution ... and the
branchmap update will usually make sure that the cache always is updated before
any revset can be run.
perfrevset 'branch(mobile)' on mozilla-central.
Before:
! wall 10.989637 comb 10.970000 user 10.940000 sys 0.030000 (best of 3)
After, no cache:
! wall 7.440318 comb 7.430000 user 7.400000 sys 0.030000 (best of 3)
After, with cache:
! wall 0.382129 comb 0.380000 user 0.380000 sys 0.000000 (best of 26)
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -456,6 +456,11 @@ def branch(repo, subset, x):
a regular expression. To match a branch that actually starts with `re:`,
use the prefix `literal:`.
"""
+ import branchmap
+ urepo = repo.unfiltered()
+ cl = urepo.changelog
+ getbi = branchmap.revbranchcache(urepo).branchinfo
+
try:
b = getstring(x, '')
except error.ParseError:
@@ -467,16 +472,16 @@ def branch(repo, subset, x):
# note: falls through to the revspec case if no branch with
# this name exists
if pattern in repo.branchmap():
- return subset.filter(lambda r: matcher(repo[r].branch()))
+ return subset.filter(lambda r: matcher(getbi(cl, r)[0]))
else:
- return subset.filter(lambda r: matcher(repo[r].branch()))
+ return subset.filter(lambda r: matcher(getbi(cl, r)[0]))
s = getset(repo, spanset(repo), x)
b = set()
for r in s:
- b.add(repo[r].branch())
+ b.add(getbi(cl, r)[0])
c = s.__contains__
- return subset.filter(lambda r: c(r) or repo[r].branch() in b)
+ return subset.filter(lambda r: c(r) or getbi(cl, r)[0] in b)
def bumped(repo, subset, x):
"""``bumped()``
diff --git a/tests/test-branches.t b/tests/test-branches.t
--- a/tests/test-branches.t
+++ b/tests/test-branches.t
@@ -520,4 +520,79 @@ template output:
}
]
+revision branch name caching implementation
+
+cache creation
+ $ rm .hg/cache/rbcrevs-v1
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+ $ [ -f .hg/cache/rbcrevs-v1 ] || echo no file
+ no file
+recovery from invalid cache file
+ $ echo > .hg/cache/rbcrevs-v1
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+cache update NOT fully written from revset
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 68b329da9893e34099c7d8ad5cb9c940 .hg/cache/rbcrevs-v1
+recovery from other corruption - extra trailing data
+ $ echo >> .hg/cache/rbcrevs-v1
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+cache update NOT fully written from revset
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ e1c06d85ae7b8b032bef47e42e4c08f9 .hg/cache/rbcrevs-v1
+lazy update after commit
+ $ hg tag tag
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 0bd281bcf98663164c034a908b7c49b1 .hg/cache/rbcrevs-v1
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 0bd281bcf98663164c034a908b7c49b1 .hg/cache/rbcrevs-v1
+update after rollback - cache keeps stripped revs until written for other reasons
+ $ hg up -qr '.^'
+ $ hg rollback -qf
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 870abdb609030b84603f964b04dccfaa .hg/cache/rbcrevs-v1
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 870abdb609030b84603f964b04dccfaa .hg/cache/rbcrevs-v1
+handle history mutations that don't change the tip node - this is a problem
+with the cache invalidation scheme used by branchmap
+ $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n'
+ 14:f894c25619d3 c
+ 13:e23b5505d1ad b
+ $ hg bundle -q --all bu.hg
+ $ hg --config extensions.strip= strip --no-b -qr -1:
+ $ hg up -q tip
+ $ hg branch
+ b
+ $ hg branch -q hacked
+ $ hg ci --amend -qm 'hacked'
+ $ hg pull -q bu.hg -r f894c25619d3
+ $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n'
+ 14:f894c25619d3 c
+ 12:e3d49c0575d8 b
+ $ hg debugrevspec 'branch("hacked")'
+ 13
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ c8eb67c076ce6d58b7502944e373fc8c .hg/cache/rbcrevs-v1
+cleanup, restore old state
+ $ hg --config extensions.strip= strip --no-b -qr -2:
+ $ hg pull -q bu.hg
+ $ rm bu.hg
+ $ hg up -qr tip
+ $ hg log -r tip -T'{rev}:{node|short}\n'
+ 14:f894c25619d3
+(the cache file does not go back to the old state - it still contains the
+now unused 'hacked' branch name)
+ $ hg debugrevspec 'branch("re:a ")'
+ 7
+ $ "$TESTDIR/md5sum.py" .hg/cache/rbcrevs-v1
+ 870abdb609030b84603f964b04dccfaa .hg/cache/rbcrevs-v1
+ $ cat .hg/cache/rbcnames-v1
+ default\x00a\x00b\x00c\x00a branch name much longer than the default justification used by branches\x00hacked (no-eol) (esc)
+
$ cd ..
More information about the Mercurial-devel
mailing list