[PATCH 8 of 8] tags: implement persistent tag caching (issue548)
Greg Ward
greg-hg at gerg.ca
Mon Jul 13 21:03:27 CDT 2009
# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1247536884 14400
# Node ID c456f1ebd65d7a9d947641c4f9234748bc618608
# Parent bfc26f905db3664fc1932de253db7b6f20f85772
tags: implement persistent tag caching (issue548).
- rename findglobaltags() to findglobaltags1() (so the "no cache"
implementation is still there if we need it)
- add findglobaltags2() and make findglobaltags() an alias for it
(disabling tag caching is a one-line patch)
- factor out tagcache class with methods readcache() and writecache();
the expensive part of tag finding (iterate over heads and find
.hgtags filenode) is now in tagcache.readcache()
diff --git a/mercurial/tags.py b/mercurial/tags.py
--- a/mercurial/tags.py
+++ b/mercurial/tags.py
@@ -6,16 +6,16 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
-# Currently this module only deals with reading tags. Soon it will grow
-# support for caching tag info. Eventually, it could take care of
-# updating (adding/removing/moving) tags too.
+# Currently this module only deals with reading tags (with caching).
+# Eventually, it could take care of updating (adding/removing/moving)
+# tags too.
from node import bin, hex
from i18n import _
import encoding
import error
-def findglobaltags(ui, repo, alltags, tagtypes):
+def findglobaltags1(ui, repo, alltags, tagtypes):
'''Find global tags in repo by reading .hgtags from every head that
has a distinct version of it. Updates the dicts alltags, tagtypes
in place: alltags maps tag name to (node, hist) pair (see _readtags()
@@ -44,6 +44,32 @@
ui, repo, fctx.data().splitlines(), fctx)
_updatetags(filetags, "global", alltags, tagtypes)
+def findglobaltags2(ui, repo, alltags, tagtypes):
+ '''Same as findglobaltags1(), but with caching.'''
+ cache = tagcache(ui, repo)
+ (heads, tagfnode) = cache.readcache()
+
+ #ui.debug("iterating over %d head(s) for .hgtags...\n" % len(heads))
+ seen = set() # set of fnode
+ fctx = None
+ for head in reversed(heads): # oldest to newest
+ fnode = tagfnode.get(head)
+ if fnode and fnode not in seen:
+ seen.add(fnode)
+ if not fctx:
+ fctx = repo.filectx('.hgtags', fileid=fnode)
+ else:
+ fctx = fctx.filectx(fnode)
+
+ filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx)
+ _updatetags(filetags, 'global', alltags, tagtypes)
+
+ # and update the cache (if necessary)
+ cache.writecache(heads, tagfnode)
+
+# Set this to findglobaltags1 to disable tag caching.
+findglobaltags = findglobaltags2
+
def readlocaltags(ui, repo, alltags, tagtypes):
'''Read local tags in repo. Update alltags and tagtypes.'''
try:
@@ -120,3 +146,136 @@
alltags[name] = anode, ahist
tagtypes[name] = tagtype
+
+class tagcache(object):
+ '''Object for managing persistent tag cache. Only needs to live for
+ as long as it takes to open, read, update, and write the cache.'''
+
+ # The tag cache only stores info about heads, not the tag contents
+ # from each head. I.e. it doesn't try to squeeze out the maximum
+ # performance, but is simpler has a better chance of actually
+ # working correctly. And this gives the biggest performance win: it
+ # avoids looking up .hgtags in the manifest for every head, and it
+ # can avoid calling heads() at all if there have been no changes to
+ # the repo.
+ #
+ # Caching .hgtags content is unexpectedly difficult because of
+ # strip/rollback and tag rank. I suspect the only way to get it
+ # right is to store the exact data from .hgtags on each head -- in
+ # which case why not just read it straight from the .hgtags revlog?
+ # So that is exactly what we do.
+
+ __slots__ = ['ui',
+ 'repo',
+ 'shouldwrite',
+ ]
+
+ cacheversion = 0 # format version
+
+ def __init__(self, ui, repo):
+ self.ui = ui
+ self.repo = repo
+ self.shouldwrite = False
+
+ def readcache(self):
+ '''Read the tag cache and return a tuple (heads, fnodes). heads
+ is the list of all heads currently in the repository (ordered
+ from tip to oldest) and fnodes is a mapping from head to .hgtags
+ filenode. Caller is responsible for reading tag info from each
+ head.'''
+
+ (ui, repo) = (self.ui, self.repo)
+ try:
+ cachefile = repo.opener('tags.cache', 'r')
+ #ui.debug('reading tag cache from %s\n' % cachefile.name)
+ except IOError:
+ cachefile = None
+
+ # The cache file consists of lines like
+ # <headrev> <headnode> [<tagnode>]
+ # where <headrev> and <headnode> redundantly identify a
+ # repository head from the time the cache was written, and
+ # <tagnode> is the filenode of .hgtags on that head. Heads with
+ # no .hgtags file will have no <tagnode>. The cache is ordered
+ # from tip to oldest (which is why <headrev> is there: a quick
+ # visual check is all that's required to ensure correct order).
+ #
+ # This information is enough to let us avoid the most expensive
+ # part of finding global tags, which is looking up <tagnode> in
+ # the manifest for each head.
+ cacheheads = [] # list of headnode
+ cachefnode = {} # map headnode to filenode
+ if cachefile:
+ for line in cachefile:
+ line = line.rstrip().split()
+ head = bin(line[1])
+ cacheheads.append(head)
+ if len(line) == 3:
+ fnode = bin(line[2])
+ cachefnode[head] = fnode
+
+ # See if any heads have been destroyed by strip or rollback.
+ # That'll ensure that writecache() writes accurate data, and it
+ # makes life easier for localrepo._findglobaltags().
+ goodheads = []
+ for head in cacheheads:
+ try:
+ repo.changelog.rev(head)
+ goodheads.append(head)
+ except error.LookupError:
+ pass
+ if len(goodheads) < len(cacheheads):
+ self.shouldwrite = True
+
+ # If the first head == current tip, there have been no new
+ # changesets since the cache was written. Just tell our caller
+ # to read .hgtags from every head that has it.
+ if goodheads and goodheads[0] == repo.changelog.tip():
+ #ui.debug('tag cache: tip not changed, so cache is up-to-date\n')
+ return (goodheads, cachefnode)
+
+ # Tip has changed, so we have to find new heads.
+ currentheads = repo.heads()
+ newheads = [head
+ for head in currentheads
+ if head not in set(goodheads)]
+ if newheads:
+ self.shouldwrite = True
+ #ui.debug('tag cache: found %d uncached head(s)\n' % len(newheads))
+
+ # Now we have to lookup the .hgtags filenode for every new head.
+ # This is the most expensive part of finding tags, so
+ # performance will depend primarily on the size of newheads.
+ # Worst case: newheads == currentheads (no cache file).
+ for head in newheads:
+ cctx = self.repo[head]
+ try:
+ fnode = cctx.filenode('.hgtags')
+ cachefnode[head] = fnode
+ except error.LookupError:
+ # no .hgtags file on this head
+ pass
+
+ # Everything in newheads should be closer to tip than everything
+ # in goodheads. And both are already sorted tip-to-oldest, so
+ # we can just concatenate them. (XXX unless someone edits the
+ # cache file in a sneaky attempt to trip us up.)
+ return (newheads + goodheads, cachefnode)
+
+ def writecache(self, heads, tagfnode):
+ if not self.shouldwrite:
+ return
+
+ cachefile = self.repo.opener('tags.cache', 'w', atomictemp=True)
+ #self.ui.debug('writing cache file %s\n' % cachefile.name)
+
+ for head in heads:
+ rev = self.repo[head].rev()
+ fnode = tagfnode.get(head)
+ if fnode:
+ cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
+ else:
+ cachefile.write('%d %s\n' % (rev, hex(head)))
+
+ cachefile.rename()
+ cachefile.close()
diff --git a/tests/test-empty.out b/tests/test-empty.out
--- a/tests/test-empty.out
+++ b/tests/test-empty.out
@@ -19,3 +19,4 @@
hgrc
requires
store
+tags.cache
diff --git a/tests/test-tags b/tests/test-tags
--- a/tests/test-tags
+++ b/tests/test-tags
@@ -4,7 +4,9 @@
mkdir t
cd t
hg init
+[ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache"
hg id
+[ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache"
echo a > a
hg add a
hg commit -m "test"
@@ -25,6 +27,10 @@
hg tags
hg identify
+# repeat with cold tag cache
+rm -f .hg/tags.cache
+hg identify
+
echo "% create a branch"
echo bb > a
hg status
diff --git a/tests/test-tags.out b/tests/test-tags.out
--- a/tests/test-tags.out
+++ b/tests/test-tags.out
@@ -1,5 +1,7 @@
% setup
+no tag cache
000000000000 tip
+tag cache exists
0 files updated, 0 files merged, 0 files removed, 0 files unresolved
acb14030fe0a tip
% create local tag with long name
@@ -10,6 +12,7 @@
tip 1:b9154636be93
first 0:acb14030fe0a
b9154636be93 tip
+b9154636be93 tip
% create a branch
M a
b9154636be93+ tip
@@ -73,7 +76,10 @@
rev 4: .hgtags:
bbd179dfa0a71671c253b3ae0aa1513b60d199fa bar
.hg/tags.cache:
-no such file
+4 0c192d7d5e6b78a714de54a2e9627952a877e25a 0c04f2a8af31de17fab7422878ee5a2dadbc943d
+3 6fa450212aeb2a21ed616a54aea39a4a27894cd7 7d3b718c964ef37b89e550ebdafd5789e76ce1b0
+2 7a94127795a33c10a370c93f731fd9fea0b79af6 0c04f2a8af31de17fab7422878ee5a2dadbc943d
+0 bbd179dfa0a71671c253b3ae0aa1513b60d199fa
% test tag removal
changeset: 5:5f6e8655b1c7
tag: tip
More information about the Mercurial-devel
mailing list