[PATCH 8 of 8] localrepo: implement persistent tag caching
Greg Ward
greg-hg at gerg.ca
Sun Jul 12 20:02:11 CDT 2009
# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1247446899 14400
# Node ID 932640a34ea88da78d680fdc3ab5e9b77e601592
# Parent 6106257165fc2d3a461f67c5c361085fd02271d4
localrepo: implement persistent tag caching
- factor out localrepository._findglobaltags()
- factor out tagcache class with methods readcache() and writecache();
the expensive part of tag finding (iterate over heads and find
.hgtags filenode) is now in tagcache.readcache()
- add nulltagcache so we can easily revert to non-cached tags
if necessary
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -258,6 +258,7 @@
# be one tagtype for all such "virtual" tags? Or is the status
# quo fine?
+ # tag names are in UTF-8 in these two dicts
alltags = {} # map tag name to (node, hist)
tagtypes = {}
@@ -285,27 +286,11 @@
alltags[name] = anode, ahist
tagtypes[name] = tagtype
- seen = set()
- fctx = None
- ctxs = [] # list of filectx
- for node in self.heads():
- try:
- fnode = self[node].filenode('.hgtags')
- except error.LookupError:
- continue
- if fnode not in seen:
- seen.add(fnode)
- if not fctx:
- fctx = self.filectx('.hgtags', fileid=fnode)
- else:
- fctx = fctx.filectx(fnode)
- ctxs.append(fctx)
+ self._findglobaltags(updatetags)
+ #self.ui.debug("found %d global tags\n" % len(alltags))
- # read the tags file from each head, ending with the tip
- for fctx in reversed(ctxs):
- filetags = self._readtags(fctx.data().splitlines(), fctx)
- updatetags(filetags, "global")
-
+ # this is cheap: no need to cache it
+ #self.ui.debug("reading localtags...\n")
try:
data = encoding.fromlocal(self.opener("localtags").read())
# localtags are stored in the local character set
@@ -318,10 +303,15 @@
tags = {}
for (name, (node, hist)) in alltags.iteritems():
if node != nullid:
- tags[name] = node
+ tags[encoding.tolocal(name)] = node
tags['tip'] = self.changelog.tip()
+
+ # re-encode tagtypes keys to local encoding
+ tagtypes = dict([(encoding.tolocal(name), ttype)
+ for (name, ttype) in tagtypes.iteritems()])
return (tags, tagtypes)
+ # tag names here: UTF-8 in and UTF-8 out
def _readtags(self, lines, fn):
'''Read tag definitions from a file (or any source of
lines). Return a mapping from tag name to (node, hist):
@@ -344,7 +334,7 @@
except ValueError:
warn(_("cannot parse entry"))
continue
- name = encoding.tolocal(name.strip()) # stored in UTF-8
+ name = name.strip() # keep it in UTF-8 for now
try:
nodebin = bin(nodehex)
except TypeError:
@@ -362,6 +352,32 @@
filetags[name] = (nodebin, hist)
return filetags
+ def _findglobaltags(self, updatetags):
+ # Use nulltagcache rather than tagcache to disable tag caching.
+ # Useful if you find a bug in the tag cache or want to compare
+ # performance.
+ #cache = nulltagcache(self.ui, self)
+ cache = tagcache(self.ui, self)
+ (heads, tagfnode) = cache.readcache()
+
+ #self.ui.debug("iterating over %d head(s) for .hgtags...\n" % len(heads))
+ seen = set() # set of fnode
+ fctx = None
+ for head in reversed(heads): # oldest to newest
+ fnode = tagfnode.get(head)
+ if fnode and fnode not in seen:
+ seen.add(fnode)
+ if not fctx:
+ fctx = self.filectx('.hgtags', fileid=fnode)
+ else:
+ fctx = fctx.filectx(fnode)
+
+ filetags = self._readtags(fctx.data().splitlines(), fctx)
+ updatetags(filetags, 'global')
+
+ # and update the cache (if necessary)
+ cache.writecache(heads, tagfnode)
+
def tagtype(self, tagname):
'''
return the type of the given tag. result can be:
@@ -2200,6 +2216,208 @@
return self.stream_in(remote)
return self.pull(remote, heads)
+class nulltagcache(object):
+ """Object that acts like tagcache, but does no caching. Useful
+ to restore old behaviour with minimal code change."""
+
+ def __init__(self, ui, repo):
+ self.ui = ui
+ self.repo = repo
+
+ def readcache(self):
+ # XXX this loop is copied from tagcache.readcache(): should
+ # factor it out if nulltagcache is going to stick around for the
+ # long term
+ heads = self.repo.heads()
+ fnodes = {}
+ for head in heads:
+ cctx = self.repo[head]
+ try:
+ fnodes[head] = cctx.filenode('.hgtags')
+ except error.LookupError:
+ pass
+
+ return (heads, fnodes)
+
+ def writecache(self, heads, tagfnode):
+ pass
+
+class tagcache(object):
+ '''Object for managing persistent tag cache. Only needs to live for
+ as long as it takes to open, read, update, and write the cache.'''
+
+ # The tag cache stores only info about heads, not the tag contents
+ # from each head. I.e. this doesn't try to squeeze out the maximum
+ # performance, but it is simpler and has a better chance at actually
+ # working correctly. And this gives the biggest performance win: it
+ # avoids looking up .hgtags in the manifest for every head, and it
+ # can avoid calling heads() at all if there have been no changes to
+ # the repo.
+ #
+ # Caching .hgtags content is unexpectedly difficult because of
+ # strip/rollback and tag rank. I suspect the only way to get it
+ # right is to store the exact data from .hgtags on each head -- in
+ # which case why not just read it straight from the .hgtags revlog?
+ # So that is exactly what we do.
+
+ __slots__ = ['ui',
+ 'repo',
+ 'shouldwrite',
+ ]
+
+ cacheversion = 0 # format version
+
+ def __init__(self, ui, repo):
+ self.ui = ui
+ self.repo = repo
+ self.shouldwrite = False
+
+ def readcache(self):
+ '''Read the tag cache and return a tuple (heads, fnodes). heads
+ is the list of all heads currently in the repository (ordered
+ from tip to oldest) and fnodes is a mapping from head to .hgtags
+ filenode. Caller is responsible for reading tag info from each
+ head.'''
+
+ (ui, repo) = (self.ui, self.repo)
+ try:
+ # XXX should we treat this as a binary file (really, text
+ # Unix line endings on all platforms) from the beginning?
+ # That might avoid difficulties in reading the first 16
+ # bytes if we switch to a truly binary format in future (see
+ # _readversion()).
+ cachefile = repo.opener('tags.cache', 'r')
+ #ui.debug('reading tag cache from %s\n' % cachefile.name)
+ except IOError:
+ cachefile = None
+ else:
+ canuse = self._readversion(cachefile)
+ if not canuse:
+ cachefile.close()
+ cachefile = None
+
+ # The cache file consists of lines like
+ # <headrev> <headnode> [<tagnode>]
+ # where <headrev> and <headnode> redundantly identify a
+ # repository head from the time the cache was written, and
+ # <tagnode> is the filenode of .hgtags on that head. Heads with
+ # no .hgtags file will have no <tagnode>. The cache is ordered
+ # from tip to oldest (which is why <headrev> is there: a quick
+ # visual check is all that's required to ensure correct order).
+ #
+ # This information is enough to let us avoid the most expensive
+ # part of finding global tags, which is looking up <tagnode> in
+ # the manifest for each head.
+ cacheheads = [] # list of headnode
+ cachefnode = {} # map headnode to filenode
+ if cachefile:
+ for line in cachefile:
+ line = line.rstrip().split()
+ head = bin(line[1])
+ cacheheads.append(head)
+ if len(line) == 3:
+ fnode = bin(line[2])
+ cachefnode[head] = fnode
+
+ # Determine which heads have been destroyed by strip or
+ # rollback. That'll ensure that writecache() writes accurate
+ # data, and it makes life easier for
+ # localrepo._findglobaltags().
+ goodheads = []
+ for head in cacheheads:
+ try:
+ repo.changelog.rev(head)
+ goodheads.append(head)
+ except error.LookupError:
+ pass
+ if len(goodheads) < len(cacheheads):
+ self.shouldwrite = True
+
+ # Optimization #1: if the first head == current tip, there have
+ # been no new changesets since the cache was written. All we need
+ # to do is tell our caller to read .hgtags from each relevant
+ # head.
+ if goodheads and goodheads[0] == repo.changelog.tip():
+ #ui.debug('tag cache: tip not changed, so cache is up-to-date\n')
+ return (goodheads, cachefnode)
+
+ # Tip has changed, so we have to find new heads.
+ currentheads = repo.heads()
+ newheads = [head
+ for head in currentheads
+ if head not in set(goodheads)]
+ if newheads:
+ self.shouldwrite = True
+ #ui.debug('tag cache: found %d uncached head(s)\n' % len(newheads))
+
+ # Now we have to lookup the .hgtags filenode for every new head.
+ # This is the most expensive part of finding tags, so
+ # performance will depend primarily on the size of newheads.
+ # When there is no cache file, newheads == currentheads, so
+ # that's the worst case.
+ for head in newheads:
+ cctx = self.repo[head]
+ try:
+ fnode = cctx.filenode('.hgtags')
+ cachefnode[head] = fnode
+ except error.LookupError:
+ # no .hgtags file on this head
+ pass
+
+ # Everything in newheads should be closer to tip than everything
+ # in goodheads. And both are already sorted tip-to-oldest, so
+ # we can just concatenate them. (XXX unless someone edits the
+ # cache file in a sneaky attempt to trip us up.)
+ return (newheads + goodheads, cachefnode)
+
+ def _readversion(self, cachefile):
+ '''Read the first line of the cache file, which contains the
+ file version number. Return true if we can use this cache file.'''
+
+ # The first line looks like 'hgtagcache 0000', where 0000 is the
+ # cache file version number in hex. (This should make it
+ # possible to switch to a binary format if necessary in future,
+ # where the first 16 bytes will be 'hgtagcache xxxx\n'.)
+ firstline = cachefile.next()
+ if not (len(firstline) == 16 and
+ firstline[:11] == 'hgtagcache '):
+ self.ui.warn(_('invalid tag cache file (ignoring it)\n'))
+ return False
+ else:
+ try:
+ version = int(firstline[11:15], 16)
+ except ValueError:
+ self.ui.warn(_('invalid tag cache version (%r) (ignoring file)\n')
+ % firstline[11:15])
+ return False
+
+ if version > self.cacheversion:
+ self.ui.warn(_('tag cache file from a later Mercurial version '
+ '(ignoring it)\n'))
+ return False
+
+ return True
+
+ def writecache(self, heads, tagfnode):
+ if not self.shouldwrite:
+ return
+
+ cachefile = self.repo.opener('tags.cache', 'w', atomictemp=True)
+ #self.ui.debug('writing cache file %s\n' % cachefile.name)
+
+ cachefile.write('hgtagcache %04x\n' % self.cacheversion)
+ for head in heads:
+ rev = self.repo[head].rev()
+ fnode = tagfnode.get(head)
+ if fnode:
+ cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
+ else:
+ cachefile.write('%d %s\n' % (rev, hex(head)))
+
+ cachefile.rename()
+ cachefile.close()
+
+
# used to avoid circular references so destructors work
def aftertrans(files):
renamefiles = [tuple(t) for t in files]
diff --git a/tests/test-empty.out b/tests/test-empty.out
--- a/tests/test-empty.out
+++ b/tests/test-empty.out
@@ -19,3 +19,4 @@
hgrc
requires
store
+tags.cache
diff --git a/tests/test-tags b/tests/test-tags
--- a/tests/test-tags
+++ b/tests/test-tags
@@ -4,7 +4,9 @@
mkdir t
cd t
hg init
+[ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache"
hg id
+[ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache"
echo a > a
hg add a
hg commit -m "test"
@@ -25,6 +27,10 @@
hg tags
hg identify
+# repeat with cold tag cache
+rm -f .hg/tags.cache
+hg identify
+
echo "% create a branch"
echo bb > a
hg status
diff --git a/tests/test-tags.out b/tests/test-tags.out
--- a/tests/test-tags.out
+++ b/tests/test-tags.out
@@ -1,5 +1,7 @@
% setup
+no tag cache
000000000000 tip
+tag cache exists
0 files updated, 0 files merged, 0 files removed, 0 files unresolved
acb14030fe0a tip
% create local tag with long name
@@ -10,6 +12,7 @@
tip 1:b9154636be93
first 0:acb14030fe0a
b9154636be93 tip
+b9154636be93 tip
% create a branch
M a
b9154636be93+ tip
@@ -73,7 +76,11 @@
rev 4: .hgtags:
bbd179dfa0a71671c253b3ae0aa1513b60d199fa bar
.hg/tags.cache:
-no such file
+hgtagcache 0000
+4 0c192d7d5e6b78a714de54a2e9627952a877e25a 0c04f2a8af31de17fab7422878ee5a2dadbc943d
+3 6fa450212aeb2a21ed616a54aea39a4a27894cd7 7d3b718c964ef37b89e550ebdafd5789e76ce1b0
+2 7a94127795a33c10a370c93f731fd9fea0b79af6 0c04f2a8af31de17fab7422878ee5a2dadbc943d
+0 bbd179dfa0a71671c253b3ae0aa1513b60d199fa
% test tag removal
changeset: 5:5f6e8655b1c7
tag: tip
More information about the Mercurial-devel
mailing list