[PATCH 7 of 7] localrepo: implement persistent tag caching
Greg Ward
greg at gerg.ca
Fri Jul 3 09:48:31 CDT 2009
# HG changeset patch
# User Greg Ward <greg at gerg.ca>
# Date 1246632310 14400
# Node ID ab5c492c2cd6e878c1cbcccad0a043d00a9413b6
# Parent 28f7005c067ff1380c262283c71ba5327ee29614
localrepo: implement persistent tag caching
- factor out tagcache class with methods readcache() and writecache()
- the expensive part of tag finding (iterate over heads and find
.hgtags filenode) is now in tagcache.readcache()
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -280,27 +280,37 @@
alltags[name] = anode, ahist
tagtypes[name] = tagtype
+ cache = tagcache(self.ui, self)
+
+ self.ui.debug("getting list of heads...\n")
+ heads = self.heads()
+ heads.reverse() # oldest to newest
+ self.ui.debug("checking tag cache...\n")
+ (staleheads, cachetags) = cache.readcache(heads)
+ updatetags(cachetags, 'global')
+
+ self.ui.debug("iterating over %d stale heads...\n" % len(staleheads))
seen = set()
fctx = None
- ctxs = [] # list of filectx
- for node in self.heads():
- try:
- fnode = self[node].filenode('.hgtags')
- except error.LookupError:
- continue
+ for (node, fnode) in staleheads:
+ assert fnode is not None
if fnode not in seen:
seen.add(fnode)
if not fctx:
fctx = self.filectx('.hgtags', fileid=fnode)
else:
fctx = fctx.filectx(fnode)
- ctxs.append(fctx)
- # read the tags file from each head, ending with the tip
- for fctx in reversed(ctxs):
- filetags = self._readtags(fctx.data().splitlines(), fctx)
- updatetags(filetags, "global")
+ filetags = self._readtags(fctx.data().splitlines(), fctx)
+ updatetags(filetags, "global")
+ self.ui.debug("found %d global tags\n" % len(alltags))
+
+ # Update the cache with tag info read from stale heads.
+ cache.writecache(alltags)
+
+ # this is cheap: no need to cache it
+ self.ui.debug("reading localtags...\n")
try:
data = encoding.fromlocal(self.opener("localtags").read())
# localtags are stored in the local character set
@@ -312,12 +322,15 @@
self._tags = {}
self._tagtypes = {}
+ #for (name, (node, _)) in alltags.iteritems():
for name, nodehist in alltags.iteritems():
node = nodehist[0]
if node != nullid:
self._tags[name] = node
self._tagtypes[name] = tagtypes[name]
self._tags['tip'] = self.changelog.tip()
+
+ self.ui.debug("_findtags() all done\n")
return self._tags
def _readtags(self, lines, fn):
@@ -2207,6 +2220,151 @@
return self.stream_in(remote)
return self.pull(remote, heads)
+class tagcache(object):
+    """Manage the persistent tag cache (.hg/tags.cache).
+
+    An instance only needs to live for as long as it takes to open,
+    read, update, and write the cache.
+    """
+
+    __slots__ = ['ui',
+                 'repo',
+                 'allheads',
+                 ]
+
+    def __init__(self, ui, repo):
+        self.ui = ui
+        self.repo = repo
+
+        # list of (headrev, headnode, tagfilenode) tuples; tagfilenode
+        # is None for heads with no .hgtags file.  Populated by
+        # readcache() and consumed by writecache().
+        self.allheads = None
+
+    def readcache(self, heads):
+        """Read the tags.cache file and check if it is up-to-date.
+
+        Return a tuple (staleheads, cachetags).  staleheads is a list
+        of the heads for which we must re-read .hgtags, as (headnode,
+        tagnode) tuples, where tagnode is the filenode of .hgtags on
+        that head.  cachetags is the map of cached tags in the same
+        format as returned by repo._readtags().
+        """
+
+        # Open the cache file or bail out early.
+        try:
+            cachefile = self.repo.opener("tags.cache", "rt")
+        except IOError:
+            # No cache file yet: total cache miss.  All heads with
+            # .hgtags are considered stale.  (This loop is the most
+            # expensive part of the whole tag-reading process, and the
+            # one we try hardest to avoid.)
+            self.ui.debug("iterating over %d heads for .hgtags fnodes...\n"
+                          % len(heads))
+            self.allheads = []
+            staleheads = []
+            for head in heads:
+                cctx = self.repo[head]
+                try:
+                    fnode = cctx.filenode('.hgtags')
+                    staleheads.append((head, fnode))
+                except error.LookupError:
+                    fnode = None
+                self.allheads.append((cctx.rev(), head, fnode))
+
+            return (staleheads, {})
+
+        # Read the cache up to the delimiting blank line.  Each line is
+        # "<headrev> <headnode> [<tagnode>]" (see writecache()).
+        self.allheads = []
+        cacheheads = []                 # list of head nodes
+        for line in cachefile:
+            line = line.strip()
+            if not line:
+                break
+
+            line = line.split()
+            headrev = int(line[0])
+            headbin = bin(line[1])
+            cacheheads.append(headbin)
+            if len(line) == 2:
+                fnode = None            # head with no .hgtags file
+            elif len(line) == 3:
+                # third field is the .hgtags filenode on that head
+                # (NOT line[1], which is the head node itself)
+                fnode = bin(line[2])
+            self.allheads.append((headrev, headbin, fnode))
+
+        # And read the rest of the file: we're going to need it sooner
+        # or later.
+        cachetags = self.repo._readtags(cachefile, cachefile.name)
+        cachefile.close()
+
+        # Check if cached heads == current heads.  If so, that means no
+        # new changesets have entered the repo since the cache was
+        # written, so the cache is completely up-to-date.
+        if cacheheads == heads:
+            self.ui.debug("good news: tagcache up-to-date\n")
+            return ([], cachetags)
+
+        # Tag cache is stale: find new heads added since the cache was
+        # written.  (Heads that have disappeared are not interesting:
+        # that just means they have been merged, and we'll find their
+        # tag information in the new head created from the merge.)
+        oldheads = set(cacheheads)      # build the set once, not per head
+        newheads = [head for head in heads if head not in oldheads]
+        assert newheads, "cache heads != current heads, but no new heads found"
+        self.ui.debug("tag cache is stale: found %d new heads (%s)\n"
+                      % (len(newheads), ", ".join(map(short, newheads))))
+
+        # Not all new heads actually have new tag info.  Visit each new
+        # head, checking if the .hgtags version there is a version we've
+        # ever seen before.  If not, that's a stale head and we'll have
+        # to reread its .hgtags.
+        staleheads = []
+        fnodes = set([fnode for (_, _, fnode) in self.allheads
+                      if fnode is not None])
+        for head in newheads:
+            cctx = self.repo[head]
+            try:
+                newfnode = cctx.filenode('.hgtags')
+            except error.LookupError:
+                newfnode = None
+            self.allheads.append((cctx.rev(), head, newfnode))
+            if newfnode and newfnode not in fnodes:
+                # XXX what happens if user removes .hgtags from some
+                # head?  does that make filenode() raise LookupError?  if
+                # so, this might be the killer case that means cached
+                # tags have to store more info than each .hgtags (ie. we
+                # have to persist the alltags dict)
+                staleheads.append((head, newfnode))
+
+        self.allheads.sort()            # order by rev, oldest to tip
+        self.ui.debug("tagcache: found %d stale heads (%s)\n"
+                      % (len(staleheads),
+                         ", ".join([short(sh) for (sh, fn) in staleheads])))
+        return (staleheads, cachetags)
+
+    def writecache(self, tags):
+        """Write the tags.cache file.
+
+        tags is the mapping of all global tags, whether read from the
+        cache or from .hgtags files on stale heads, in the format
+        returned by repo._readtags().
+        """
+
+        assert self.allheads is not None, \
+            "readcache() must be called before writecache()"
+
+        # The cache file has two sections, delimited by a blank line.
+        #
+        # Section 1 is the list of heads:
+        #   <headrev> <headnode> [<tagnode>]
+        # where <headnode> and <headrev> redundantly identify the head,
+        # and <tagnode> is the filenode of .hgtags at that head.  Heads
+        # with no .hgtags file are still listed, but with no <tagnode>.
+        #
+        # Section 2 is the list of tags in the same format as .hgtags
+        # itself:
+        #   <node> <name>
+        cachefile = self.repo.opener("tags.cache", "wt")
+        self.ui.debug("writing %s\n" % cachefile.name)
+        for (rev, node, fnode) in self.allheads:
+            if fnode is None:
+                cachefile.write("%d %s\n" % (rev, hex(node)))
+            else:
+                cachefile.write("%d %s %s\n" % (rev, hex(node), hex(fnode)))
+        cachefile.write("\n")
+
+        for (name, (node, _)) in tags.iteritems():
+            cachefile.write("%s %s\n" % (hex(node), name))
+
+        cachefile.close()
+
+
# used to avoid circular references so destructors work
def aftertrans(files):
renamefiles = [tuple(t) for t in files]
More information about the Mercurial-devel
mailing list