[PATCH] Implement persistent tag caching

Greg Ward greg-hg at gerg.ca
Tue May 12 08:53:09 CDT 2009


# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1242136298 0
# Node ID dfa1e342c02d5ec5171865d9e64462836585fa8a
# Parent  1f0f01bc86a505ce227d6778af38bc67cff7e85e
Implement persistent tag caching.

The implementation is fairly straightforward: after iterating over
heads, save the global tags in .hg/tags.cache.  Do not cache local
tags or 'tip', because they are much cheaper to read and would greatly
complicate cache invalidation.  Discard the cache on every commit that
touches .hgtags.

This is a rough draft, posted for review and feedback rather than for
immediate push to crew.  Known flaws:
  - only invalidates the cache on commit (i.e. does nothing about
    pulled/pushed changes to .hgtags)
  - the tag cache is a pickle, which seems un-Mercurial to me
  - makes localrepository, a large and complex class, even larger
    and more complex
  - invalidating the entire cache every time someone runs "hg tag x"
    is a bit harsh: is it worth a special case to update the
    cache in that case?
  - lots of debug output

I'm not sure how to invalidate the cache on push/pull.  Do I have to
make localrepository use a 'changegroup' hook?  Tips welcome.

IMHO the right format for tags.cache is the same as .hgtags and
localtags, but doing that means some refactoring of readtags() in
localrepo.tags().

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -5,6 +5,8 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.
 
+import cPickle                          # for tag cache (for now)
+
 from node import bin, hex, nullid, nullrev, short
 from i18n import _
 import repo, changegroup
@@ -234,7 +236,10 @@
 
     def tags(self):
         '''return a mapping of tag to node'''
+        self.ui.debug("self.tagscache (memory) = %r\n" % self.tagscache)
         if self.tagscache:
+            # The in-memory tag cache contains everything: global tags,
+            # local tags, tip.  So if we have it, use it.
             return self.tagscache
 
         globaltags = {}
@@ -291,13 +296,22 @@
                 globaltags[k] = an, ah
                 tagtypes[k] = tagtype
 
-        # read the tags file from each head, ending with the tip
-        f = None
-        for rev, node, fnode in self._hgtagsnodes():
-            f = (f and f.filectx(fnode) or
-                 self.filectx('.hgtags', fileid=fnode))
-            readtags(f.data().splitlines(), f, "global")
+        # Try to read the persistent (on-disk) cache.  It only contains
+        # global tags, since avoiding the iteration over heads is where
+        # the performance win comes.
+        cached = self._readtagcache(globaltags, tagtypes)
+        if not cached:
+            # Read the tags file from each head, ending with the tip.
+            f = None
+            self.ui.debug("calling _hgtagsnodes()...\n")
+            for rev, node, fnode in self._hgtagsnodes():
+                self.ui.debug("  got rev = %r\n" % rev)
+                f = (f and f.filectx(fnode) or
+                     self.filectx('.hgtags', fileid=fnode))
+                readtags(f.data().splitlines(), f, "global")
+            self._writetagcache(globaltags)
 
+        self.ui.debug("reading local tags\n")
         try:
             data = encoding.fromlocal(self.opener("localtags").read())
             # localtags are stored in the local character set
@@ -306,6 +320,7 @@
         except IOError:
             pass
 
+        # Create the in-memory tag cache.
         self.tagscache = {}
         self._tagstypecache = {}
         for k, nh in globaltags.iteritems():
@@ -316,6 +331,35 @@
         self.tagscache['tip'] = self.changelog.tip()
         return self.tagscache
 
+    def _readtagcache(self, globaltags, tagtype):
+        '''merge tags.cache into globaltags/tagtype; True if it was read'''
+        try:
+            self.ui.debug("reading tags.cache\n")
+            cachefile = self.opener("tags.cache", "rb")
+            cache = cPickle.load(cachefile)  # NOTE: safe only if .hg is trusted
+            cachefile.close()
+        except (IOError, OSError, EOFError, cPickle.UnpicklingError), err:
+            # a missing cache is normal; a damaged one is only a cache miss
+            if getattr(err, 'errno', None) != errno.ENOENT:
+                self.ui.warn(_("error reading tags.cache: %s\n") % err)
+            return False
+        self.ui.debug("cached global tag: %r\n" % cache)
+        globaltags.update(cache)
+        for tag in cache:
+            tagtype[tag] = 'global'
+        return True
+
+    def _writetagcache(self, globaltags):
+        '''save global tags to .hg/tags.cache; a failure only warns'''
+        try:
+            self.ui.debug("writing tags.cache\n")
+            cachefile = self.opener("tags.cache", "wb")
+            cPickle.dump(globaltags, cachefile, protocol=2)
+            cachefile.close()       # not after try: unbound if open fails
+        except (IOError, OSError), err:
+            self.ui.warn(_("unable to write %s: %s\n")
+                         % (self.join("tags.cache"), err))
+
     def tagtype(self, tagname):
         '''
         return the type of the given tag. result can be:
@@ -332,7 +376,10 @@
     def _hgtagsnodes(self):
         last = {}
         ret = []
-        for node in reversed(self.heads()):
+        heads = reversed(self.heads())
+        self.ui.debug("_hgtagsnodes(): iterating over %d heads\n"
+                      % (len(heads),))
+        for node in heads:
             c = self[node]
             rev = c.rev()
             try:
@@ -891,8 +938,11 @@
             new = {}
             changed = []
             linkrev = len(self)
+            touchtags = False           # does this commit modify .hgtags?
             for f in commit:
                 self.ui.note(f + "\n")
+                if f == ".hgtags":
+                    touchtags = True
                 try:
                     fctx = wctx.filectx(f)
                     newflags = fctx.flags()
@@ -975,6 +1025,11 @@
             p = lambda: self.changelog.writepending() and self.root or ""
             self.hook('pretxncommit', throw=True, node=hex(n), parent1=xp1,
                       parent2=xp2, pending=p)
+            if touchtags:
+                tagcache = self.join("tags.cache")
+                if os.path.exists(tagcache):
+                    self.ui.debug("commit touches .hgtags: deleting %s\n" % tagcache)
+                    os.unlink(tagcache)
             self.changelog.finalize(trp)
             tr.close()
 
diff --git a/tests/test-tags.out b/tests/test-tags.out
--- a/tests/test-tags.out
+++ b/tests/test-tags.out
@@ -30,9 +30,6 @@
 tip                                8:4ca6f1b1a68c
 first                              0:0acdaf898367
 changeset:   8:4ca6f1b1a68c
-.hgtags at c071f74ab5eb, line 2: cannot parse entry
-.hgtags at c071f74ab5eb, line 4: node 'foo' is not well formed
-.hgtags at 4ca6f1b1a68c, line 2: node 'x' is not well formed
 localtags, line 1: tag 'invalid' refers to unknown node
 tag:         tip
 parent:      3:b2ef3841386b


More information about the Mercurial-devel mailing list