[PATCH 7 of 7] localrepo: implement persistent tag caching

Greg Ward greg at gerg.ca
Fri Jul 3 09:48:31 CDT 2009


# HG changeset patch
# User Greg Ward <greg at gerg.ca>
# Date 1246632310 14400
# Node ID ab5c492c2cd6e878c1cbcccad0a043d00a9413b6
# Parent  28f7005c067ff1380c262283c71ba5327ee29614
localrepo: implement persistent tag caching

- factor out tagcache class with methods readcache() and writecache()
- the expensive part of tag finding (iterate over heads and find
  .hgtags filenode) is now in tagcache.readcache()

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -280,27 +280,37 @@
                 alltags[name] = anode, ahist
                 tagtypes[name] = tagtype
 
+        cache = tagcache(self.ui, self)
+
+        self.ui.debug("getting list of heads...\n")
+        heads = self.heads()
+        heads.reverse()                 # oldest to newest
+        self.ui.debug("checking tag cache...\n")
+        (staleheads, cachetags) = cache.readcache(heads)
+        updatetags(cachetags, 'global')
+
+        self.ui.debug("iterating over %d stale heads...\n" % len(staleheads))
         seen = set()
         fctx = None
-        ctxs = []                       # list of filectx
-        for node in self.heads():
-            try:
-                fnode = self[node].filenode('.hgtags')
-            except error.LookupError:
-                continue
+        for (node, fnode) in staleheads:
+            assert fnode is not None
             if fnode not in seen:
                 seen.add(fnode)
                 if not fctx:
                     fctx = self.filectx('.hgtags', fileid=fnode)
                 else:
                     fctx = fctx.filectx(fnode)
-                ctxs.append(fctx)
 
-        # read the tags file from each head, ending with the tip
-        for fctx in reversed(ctxs):
-            filetags = self._readtags(fctx.data().splitlines(), fctx)
-            updatetags(filetags, "global")
+                filetags = self._readtags(fctx.data().splitlines(), fctx)
+                updatetags(filetags, "global")
 
+        self.ui.debug("found %d global tags\n" % len(alltags))
+
+        # Update the cache with tag info read from stale heads.
+        cache.writecache(alltags)
+
+        # this is cheap: no need to cache it
+        self.ui.debug("reading localtags...\n")
         try:
             data = encoding.fromlocal(self.opener("localtags").read())
             # localtags are stored in the local character set
@@ -312,12 +322,15 @@
 
         self._tags = {}
         self._tagtypes = {}
+        #for (name, (node, _)) in alltags.iteritems():
         for name, nodehist in alltags.iteritems():
             node = nodehist[0]
             if node != nullid:
                 self._tags[name] = node
             self._tagtypes[name] = tagtypes[name]
         self._tags['tip'] = self.changelog.tip()
+
+        self.ui.debug("_findtags() all done\n")
         return self._tags
 
     def _readtags(self, lines, fn):
@@ -2207,6 +2220,151 @@
             return self.stream_in(remote)
         return self.pull(remote, heads)
 
+class tagcache(object):
+    """Object for managing persistent tag cache.  Only needs to live for
+    as long as it takes to open, read, update, and write the cache."""
+
+    __slots__ = ['ui',
+                 'repo',
+                 'allheads',
+                ]
+
+    def __init__(self, ui, repo):
+        self.ui = ui
+        self.repo = repo
+
+        self.allheads = None            # list of (headrev, headnode, tagnode)
+
+    def readcache(self, heads):
+        """Read the tags.cache file and check if it is up-to-date.  Return
+        a tuple (staleheads, cachetags).  staleheads is a list of the
+        heads for which we must re-read .hgtags as (headnode, tagnode)
+        tuples, where tagnode is the filenode of .hgtags on that head.
+        cachetags is the map of cached tags in the same format as
+        returned by repo._readtags()."""
+
+        # Open the cache file or bail out early.
+        try:
+            cachefile = self.repo.opener("tags.cache", "rt")
+        except IOError:
+            # No cache file yet: total cache miss.  All heads with
+            # .hgtags are considered stale.  (This loop is the most
+            # expensive part of the whole tag-reading process, and the
+            # one we try hardest to avoid.)
+            self.ui.debug("iterating over %d heads for .hgtags fnodes...\n"
+                          % len(heads))
+            self.allheads = []
+            staleheads = []
+            for head in heads:
+                cctx = self.repo[head]
+                try:
+                    fnode = cctx.filenode('.hgtags')
+                    staleheads.append((head, fnode))
+                except error.LookupError:
+                    fnode = None
+                self.allheads.append((cctx.rev(), head, fnode))
+
+            return (staleheads, {})
+
+        # Read the cache up to the delimiting blank line.
+        self.allheads = []
+        cacheheads = []                 # list of head nodes
+        for line in cachefile:
+            line = line.strip()
+            if not line:
+                break
+
+            line = line.split()
+            headrev = int(line[0])
+            headbin = bin(line[1])
+            cacheheads.append(headbin)
+            if len(line) == 3:
+                fnode = bin(line[2])
+            else:
+                fnode = None
+            self.allheads.append((headrev, headbin, fnode))
+
+        # And read the rest of the file: we're going to need it sooner
+        # or later.
+        cachetags = self.repo._readtags(cachefile, cachefile.name)
+        cachefile.close()
+
+        # Check if cached heads == current heads.  If so, that means no
+        # new changesets have entered the repo since the cache was
+        # written, so the cache is completely up-to-date.
+        if cacheheads == heads:
+            self.ui.debug("good news: tagcache up-to-date\n")
+            return ([], cachetags)
+
+        # Tag cache is stale: find new heads added since the cache was
+        # written.  (Heads that have disappeared are not interesting:
+        # that just means they have been merged, and we'll find their
+        # tag information in the new head created from the merge.)
+        newheads = [head for head in heads if head not in set(cacheheads)]
+        assert newheads, "cache heads != current heads, but no new heads found"
+        self.ui.debug("tag cache is stale: found %d new heads (%s)\n"
+                      % (len(newheads), ", ".join(map(short, newheads))))
+
+        # Not all new heads actually have new tag info.  Visit each new
+        # head, checking if the .hgtags version there is a version we've
+        # ever seen before.  If not, that's a stale head and we'll have
+        # to reread its .hgtags.
+        staleheads = []
+        fnodes = set([fnode for (_, _, fnode) in self.allheads
+                      if fnode is not None])
+        for head in newheads:
+            cctx = self.repo[head]
+            try:
+                newfnode = cctx.filenode('.hgtags')
+            except error.LookupError:
+                newfnode = None
+            self.allheads.append((cctx.rev(), head, newfnode))
+            if newfnode and newfnode not in fnodes:
+                # XXX what happens if user removes .hgtags from some
+                # head? does that make filenode() raise LookupError?  if
+                # so, this might be the killer case that means cached
+                # tags have to store more info than each .hgtags (ie. we
+                # have to persist the alltags dict)
+                staleheads.append((head, newfnode))
+
+        self.allheads.sort()            # order by rev, oldest to tip
+        self.ui.debug("tagcache: found %d stale heads (%s)\n"
+                      % (len(staleheads), ", ".join([short(sh) for (sh, fn) in staleheads])))
+        return (staleheads, cachetags)
+
+    def writecache(self, tags):
+        """Write the tags.cache file.  tags is the mapping of all global
+        tags, whether read from the cache or from .hgtags files on stale
+        heads, in the format returned by repo._readtags()."""
+
+        assert self.allheads is not None, \
+               "readcache() must be called before writecache()"
+
+        # The cache file has two sections, delimited by a blank line.
+        #
+        # Section 1 is the list of heads:
+        #   <headrev> <headnode> [<tagnode>]
+        # where <headnode> and <headrev> redundantly identify the head,
+        # and <tagnode> is the filenode of .hgtags at that head.  Heads
+        # with no .hgtags file are still listed, but with no <tagnode>.
+        #
+        # Section 2 is the list of tags in the same format as .hgtags
+        # itself:
+        #   <node> <name>
+        cachefile = self.repo.opener("tags.cache", "wt")
+        self.ui.debug("writing %s\n" % cachefile.name)
+        for (rev, node, fnode) in self.allheads:
+            if fnode is None:
+                cachefile.write("%d %s\n" % (rev, hex(node)))
+            else:
+                cachefile.write("%d %s %s\n" % (rev, hex(node), hex(fnode)))
+        cachefile.write("\n")
+
+        for (name, (node, _)) in tags.iteritems():
+            cachefile.write("%s %s\n" % (hex(node), name))
+
+        cachefile.close()
+
 # used to avoid circular references so destructors work
 def aftertrans(files):
     renamefiles = [tuple(t) for t in files]


More information about the Mercurial-devel mailing list