[PATCH 1 of 2 v2] localrepo: persistent caching of branch names

Mads Kiilerich mads at kiilerich.com
Wed Oct 15 13:14:22 CDT 2014


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1413396806 -7200
#      Wed Oct 15 20:13:26 2014 +0200
# Node ID bf7a0169677c0545a63e64690b0e49e50b376703
# Parent  48c0b101a9de1fdbd638daa858da845cd05a6be7
localrepo: persistent caching of branch names

It is expensive to retrieve the branch name. Very expensive when creating a
changectx and calling .branch() - slightly less when using
changelog.branchinfo().

Now, to really speed things up, cache the results on disk. For each repo
revision store the node hash and a reference to the branch name. To avoid using
too much space, each branch name is only stored once. To make it 100% stable
against repository mutations, always check the node hash before using the
cache content.

This change promises to speed up some operations 4-6 times once it is actually
used.

A simpler approach that didn't store and validate node hashes for every
revision had a 20x speedup but could be tricked when modifying history. It
would usually reset the cache, but with enough effort it could be tricked
into not noticing changes.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 from node import hex, nullid, short
 from i18n import _
-import urllib
+import urllib, struct, array
 import peer, changegroup, subrepo, pushkey, obsolete, repoview
 import changelog, dirstate, filelog, manifest, context, bookmarks, phases
 import lock as lockmod
@@ -21,6 +21,14 @@ import branchmap, pathutil
 propertycache = util.propertycache
 filecache = scmutil.filecache
 
+# branch name caching
+bcfilename = 'cache/branchnames' # cache file, opened via self.vfs
+bcversion = 2345164374 # first header field; must match on load
+bcheadfmt = '>LLL' # header: version, records start, records length
+bcheadsize = struct.calcsize(bcheadfmt)
+bcrecfmt = '>20sH' # per-revision record: node hash, branch name index
+bcrecsize = struct.calcsize(bcrecfmt)
+
 class repofilecache(filecache):
     """All filecache usage on repo are done for logic that should be unfiltered
     """
@@ -179,6 +187,7 @@ class localrepository(object):
     openerreqs = set(('revlogv1', 'generaldelta'))
     requirements = ['revlogv1']
     filtername = None
+    _branchcachedirty = None # None: cache not loaded; else dirty flag
 
     # a list of (ui, featureset) functions.
     # only functions defined in module of enabled extensions are invoked
@@ -298,7 +307,7 @@ class localrepository(object):
         self.filteredrevcache = {}
 
     def close(self):
-        pass
+        self._branchcachesave() # write branch name cache if dirty
 
     def _restrictcapabilities(self, caps):
         # bundle2 is not ready for prime time, drop it unless explicitly
@@ -723,6 +732,74 @@ class localrepository(object):
         repo = (remote and remote.local()) and remote or self
         return repo[key].branch()
 
+    def _branchcacheload(self):
+        """Load cached branch values."""
+        try:
+            data = self.vfs.open(bcfilename).read()
+        except IOError:
+            data = '' # unreadable file: start with an empty cache
+
+        self._branches = [] # branch names; records refer to them by index
+        self._branchrecs = array.array('c') # bytes of struct type bcrecfmt
+        self.__dict__['_branchcachedirty'] = True # reset if file valid
+        reporecslen = len(self) * bcrecsize # size of len(self) records
+        if len(data) >= bcheadsize:
+            v, recsstart, recslen = struct.unpack_from(bcheadfmt, data)
+            if v == bcversion and len(data) == recsstart + recslen:
+                if recsstart: # NOTE(review): never 0 in files we write
+                    self._branches = \
+                        data[bcheadsize:recsstart].split('\0')
+                self._branchrecs.fromstring(
+                    buffer(data, recsstart, min(recslen, reporecslen)))
+                self.__dict__['_branchcachedirty'] = recslen > reporecslen
+            else: # wrong version or inconsistent file size
+                self.ui.debug('branch cache file was invalid\n')
+
+        if len(self._branchrecs) < reporecslen: # pad with 0xff bytes
+            self._branchrecs.extend(
+                '\xff' * (reporecslen - len(self._branchrecs)))
+
+        self._branchnamesindex = dict((b, r) # branch name -> index
+                                      for r, b in enumerate(self._branches))
+
+    def branch(self, rev):
+        """return branch name of rev, using and updating persistent cache."""
+        if self._branchcachedirty is None: # cache not loaded yet
+            self._branchcacheload()
+
+        node = self.changelog.node(rev)
+        cachenode, branchidx = struct.unpack_from(bcrecfmt, self._branchrecs,
+                                                  rev * bcrecsize)
+        if cachenode == node and branchidx < len(self._branches):
+            return self._branches[branchidx] # cache hit, hash verified
+        b, _close = self.changelog.branchinfo(rev) # cache miss or stale
+        if b in self._branchnamesindex:
+            branchidx = self._branchnamesindex[b]
+        else:
+            branchidx = len(self._branches) # new name: append to the table
+            self._branches.append(b)
+            self._branchnamesindex[b] = branchidx
+        struct.pack_into(bcrecfmt, self._branchrecs, rev * bcrecsize,
+                         node, branchidx)
+        self.__dict__['_branchcachedirty'] = True # needs saving
+        return b
+
+    def _branchcachesave(self):
+        """save branch cache if it is dirty"""
+        if self._branchcachedirty: # None (never loaded) is falsy too
+            self.ui.debug('writing branch cache file\n')
+            try:
+                f = self.vfs.open(bcfilename, 'w', atomictemp=True)
+                s = '\0'.join(self._branches) # NUL-separated branch names
+                f.write(struct.pack(bcheadfmt, bcversion,
+                                    bcheadsize + len(s), len(self._branchrecs)))
+                f.write(s)
+                f.write(self._branchrecs)
+                f.close()
+            except IOError:
+                pass # write errors are ignored
+            self.__dict__['_branchcachedirty'] = False # even on failure
+
     def known(self, nodes):
         nm = self.changelog.nodemap
         pc = self._phasecache


More information about the Mercurial-devel mailing list