[PATCH] revlog: cache chain info after calculating it for a rev (issue4452)

Siddharth Agarwal sid0 at fb.com
Thu Nov 13 23:38:01 CST 2014


# HG changeset patch
# User Siddharth Agarwal <sid0 at fb.com>
# Date 1415943398 28800
#      Thu Nov 13 21:36:38 2014 -0800
# Node ID 63ddc568071bdb7818a31d536941ec9b9a7490b5
# Parent  1e93f17982d3c8ebdc81867756540e52883c9ad4
revlog: cache chain info after calculating it for a rev (issue4452)

This dumb cache works surprisingly well: on a repository with typical delta
chains ~50k in length, unbundling a linear series of 5000 revisions (changelogs
and manifests only) went from 60 seconds to 3.

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -270,6 +270,7 @@
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
+        self._chaininfocache = {}
 
     def tip(self):
         return self.node(len(self.index) - 2)
@@ -355,7 +356,11 @@
         return base
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
+
     def _chaininfo(self, rev):
+        chaininfocache = self._chaininfocache
+        if rev in chaininfocache:
+            return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
@@ -369,10 +374,20 @@
                 iterrev = e[3]
             else:
                 iterrev -= 1
+            if iterrev in chaininfocache:
+                t = chaininfocache[iterrev]
+                clen += t[0]
+                compresseddeltalen += t[1]
+                break
             e = index[iterrev]
-        # add text length of base since decompressing that also takes work
-        compresseddeltalen += e[1]
-        return clen, compresseddeltalen
+        else:
+            # Add text length of base since decompressing that also takes
+            # work. For cache hits the length is already included.
+            compresseddeltalen += e[1]
+        r = (clen, compresseddeltalen)
+        chaininfocache[rev] = r
+        return r
+
     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF
     def rawsize(self, rev):
@@ -1453,6 +1468,7 @@
 
         # then reset internal state in memory to forget those revisions
         self._cache = None
+        self._chaininfocache = {}
         self._chunkclear()
         for x in xrange(rev, len(self)):
             del self.nodemap[self.node(x)]


More information about the Mercurial-devel mailing list