[PATCH 2 of 2] revlog: add Mercurial config variable for limiting delta-chain length

Mateusz Kwapich mitrandir at fb.com
Mon Nov 10 13:27:41 CST 2014


# HG changeset patch
# User Mateusz Kwapich <mitrandir at fb.com>
# Date 1415312405 28800
#      Thu Nov 06 14:20:05 2014 -0800
# Node ID 0c2718661ea13e8054ab0d336cb5784b85991a5a
# Parent  79ae6c4132b5c582ea7dbd1aa4af8e2bcd2f5973
revlog: add Mercurial config variable for limiting delta-chain length

The current heuristic for deciding between storing a delta and a full text
is based on the ratio (sizeofdeltas)/(sizeoffulltext).

In some cases (for example, the Mercurial manifest of a huge repo) this approach
can result in extremely long delta chains (~30,000), which are very slow to
read. (In the case of the manifest, ~500ms is added to every hg command because
of that.)

This commit introduces the "revlog.maxchainlen" configuration variable, which
limits the delta chain length.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -316,6 +316,9 @@
         chunkcachesize = self.ui.configint('format', 'chunkcachesize')
         if chunkcachesize is not None:
             self.sopener.options['chunkcachesize'] = chunkcachesize
+        maxchainlen = self.ui.configint('revlog', 'maxchainlen')
+        if maxchainlen is not None:
+            self.sopener.options['maxchainlen'] = maxchainlen
 
     def _writerequirements(self):
         reqfile = self.opener("requires", "w")
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -204,6 +204,7 @@
         self._basecache = None
         self._chunkcache = (0, '')
         self._chunkcachesize = 65536
+        self._maxchainlen = None
         self.index = []
         self._pcache = {}
         self._nodecache = {nullid: nullrev}
@@ -219,6 +220,8 @@
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
+            if 'maxchainlen' in opts:
+                self._maxchainlen = opts['maxchainlen']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -1216,11 +1219,13 @@
                 base = rev
             else:
                 base = chainbase
-            return dist, l, data, base, chainbase
+            chainlen = self.chainlen(rev) + 1
+            return dist, l, data, base, chainbase, chainlen
 
         curr = len(self)
         prev = curr - 1
         base = chainbase = curr
+        chainlen = None
         offset = self.end(prev)
         flags = 0
         d = None
@@ -1240,7 +1245,7 @@
                     d = builddelta(prev)
             else:
                 d = builddelta(prev)
-            dist, l, data, base, chainbase = d
+            dist, l, data, base, chainbase, chainlen = d
 
         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
@@ -1249,7 +1254,8 @@
                                         cachedelta[1])
         else:
             textlen = len(text)
-        if d is None or dist > textlen * 2:
+        if (d is None or dist > textlen * 2 or
+            self._maxchainlen and chainlen > self._maxchainlen):
             text = buildtext()
             data = self.compress(text)
             l = len(data[1]) + len(data[0])
diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t
--- a/tests/test-debugcommands.t
+++ b/tests/test-debugcommands.t
@@ -24,6 +24,40 @@
   full revision size (min/max/avg)     : 44 / 44 / 44
   delta size (min/max/avg)             : 0 / 0 / 0
 
+Test max chain len
+  $ cat >> $HGRCPATH << EOF
+  > [revlog]
+  > maxchainlen=4
+  > EOF
+
+  $ echo "This test checks if maxchainlen config value is respected also it can serve as basic test for debugrevlog -d <file>.\n" >> a
+  $ hg ci -m a
+  $ echo "b\n" >> a
+  $ hg ci -m a
+  $ echo "c\n" >> a
+  $ hg ci -m a
+  $ echo "d\n" >> a
+  $ hg ci -m a
+  $ echo "e\n" >> a
+  $ hg ci -m a
+  $ echo "f\n" >> a
+  $ hg ci -m a
+  $ echo 'g\n' >> a
+  $ hg ci -m a
+  $ echo 'h\n' >> a
+  $ hg ci -m a
+  $ hg debugrevlog -d a
+  # rev p1rev p2rev start   end deltastart base   p1   p2 rawsize totalsize compression heads chainlen
+      0    -1    -1     0   ???          0    0    0    0     ???      ????           ?     1        0 (glob)
+      1     0    -1   ???   ???          0    0    0    0     ???      ????           ?     1        1 (glob)
+      2     1    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        2 (glob)
+      3     2    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        3 (glob)
+      4     3    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        4 (glob)
+      5     4    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        0 (glob)
+      6     5    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        1 (glob)
+      7     6    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        2 (glob)
+      8     7    -1   ???   ???        ???  ???  ???    0     ???      ????           ?     1        3 (glob)
+  $ cd ..
 
 Test internal debugstacktrace command
 


More information about the Mercurial-devel mailing list