[PATCH 18 of 19] snapshot: also consider the snapshot chain of one unrelated revision

Sat Sep 8 06:57:12 EDT 2018

# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1536333525 14400
#      Fri Sep 07 11:18:45 2018 -0400
# Node ID f8b71b0b9d035171dae9162f87747ce02da4b086
# Parent  535a778ae14a756b207901db5b3c0e5a078ae6cd
# EXP-Topic sparse-snapshot
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r f8b71b0b9d03
snapshot: also consider the snapshot chain of one unrelated revision

To maximize the chance of good delta chain reuse, we inject an unrelated delta
chain into our search. To do so, we search for the highest revision unrelated
to the parents of the current revision and use its snapshot chain too.

Adding this extra snapshot into the mix can have a performance impact. We'll
deal with performance impact in a later series.

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -719,6 +719,36 @@ def _rawgroups(revlog, p1, p2, cachedelt
                 parents_snaps[idx].add(s)
         snapfloor = min(parents_snaps[0]) + 1
         _findsnapshots(revlog, snapshots, snapfloor)
+        # search for the highest "unrelated" revision
+        #
+        # Adding snapshots used by "unrelated" revision increase the odd we
+        # reuse an independant, yet better snapshot chain.
+        #
+        # XXX instead of building a set of revisions, we could lazily enumerate
+        # over the chains. That would be more efficient, however we stick to
+        # simple code for now.
+        all_revs = set()
+        for chain in candidate_chains:
+            all_revs.update(chain)
+        other = None
+        for r in revlog.revs(prev, snapfloor):
+            if r not in all_revs:
+                other = r
+                break
+        if other is not None:
+            # To avoid unfair competition, we won't use unrelated intermediate
+            # snapshot that are deeper than the ones from the parent delta
+            # chain.
+            max_depth = max(parents_snaps.keys())
+            chain = deltachain(other)
+            for idx, s in enumerate(chain):
+                if s < snapfloor:
+                    continue
+                if max_depth < idx:
+                    break
+                if not revlog.issnapshot(s):
+                    break
+                parents_snaps[idx].add(s)
         # Test them as possible intermediate snapshot base
         # We test them from highest to lowest level. High level one are more
         # likely to result in small delta
@@ -756,9 +786,10 @@ def _rawgroups(revlog, p1, p2, cachedelt
         # more and more snapshot as the repository grow.
         yield tuple(snapshots[nullrev])
 
-    # other approach failed try against prev to hopefully save us a
-    # fulltext.
-    yield (prev,)
+    if not sparse:
+        # other approach failed try against prev to hopefully save us a
+        # fulltext.
+        yield (prev,)
 
 class deltacomputer(object):
     def __init__(self, revlog):
diff --git a/tests/test-sparse-revlog.t b/tests/test-sparse-revlog.t
--- a/tests/test-sparse-revlog.t
+++ b/tests/test-sparse-revlog.t
@@ -77,7 +77,7 @@ repeatedly while some of it changes rare
   
 
   $ f -s .hg/store/data/*.d
-  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=59302280
+  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=59230936
   $ hg debugrevlog *
   format : 1
   flags  : generaldelta
@@ -89,45 +89,45 @@ repeatedly while some of it changes rare
       empty     :        0 ( 0.00%)
                      text  :        0 (100.00%)
                      delta :        0 (100.00%)
-      snapshot  :      168 ( 3.36%)
-        lvl-0   :              4 ( 0.08%)
-        lvl-1   :             18 ( 0.36%)
-        lvl-2   :             39 ( 0.78%)
-        lvl-3   :             54 ( 1.08%)
-        lvl-4   :             53 ( 1.06%)
-      deltas    :     4833 (96.64%)
-  revision size : 59302280
-      snapshot  :  5833942 ( 9.84%)
-        lvl-0   :         804068 ( 1.36%)
-        lvl-1   :        1378470 ( 2.32%)
-        lvl-2   :        1608138 ( 2.71%)
-        lvl-3   :        1222158 ( 2.06%)
-        lvl-4   :         821108 ( 1.38%)
-      deltas    : 53468338 (90.16%)
+      snapshot  :      176 ( 3.52%)
+        lvl-0   :              3 ( 0.06%)
+        lvl-1   :             17 ( 0.34%)
+        lvl-2   :             45 ( 0.90%)
+        lvl-3   :             56 ( 1.12%)
+        lvl-4   :             55 ( 1.10%)
+      deltas    :     4825 (96.48%)
+  revision size : 59230936
+      snapshot  :  5770371 ( 9.74%)
+        lvl-0   :         602962 ( 1.02%)
+        lvl-1   :        1534153 ( 2.59%)
+        lvl-2   :        1604445 ( 2.71%)
+        lvl-3   :        1218174 ( 2.06%)
+        lvl-4   :         810637 ( 1.37%)
+      deltas    : 53460565 (90.26%)
   
   chunks        :     5001
       0x78 (x)  :     5001 (100.00%)
-  chunks size   : 59302280
-      0x78 (x)  : 59302280 (100.00%)
+  chunks size   : 59230936
+      0x78 (x)  : 59230936 (100.00%)
   
   avg chain length  :       17
   max chain length  :       45
-  max chain reach   : 22744720
+  max chain reach   : 25326012
   compression ratio :       29
   
   uncompressed data size (min/max/avg) : 346468 / 346472 / 346471
-  full revision size (min/max/avg)     : 200985 / 201050 / 201017
-  inter-snapshot size (min/max/avg)    : 11598 / 163304 / 30669
-      level-1   (min/max/avg)          : 15616 / 163304 / 76581
-      level-2   (min/max/avg)          : 11602 / 86428 / 41234
-      level-3   (min/max/avg)          : 11598 / 42390 / 22632
-      level-4   (min/max/avg)          : 11603 / 19649 / 15492
-  delta size (min/max/avg)             : 10649 / 105465 / 11063
+  full revision size (min/max/avg)     : 200897 / 201050 / 200987
+  inter-snapshot size (min/max/avg)    : 11598 / 171990 / 29869
+      level-1   (min/max/avg)          : 14037 / 171990 / 90244
+      level-2   (min/max/avg)          : 11632 / 84456 / 35654
+      level-3   (min/max/avg)          : 11598 / 41486 / 21753
+      level-4   (min/max/avg)          : 11618 / 19913 / 14738
+  delta size (min/max/avg)             : 10649 / 105209 / 11079
   
-  deltas against prev  : 4167 (86.22%)
-      where prev = p1  : 4129     (99.09%)
+  deltas against prev  : 4156 (86.13%)
+      where prev = p1  : 4120     (99.13%)
       where prev = p2  :    0     ( 0.00%)
-      other            :   38     ( 0.91%)
-  deltas against p1    :  643 (13.30%)
+      other            :   36     ( 0.87%)
+  deltas against p1    :  646 (13.39%)
   deltas against p2    :   23 ( 0.48%)
   deltas against other :    0 ( 0.00%)