[PATCH 11 of 13] revlog: add function to slice chunk down to a given size
Boris Feld
boris.feld at octobus.net
Tue Jul 10 09:27:20 EDT 2018
# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1531216590 -7200
# Tue Jul 10 11:56:30 2018 +0200
# Node ID 068687c4911133c6f9d3f5c103634f7648305cc8
# Parent bdc253d8f2cfdbe1a8977a2b330a3950b108b036
# EXP-Topic write-for-sparse-read
# Available At https://bitbucket.org/octobus/mercurial-devel/
# hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 068687c49111
revlog: add function to slice chunk down to a given size
It is possible to encounter situations where the slicing based on density did
not achieve chunk smaller than the 4*textlength limit. To avoid extra memory
consumption in those cases, we need to be able to break down chunk to a given
size. Actual caller comes in the next changesets.
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -338,6 +338,83 @@ def _slicechunk(revlog, revs):
revlog._srmingapsize):
yield chunk
+def _slicechunktosize(revlog, revs, targetsize):
+ """slice revs to match the target size
+
+ This is intended to be used on chunk that density slicing selected by that
+ are still too large compared to the read garantee of revlog. This might
+ happens when "minimal gap size" interrupted the slicing or when chain are
+ built in a way that create large blocks next to each other.
+
+ >>> revlog = _testrevlog([
+ ... 3, #0 (3)
+ ... 5, #1 (2)
+ ... 6, #2 (1)
+ ... 8, #3 (2)
+ ... 8, #4 (empty)
+ ... 11, #5 (3)
+ ... 12, #6 (1)
+ ... 13, #7 (1)
+ ... 14, #8 (1)
+ ... ])
+
+ Cases where chunk is already small enough
+ >>> list(_slicechunktosize(revlog, [0], 3))
+ [[0]]
+ >>> list(_slicechunktosize(revlog, [6, 7], 3))
+ [[6, 7]]
+ >>> list(_slicechunktosize(revlog, [0], None))
+ [[0]]
+ >>> list(_slicechunktosize(revlog, [6, 7], None))
+ [[6, 7]]
+
+ cases where we need actual slicing
+ >>> list(_slicechunktosize(revlog, [0, 1], 3))
+ [[0], [1]]
+ >>> list(_slicechunktosize(revlog, [1, 3], 3))
+ [[1], [3]]
+ >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
+ [[1, 2], [3]]
+ >>> list(_slicechunktosize(revlog, [3, 5], 3))
+ [[3], [5]]
+ >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
+ [[3], [5]]
+ >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
+ [[5], [6, 7, 8]]
+ >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
+ [[0], [1, 2], [3], [5], [6, 7, 8]]
+
+ Case with too large individual chunk (must return valid chunk)
+ >>> list(_slicechunktosize(revlog, [0, 1], 2))
+ [[0], [1]]
+ >>> list(_slicechunktosize(revlog, [1, 3], 1))
+ [[1], [3]]
+ >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
+ [[3], [5]]
+ """
+ assert targetsize is None or 0 <= targetsize
+ if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
+ yield revs
+ return
+
+ startrevidx = 0
+ startdata = revlog.start(revs[0])
+ endrevidx = 0
+ iterrevs = enumerate(revs)
+ next(iterrevs) # skip first rev
+ for idx, r in iterrevs:
+ span = revlog.end(r) - startdata
+ if span <= targetsize:
+ endrevidx = idx
+ else:
+ chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
+ if chunk:
+ yield chunk
+ startrevidx = idx
+ startdata = revlog.start(r)
+ endrevidx = idx
+ yield _trimchunk(revlog, revs, startrevidx)
+
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
"""slice revs to reduce the amount of unrelated data to be read from disk.
More information about the Mercurial-devel
mailing list