[PATCH V2] sparse-read: ignore trailing empty revs in each read chunk
Paul Morelle
paul.morelle at octobus.net
Thu Oct 19 08:53:31 UTC 2017
# HG changeset patch
# User Paul Morelle <paul.morelle at octobus.net>
# Date 1508333299 -7200
# Wed Oct 18 15:28:19 2017 +0200
# Node ID ef3d9978b7daf5c2152f624b10fffb13425b06db
# Parent fb2574bd73a9c0d9a7a88407b20fdabc9213bc20
# EXP-Topic optimized-read
# Available At https://bitbucket.org/octobus/mercurial-devel/
# hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ef3d9978b7da
sparse-read: ignore trailing empty revs in each read chunk
An empty entry in the revlog may happen for two reasons:
- when the file is empty, and the revlog stores a snapshot;
- when there is a merge and both parents were identical.
`hg debugindex -m | awk '$3=="0"{print}' | wc -l` gives 1917 such entries
in my clone of pypy, and 113 in my clone of mercurial.
These empty revisions may be located at the end of a sparse chain, and in some
special cases may lead to reading relatively large amounts of data for nothing.
diff -r fb2574bd73a9 -r ef3d9978b7da mercurial/revlog.py
--- a/mercurial/revlog.py Wed Oct 18 09:07:48 2017 +0200
+++ b/mercurial/revlog.py Wed Oct 18 15:28:19 2017 +0200
@@ -162,6 +162,20 @@
s.update(text)
return s.digest()
+def _trimchunk(revlog, revs, startidx, endidx=None):
+ """returns revs[startidx:endidx] without empty trailing revs
+ """
+ length = revlog.length
+
+ if endidx is None:
+ endidx = len(revs)
+
+ # Trim empty revs at the end, but never the very first revision of a chain
+ while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
+ endidx -= 1
+
+ return revs[startidx:endidx]
+
def _slicechunk(revlog, revs):
"""slice revs to reduce the amount of unrelated data to be read from disk.
@@ -194,6 +208,10 @@
revstart = start(rev)
revlen = length(rev)
+ # Skip empty revisions to form larger holes
+ if revlen == 0:
+ continue
+
if prevend is not None:
gapsize = revstart - prevend
# only consider holes that are large enough
@@ -222,9 +240,16 @@
previdx = 0
while indicesheap:
idx = heapq.heappop(indicesheap)
- yield revs[previdx:idx]
+
+ chunk = _trimchunk(revlog, revs, previdx, idx)
+ if chunk:
+ yield chunk
+
previdx = idx
- yield revs[previdx:]
+
+ chunk = _trimchunk(revlog, revs, previdx)
+ if chunk:
+ yield chunk
# index v0:
# 4 bytes: offset
More information about the Mercurial-devel
mailing list