[PATCH 3 of 5] revlog: support using an existing file handle when reading revlogs
Gregory Szorc
gregory.szorc at gmail.com
Sun Sep 27 22:32:12 CDT 2015
# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1443394115 25200
# Sun Sep 27 15:48:35 2015 -0700
# Node ID bfcc837cb4817e3e94d95f936e3631acded55c6c
# Parent fc537af05691454c63891f986d7f77cebd9c2068
revlog: support using an existing file handle when reading revlogs
Currently, the low-level revlog reading code always opens a new file
handle. In some key scenarios, the revlog is already open and an
existing file handle could be used to read from it. This patch paves the
way for that by teaching various revlog reading functions to accept
an optional existing file handle to read from.
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -931,13 +931,25 @@ class revlog(object):
self._chunkcache = o, d + data
else:
self._chunkcache = offset, data
- def _loadchunk(self, offset, length):
- if self._inline:
- df = self.opener(self.indexfile)
+ def _loadchunk(self, offset, length, df=None):
+ """Load a chunk/segment from the revlog.
+
+ Accepts absolute offset, length to read, and an optional existing
+ file handle to read from.
+
+ If an existing file handle is passed, it will be seeked and the
+ original seek position will NOT be restored.
+ """
+ if df:
+ closehandle = False
else:
- df = self.opener(self.datafile)
+ if self._inline:
+ df = self.opener(self.indexfile)
+ else:
+ df = self.opener(self.datafile)
+ closehandle = True
# Cache data both forward and backward around the requested
# data, in a fixed size window. This helps speed up operations
# involving reading the revlog backwards.
@@ -946,15 +958,16 @@ class revlog(object):
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
- realoffset)
df.seek(realoffset)
d = df.read(reallength)
- df.close()
+ if closehandle:
+ df.close()
self._addchunk(realoffset, d)
if offset != realoffset or reallength != length:
return util.buffer(d, offset - realoffset, length)
return d
- def _getchunk(self, offset, length):
+ def _getchunk(self, offset, length, df=None):
o, d = self._chunkcache
l = len(d)
# is it in the cache?
@@ -964,23 +977,23 @@ class revlog(object):
if cachestart == 0 and cacheend == l:
return d # avoid a copy
return util.buffer(d, cachestart, cacheend - cachestart)
- return self._loadchunk(offset, length)
+ return self._loadchunk(offset, length, df=df)
- def _chunkraw(self, startrev, endrev):
+ def _chunkraw(self, startrev, endrev, df=None):
start = self.start(startrev)
end = self.end(endrev)
if self._inline:
start += (startrev + 1) * self._io.size
end += (endrev + 1) * self._io.size
length = end - start
- return self._getchunk(start, length)
+ return self._getchunk(start, length, df=df)
- def _chunk(self, rev):
- return decompress(self._chunkraw(rev, rev))
+ def _chunk(self, rev, df=None):
+ return decompress(self._chunkraw(rev, rev, df=df))
- def _chunks(self, revs):
+ def _chunks(self, revs, df=None):
'''faster version of [self._chunk(rev) for rev in revs]
Assumes that revs is in ascending order.'''
if not revs:
@@ -998,16 +1011,16 @@ class revlog(object):
try:
while True:
# ensure that the cache doesn't change out from under us
_cache = self._chunkcache
- self._chunkraw(revs[0], revs[-1])
+ self._chunkraw(revs[0], revs[-1], df=df)
if _cache == self._chunkcache:
break
offset, data = _cache
except OverflowError:
# issue4215 - we can't cache a run of chunks greater than
# 2G on Windows
- return [self._chunk(rev) for rev in revs]
+ return [self._chunk(rev, df=df) for rev in revs]
for rev in revs:
chunkstart = start(rev)
if inline:
@@ -1037,11 +1050,14 @@ class revlog(object):
return mdiff.textdiff(self.revision(rev1),
self.revision(rev2))
- def revision(self, nodeorrev):
+ def revision(self, nodeorrev, _df=None):
"""return an uncompressed revision of a given node or revision
number.
+
+ _df is an existing file handle to read from. It is meant to only be
+ used internally.
"""
if isinstance(nodeorrev, int):
rev = nodeorrev
node = self.node(rev)
@@ -1090,9 +1106,9 @@ class revlog(object):
# drop cache to save memory
self._cache = None
- bins = self._chunks(chain)
+ bins = self._chunks(chain, df=_df)
if text is None:
text = str(bins[0])
bins = bins[1:]
More information about the Mercurial-devel
mailing list