D1773: revlog: use named attributes on revlog index entries
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Wed Dec 27 00:36:27 UTC 2017
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
Now that we're using a dedicated type for index entries everywhere,
we can access fields in them by name instead of integer offset.
This change has a significant impact on performance on the Firefox
repository. (Values are from before refactor, parent commit, this
commit.)
$ HGMODULEPOLICY=c hg perfrevset '::tip'
! wall 0.672636 comb 0.670000 user 0.650000 sys 0.020000 (best of 15)
! wall 0.962372 comb 0.960000 user 0.940000 sys 0.020000 (best of 10)
! wall 0.774613 comb 0.780000 user 0.770000 sys 0.010000 (best of 13)
$ HGMODULEPOLICY=py hg perfrevset '::tip'
! wall 1.187288 comb 1.190000 user 1.170000 sys 0.020000 (best of 9)
! wall 1.667181 comb 1.670000 user 1.650000 sys 0.020000 (best of 6)
! wall 1.514946 comb 1.520000 user 1.490000 sys 0.030000 (best of 7)
And with PyPy:
! wall 0.192745 comb 0.180000 user 0.180000 sys 0.000000 (best of 47)
! wall 0.198590 comb 0.200000 user 0.190000 sys 0.010000 (best of 45)
! wall 0.207068 comb 0.220000 user 0.200000 sys 0.020000 (best of 43)
The C extension is ~15% slower than before the refactor. This is a
bit unfortunate. That is a bit steep price to pay for more readable
code and the establishment of a formal interface for index entries.
However, all is not lost! Because we're now using a custom type and
conforming to a yet-to-be-formally-defined interface, there's nothing
preventing us from implementing a backed-by-C custom type for revlog
entries. This type could lazily resolve PyObject instances, which
should result in a massive performance boost for operations that
don't need to access all fields of the index entry. This is easier
done *after* we drop all uses of tuples and their API for referring
to index entries.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D1773
AFFECTED FILES
mercurial/revlog.py
CHANGE DETAILS
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -568,7 +568,7 @@
if p is None:
p = len(i) - 2
for r in xrange(p, -1, -1):
- v = i[r][7]
+ v = i[r].node
n[v] = r
if v == node:
self._nodepos = r - 1
@@ -582,17 +582,17 @@
# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
# are flags.
def start(self, rev):
- return int(self.index[rev][0] >> 16)
+ return int(self.index[rev].offsetflags >> 16)
def flags(self, rev):
- return self.index[rev][0] & 0xFFFF
+ return self.index[rev].offsetflags & 0xFFFF
def length(self, rev):
- return self.index[rev][1]
+ return self.index[rev].chunklength
def rawsize(self, rev):
"""return the length of the uncompressed text for a given revision"""
- l = self.index[rev][2]
+ l = self.index[rev].rawlength
if l >= 0:
return l
@@ -615,16 +615,16 @@
return base
index = self.index
- base = index[rev][3]
+ base = index[rev].baserev
while base != rev:
rev = base
- base = index[rev][3]
+ base = index[rev].baserev
self._chainbasecache[rev] = base
return base
def linkrev(self, rev):
- return self.index[rev][4]
+ return self.index[rev].linkrev
def parentrevs(self, rev):
try:
@@ -634,11 +634,11 @@
raise error.WdirUnsupported
raise
- return entry[5], entry[6]
+ return entry.p1rev, entry.p2rev
def node(self, rev):
try:
- return self.index[rev][7]
+ return self.index[rev].node
except IndexError:
if rev == wdirrev:
raise error.WdirUnsupported
@@ -652,7 +652,7 @@
def parents(self, node):
i = self.index
d = i[self.rev(node)]
- return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
+ return i[d.p1rev].node, i[d.p2rev].node # map revisions to nodes inline
def chainlen(self, rev):
return self._chaininfo(rev)[0]
@@ -667,11 +667,11 @@
e = index[iterrev]
clen = 0
compresseddeltalen = 0
- while iterrev != e[3]:
+ while iterrev != e.baserev:
clen += 1
- compresseddeltalen += e[1]
+ compresseddeltalen += e.chunklength
if generaldelta:
- iterrev = e[3]
+ iterrev = e.baserev
else:
iterrev -= 1
if iterrev in chaininfocache:
@@ -683,7 +683,7 @@
else:
# Add text length of base since decompressing that also takes
# work. For cache hits the length is already included.
- compresseddeltalen += e[1]
+ compresseddeltalen += e.chunklength
r = (clen, compresseddeltalen)
chaininfocache[rev] = r
return r
@@ -712,10 +712,10 @@
iterrev = rev
e = index[iterrev]
- while iterrev != e[3] and iterrev != stoprev:
+ while iterrev != e.baserev and iterrev != stoprev:
chain.append(iterrev)
if generaldelta:
- iterrev = e[3]
+ iterrev = e.baserev
else:
iterrev -= 1
e = index[iterrev]
@@ -1061,7 +1061,7 @@
for r in self:
ishead[r] = 1 # I may be an head
e = index[r]
- ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
+ ishead[e.p1rev] = ishead[e.p2rev] = 0 # my parent are not
return [r for r, val in enumerate(ishead) if val]
def heads(self, start=None, stop=None):
@@ -1217,7 +1217,7 @@
# hex(node)[:...]
l = len(id) // 2 # grab an even number of digits
prefix = bin(id[:l * 2])
- nl = [e[7] for e in self.index if e[7].startswith(prefix)]
+ nl = [e.node for e in self.index if e.node.startswith(prefix)]
nl = [n for n in nl if hex(n).startswith(id) and
self.hasnode(n)]
if len(nl) > 0:
@@ -1384,12 +1384,12 @@
# (functions are expensive).
index = self.index
istart = index[startrev]
- start = int(istart[0] >> 16)
+ start = int(istart.offsetflags >> 16)
if startrev == endrev:
- end = start + istart[1]
+ end = start + istart.chunklength
else:
iend = index[endrev]
- end = int(iend[0] >> 16) + iend[1]
+ end = int(iend.offsetflags >> 16) + iend.chunklength
if self._inline:
start += (startrev + 1) * self._io.size
@@ -1468,7 +1468,7 @@
def deltaparent(self, rev):
"""return deltaparent of the given revision"""
- base = self.index[rev][3]
+ base = self.index[rev].baserev
if base == rev:
return nullrev
elif self._generaldelta:
@@ -2347,11 +2347,11 @@
# Some classes override linkrev to take filtered revs into
# account. Use raw entry from index.
- flags = entry[0] & 0xffff
- linkrev = entry[4]
- p1 = index[entry[5]][7]
- p2 = index[entry[6]][7]
- node = entry[7]
+ flags = entry.offsetflags & 0xffff
+ linkrev = entry.linkrev
+ p1 = index[entry.p1rev].node
+ p2 = index[entry.p2rev].node
+ node = entry.node
# (Possibly) reuse the delta from the revlog if allowed and
# the revlog chunk is a delta.
To: indygreg, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list