D1773: revlog: use named attributes on revlog index entries

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Dec 27 00:36:27 UTC 2017


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Now that we're using a dedicated type for index entries everywhere,
  we can access fields in them by name instead of integer offset.
  
  This change has a significant impact on performance on the Firefox
  repository. (For each command, the three values are, in order: before
  the refactor, the parent commit, and this commit.)
  
  $ HGMODULEPOLICY=c hg perfrevset '::tip'
  ! wall 0.672636 comb 0.670000 user 0.650000 sys 0.020000 (best of 15)
  ! wall 0.962372 comb 0.960000 user 0.940000 sys 0.020000 (best of 10)
  ! wall 0.774613 comb 0.780000 user 0.770000 sys 0.010000 (best of 13)
  
  $ HGMODULEPOLICY=py hg perfrevset '::tip'
  ! wall 1.187288 comb 1.190000 user 1.170000 sys 0.020000 (best of 9)
  ! wall 1.667181 comb 1.670000 user 1.650000 sys 0.020000 (best of 6)
  ! wall 1.514946 comb 1.520000 user 1.490000 sys 0.030000 (best of 7)
  
  And with PyPy:
  
  ! wall 0.192745 comb 0.180000 user 0.180000 sys 0.000000 (best of 47)
  ! wall 0.198590 comb 0.200000 user 0.190000 sys 0.010000 (best of 45)
  ! wall 0.207068 comb 0.220000 user 0.200000 sys 0.020000 (best of 43)
  
  The C extension is ~15% slower than before the refactor. This is
  unfortunate: it is a fairly steep price to pay for more readable
  code and the establishment of a formal interface for index entries.
  
  However, all is not lost! Because we're now using a custom type and
  conforming to a yet-to-be-formally-defined interface, there's nothing
  preventing us from implementing a backed-by-C custom type for revlog
  entries. This type could lazily resolve PyObject instances, which
  should result in a massive performance boost for operations that
  don't need to access all fields of the index entry. This will be
  easier to do *after* we drop all uses of tuples and their API for
  referring to index entries.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1773

AFFECTED FILES
  mercurial/revlog.py

CHANGE DETAILS

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -568,7 +568,7 @@
             if p is None:
                 p = len(i) - 2
             for r in xrange(p, -1, -1):
-                v = i[r][7]
+                v = i[r].node
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
@@ -582,17 +582,17 @@
     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
-        return int(self.index[rev][0] >> 16)
+        return int(self.index[rev].offsetflags >> 16)
 
     def flags(self, rev):
-        return self.index[rev][0] & 0xFFFF
+        return self.index[rev].offsetflags & 0xFFFF
 
     def length(self, rev):
-        return self.index[rev][1]
+        return self.index[rev].chunklength
 
     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
-        l = self.index[rev][2]
+        l = self.index[rev].rawlength
         if l >= 0:
             return l
 
@@ -615,16 +615,16 @@
             return base
 
         index = self.index
-        base = index[rev][3]
+        base = index[rev].baserev
         while base != rev:
             rev = base
-            base = index[rev][3]
+            base = index[rev].baserev
 
         self._chainbasecache[rev] = base
         return base
 
     def linkrev(self, rev):
-        return self.index[rev][4]
+        return self.index[rev].linkrev
 
     def parentrevs(self, rev):
         try:
@@ -634,11 +634,11 @@
                 raise error.WdirUnsupported
             raise
 
-        return entry[5], entry[6]
+        return entry.p1rev, entry.p2rev
 
     def node(self, rev):
         try:
-            return self.index[rev][7]
+            return self.index[rev].node
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
@@ -652,7 +652,7 @@
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
-        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
+        return i[d.p1rev].node, i[d.p2rev].node # map revisions to nodes inline
 
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
@@ -667,11 +667,11 @@
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
-        while iterrev != e[3]:
+        while iterrev != e.baserev:
             clen += 1
-            compresseddeltalen += e[1]
+            compresseddeltalen += e.chunklength
             if generaldelta:
-                iterrev = e[3]
+                iterrev = e.baserev
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
@@ -683,7 +683,7 @@
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
-            compresseddeltalen += e[1]
+            compresseddeltalen += e.chunklength
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r
@@ -712,10 +712,10 @@
 
         iterrev = rev
         e = index[iterrev]
-        while iterrev != e[3] and iterrev != stoprev:
+        while iterrev != e.baserev and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
-                iterrev = e[3]
+                iterrev = e.baserev
             else:
                 iterrev -= 1
             e = index[iterrev]
@@ -1061,7 +1061,7 @@
         for r in self:
             ishead[r] = 1  # I may be an head
             e = index[r]
-            ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
+            ishead[e.p1rev] = ishead[e.p2rev] = 0  # my parent are not
         return [r for r, val in enumerate(ishead) if val]
 
     def heads(self, start=None, stop=None):
@@ -1217,7 +1217,7 @@
                 # hex(node)[:...]
                 l = len(id) // 2  # grab an even number of digits
                 prefix = bin(id[:l * 2])
-                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
+                nl = [e.node for e in self.index if e.node.startswith(prefix)]
                 nl = [n for n in nl if hex(n).startswith(id) and
                       self.hasnode(n)]
                 if len(nl) > 0:
@@ -1384,12 +1384,12 @@
         # (functions are expensive).
         index = self.index
         istart = index[startrev]
-        start = int(istart[0] >> 16)
+        start = int(istart.offsetflags >> 16)
         if startrev == endrev:
-            end = start + istart[1]
+            end = start + istart.chunklength
         else:
             iend = index[endrev]
-            end = int(iend[0] >> 16) + iend[1]
+            end = int(iend.offsetflags >> 16) + iend.chunklength
 
         if self._inline:
             start += (startrev + 1) * self._io.size
@@ -1468,7 +1468,7 @@
 
     def deltaparent(self, rev):
         """return deltaparent of the given revision"""
-        base = self.index[rev][3]
+        base = self.index[rev].baserev
         if base == rev:
             return nullrev
         elif self._generaldelta:
@@ -2347,11 +2347,11 @@
 
                 # Some classes override linkrev to take filtered revs into
                 # account. Use raw entry from index.
-                flags = entry[0] & 0xffff
-                linkrev = entry[4]
-                p1 = index[entry[5]][7]
-                p2 = index[entry[6]][7]
-                node = entry[7]
+                flags = entry.offsetflags & 0xffff
+                linkrev = entry.linkrev
+                p1 = index[entry.p1rev].node
+                p2 = index[entry.p2rev].node
+                node = entry.node
 
                 # (Possibly) reuse the delta from the revlog if allowed and
                 # the revlog chunk is a delta.



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list