D1765: parsers: use an attr-derived class for revlog index entries

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Dec 27 00:35:56 UTC 2017


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Currently, revlog index entries are tuples.
  
  This commit introduces a new type to represent revlog v1 index
  entries. The pure Python parser has been converted to use it.
  
  The type is defined in mercurial.pure.parsers. This may not be the
  most appropriate location. However, as C extensions will need to
  reference this type, we can't put it in revlog.py because that
  would create an import cycle. We /could/ put it in repository.py.
  But repository.py is for interfaces - not concrete implementations.
  A future commit will expose the type from the C extension, so any
  "parsers" handles will have access to the attribute.
  
  To provide backwards compatibility with existing code (including the
  C extension), the type has a __getitem__ to facilitate index
  lookups. The intent is to remove this __getitem__ once all consumers
  are using named attributes.
  
  Since the pure Python index parser is now using our new type,
  test-parseindex2.py had to be updated to deal with the type mismatch.
  Once all index parsers are converted and the new type is ubiquitous,
  we can restore the simplicity of test-parseindex2.py. We also had to
  (temporarily) add "# no-check-code" to test-parseindex2.py to allow
  the import of the pure module. This will be removed once the C
  extension exposes the type.
  
  Because consumers are going through an extra Python function call to
  resolve attributes by index, this change regresses performance on a
  simple DAG walking revset for the Firefox repository:
  
  $ HGMODULEPOLICY=py hg perfrevset '::tip'
  ! wall 1.187288 comb 1.190000 user 1.170000 sys 0.020000 (best of 9)
  ! wall 1.667181 comb 1.670000 user 1.650000 sys 0.020000 (best of 6)
  
  Using PyPy, a major regression is not apparent:
  
  ! wall 0.192745 comb 0.180000 user 0.180000 sys 0.000000 (best of 47)
  ! wall 0.198590 comb 0.200000 user 0.190000 sys 0.010000 (best of 45)

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1765

AFFECTED FILES
  mercurial/pure/parsers.py
  tests/test-parseindex2.py

CHANGE DETAILS

diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -13,6 +13,10 @@
     nullid,
     nullrev,
 )
+# no-check-code
+from mercurial.pure import (
+    parsers as pureparsers,
+)
 from mercurial import (
     policy,
 )
@@ -165,6 +169,21 @@
     testversionfail(4, makehex(major, minor + 1, micro))
     testversionfail(5, "'foo'")
 
+def index_equal(a, b):
+    """Determine if 2 index objects are equal."""
+    # Normalize all entries to IndexV1Entry instances.
+    def normvalue(x):
+        if isinstance(x, pureparsers.IndexV1Entry):
+            return x
+
+        assert isinstance(x, tuple)
+        return pureparsers.IndexV1Entry(*x)
+
+    idxa = [map(normvalue, a[0])]
+    idxb = [map(normvalue, b[0])]
+
+    return (idxa, a[1]) == (idxb, b[1])
+
 def runtest() :
     # Only test the version-detection logic if it is present.
     try:
@@ -191,10 +210,10 @@
     py_res_2 = py_parseindex(data_non_inlined, False)
     c_res_2 = parse_index2(data_non_inlined, False)
 
-    if py_res_1 != c_res_1:
+    if not index_equal(py_res_1, c_res_1):
         print("Parse index result (with inlined data) differs!")
 
-    if py_res_2 != c_res_2:
+    if not index_equal(py_res_2, c_res_2):
         print("Parse index result (no inlined data) differs!")
 
     ix = parsers.parse_index2(data_inlined, True)[0]
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -11,6 +11,7 @@
 import zlib
 
 from ..node import nullid
+from ..thirdparty import attr
 from .. import pycompat
 stringio = pycompat.stringio
 
@@ -37,13 +38,49 @@
 def offset_type(offset, type):
     return int(int(offset) << 16 | type)
 
+ at attr.s
+class IndexV1Entry(object):
+    """Represents a revlog v1 index entry."""
+    offsetflags = attr.ib()
+    chunklength = attr.ib()
+    rawlength = attr.ib()
+    baserev = attr.ib()
+    linkrev = attr.ib()
+    p1rev = attr.ib()
+    p2rev = attr.ib()
+    node = attr.ib()
+
+    def __getitem__(self, x):
+        if x == 0:
+            return self.offsetflags
+        elif x == 1:
+            return self.chunklength
+        elif x == 2:
+            return self.rawlength
+        elif x == 3:
+            return self.baserev
+        elif x == 4:
+            return self.linkrev
+        elif x == 5:
+            return self.p1rev
+        elif x == 6:
+            return self.p2rev
+        elif x == 7:
+            return self.node
+        else:
+            raise IndexError('index out of range')
+
 class BaseIndexObject(object):
     def __len__(self):
         return self._lgt + len(self._extra) + 1
 
-    def insert(self, i, tup):
+    def insert(self, i, entry):
         assert i == -1
-        self._extra.append(tup)
+
+        if isinstance(entry, tuple):
+            entry = IndexV1Entry(*entry)
+
+        self._extra.append(entry)
 
     def _fix_index(self, i):
         if not isinstance(i, int):
@@ -57,17 +94,17 @@
     def __getitem__(self, i):
         i = self._fix_index(i)
         if i == len(self) - 1:
-            return (0, 0, 0, -1, -1, -1, -1, nullid)
+            return IndexV1Entry(0, 0, 0, -1, -1, -1, -1, nullid)
         if i >= self._lgt:
             return self._extra[i - self._lgt]
         index = self._calculate_index(i)
         r = struct.unpack(indexformatng, self._data[index:index + indexsize])
         if i == 0:
             e = list(r)
             type = gettype(e[0])
             e[0] = offset_type(0, type)
-            return tuple(e)
-        return r
+            return IndexV1Entry(*e)
+        return IndexV1Entry(*r)
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list