D1765: parsers: use an attr-derived class for revlog index entries
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Wed Dec 27 00:35:56 UTC 2017
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
Currently, revlog index entries are tuples.
This commit introduces a new type to represent revlog v1 index
entries. The pure Python parser has been converted to use it.
The type is defined in mercurial.pure.parsers. This may not be the
most appropriate location. However, as C extensions will need to
reference this type, we can't put it in revlog.py because that
would create an import cycle. We /could/ put it in repository.py.
But repository.py is for interfaces - not concrete implementations.
A future commit will expose the type from the C extension, so any
"parsers" handles will have access to the attribute.
To provide backwards compatibility with existing code (including the
C extension), the type has a __getitem__ to facilitate index
lookups. The intent is to remove this __getitem__ once all consumers
are using named attributes.
Since the pure Python index parser is now using our new type,
test-parseindex2.py had to be updated to deal with the type mismatch.
Once all index parsers are converted and the new type is ubiquitous,
we can restore the simplicity of test-parseindex2.py. We also had to
(temporarily) add "# no-check-code" to test-parseindex2.py to allow
the import of the pure module. This will be removed once the C
extension exposes the type.
Because consumers are going through an extra Python function call to
resolve attributes by index, this change regresses performance on a
simple DAG walking revset for the Firefox repository:
$ HGMODULEPOLICY=py hg perfrevset '::tip'
! wall 1.187288 comb 1.190000 user 1.170000 sys 0.020000 (best of 9)
! wall 1.667181 comb 1.670000 user 1.650000 sys 0.020000 (best of 6)
Using PyPy, a major regression is not apparent:
! wall 0.192745 comb 0.180000 user 0.180000 sys 0.000000 (best of 47)
! wall 0.198590 comb 0.200000 user 0.190000 sys 0.010000 (best of 45)
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D1765
AFFECTED FILES
mercurial/pure/parsers.py
tests/test-parseindex2.py
CHANGE DETAILS
diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -13,6 +13,10 @@
nullid,
nullrev,
)
+# no-check-code
+from mercurial.pure import (
+ parsers as pureparsers,
+)
from mercurial import (
policy,
)
@@ -165,6 +169,21 @@
testversionfail(4, makehex(major, minor + 1, micro))
testversionfail(5, "'foo'")
+def index_equal(a, b):
+ """Determine if 2 index objects are equal."""
+ # Normalize all entries to IndexV1Entry instances.
+ def normvalue(x):
+ if isinstance(x, pureparsers.IndexV1Entry):
+ return x
+
+ assert isinstance(x, tuple)
+ return pureparsers.IndexV1Entry(*x)
+
+ idxa = [map(normvalue, a[0])]
+ idxb = [map(normvalue, b[0])]
+
+ return (idxa, a[1]) == (idxb, b[1])
+
def runtest() :
# Only test the version-detection logic if it is present.
try:
@@ -191,10 +210,10 @@
py_res_2 = py_parseindex(data_non_inlined, False)
c_res_2 = parse_index2(data_non_inlined, False)
- if py_res_1 != c_res_1:
+ if not index_equal(py_res_1, c_res_1):
print("Parse index result (with inlined data) differs!")
- if py_res_2 != c_res_2:
+ if not index_equal(py_res_2, c_res_2):
print("Parse index result (no inlined data) differs!")
ix = parsers.parse_index2(data_inlined, True)[0]
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -11,6 +11,7 @@
import zlib
from ..node import nullid
+from ..thirdparty import attr
from .. import pycompat
stringio = pycompat.stringio
@@ -37,13 +38,49 @@
def offset_type(offset, type):
return int(int(offset) << 16 | type)
+ at attr.s
+class IndexV1Entry(object):
+ """Represents a revlog v1 index entry."""
+ offsetflags = attr.ib()
+ chunklength = attr.ib()
+ rawlength = attr.ib()
+ baserev = attr.ib()
+ linkrev = attr.ib()
+ p1rev = attr.ib()
+ p2rev = attr.ib()
+ node = attr.ib()
+
+ def __getitem__(self, x):
+ if x == 0:
+ return self.offsetflags
+ elif x == 1:
+ return self.chunklength
+ elif x == 2:
+ return self.rawlength
+ elif x == 3:
+ return self.baserev
+ elif x == 4:
+ return self.linkrev
+ elif x == 5:
+ return self.p1rev
+ elif x == 6:
+ return self.p2rev
+ elif x == 7:
+ return self.node
+ else:
+ raise IndexError('index out of range')
+
class BaseIndexObject(object):
def __len__(self):
return self._lgt + len(self._extra) + 1
- def insert(self, i, tup):
+ def insert(self, i, entry):
assert i == -1
- self._extra.append(tup)
+
+ if isinstance(entry, tuple):
+ entry = IndexV1Entry(*entry)
+
+ self._extra.append(entry)
def _fix_index(self, i):
if not isinstance(i, int):
@@ -57,17 +94,17 @@
def __getitem__(self, i):
i = self._fix_index(i)
if i == len(self) - 1:
- return (0, 0, 0, -1, -1, -1, -1, nullid)
+ return IndexV1Entry(0, 0, 0, -1, -1, -1, -1, nullid)
if i >= self._lgt:
return self._extra[i - self._lgt]
index = self._calculate_index(i)
r = struct.unpack(indexformatng, self._data[index:index + indexsize])
if i == 0:
e = list(r)
type = gettype(e[0])
e[0] = offset_type(0, type)
- return tuple(e)
- return r
+ return IndexV1Entry(*e)
+ return IndexV1Entry(*r)
class IndexObject(BaseIndexObject):
def __init__(self, data):
To: indygreg, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list