D7834: nodemap: have some python code serializing a nodemap
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Mon Jan 13 10:09:12 EST 2020
marmoute updated this revision to Diff 19171.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D7834?vs=19154&id=19171
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D7834/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D7834
AFFECTED FILES
mercurial/debugcommands.py
mercurial/revlogutils/nodemap.py
tests/test-completion.t
tests/test-help.t
tests/test-persistent-nodemap.t
CHANGE DETAILS
diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
new file mode 100644
--- /dev/null
+++ b/tests/test-persistent-nodemap.t
@@ -0,0 +1,26 @@
+===================================
+Test the persistent on-disk nodemap
+===================================
+
+
+ $ hg init test-repo
+ $ cd test-repo
+ $ hg debugbuilddag .+5000
+ $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size
+ size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c
+ 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0030: ff ff ff ff ff ff fa c2 ff ff ff ff ff ff ff ff |................|
+ 0040: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ed b3 |................|
+ 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ee 34 |...............4|
+ 0090: 00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff |................|
+ 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00b0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00c0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
diff --git a/tests/test-help.t b/tests/test-help.t
--- a/tests/test-help.t
+++ b/tests/test-help.t
@@ -1017,6 +1017,7 @@
print merge state
debugnamecomplete
complete "names" - tags, open branch names, bookmark names
+ debugnodemap write and inspect on disk nodemap
debugobsolete
create arbitrary obsolete marker
debugoptADV (no help text available)
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -107,6 +107,7 @@
debugmanifestfulltextcache
debugmergestate
debugnamecomplete
+ debugnodemap
debugobsolete
debugp1copies
debugp2copies
@@ -289,6 +290,7 @@
debugmanifestfulltextcache: clear, add
debugmergestate:
debugnamecomplete:
+ debugnodemap: dump
debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template
debugp1copies: rev
debugp2copies: rev
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -7,9 +7,156 @@
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
-from .. import error
+
+import struct
+
+from .. import (
+ error,
+ node as nodemod,
+)
class NodeMap(dict):
def __missing__(self, x):
raise error.RevlogError(b'unknown node: %s' % x)
+
+
+### Nodemap Trie
+#
+# This is a simple reference implementation to compute and serialise a nodemap
+# trie. This reference implementation is write only. The python version of this
+# is not expected to be actually used, since it wont provide performance
+# improvement over existing non-persistent C implementation.
+#
+# The nodemap is serialized as Trie using 4bits-address/16-entries block. each
+# revision can be adressed using its node shortest prefix.
+#
+# The trie is stored as a sequence of block. Each block contains 16 entries
+# (signed 64bit integer, big endian). Each entry can be one of the following:
+#
+# * value >= 0 -> index of sub-block
+# * value == -1 -> no value
+# * value < -1 -> a revision value: rev = -(value+10)
+#
+# The implementation focus on simplicity, not on performance. A Rust
+# implementation should provide a efficient version of the same serialization.
+# This reference python implementation is never meant to be extensively use in
+# production.
+
+
+def persistent_data(index):
+ """return the serialised data of a nodemap for a given index
+ """
+ trie = _build_trie(index)
+ return _dump_trie(trie)
+
+
+S_BLOCK = struct.Struct(">" + ("q" * 16))
+
+NO_ENTRY = -1
+# rev-0 need to be -2 because 0 is used by block, -1 is a special value.
+REV_OFFSET = 2
+
+
+def _transform_rev(rev):
+ """Return the number used to represent the rev in the tree.
+
+ (or retrieve a rev number from such representation)
+
+ Note that this is an involution.
+ """
+ return -(rev + REV_OFFSET)
+
+
+def _to_int(hex_digit):
+ """turn an hexadecimal digit into a proper integer"""
+ return int(hex_digit, 16)
+
+
+def _build_trie(index):
+ """build a nodemap trie
+
+ The nodemap store revision number for each unique prefix.
+
+ Each block is a dictionnary with key in `[0, 15]`. Value are either
+ another block or a revision number.
+ """
+ root = {}
+ for rev in range(len(index)):
+ hex = nodemod.hex(index[rev][7])
+ _insert_into_block(index, 0, root, rev, hex)
+ return root
+
+
+def _insert_into_block(index, level, block, current_rev, current_hex):
+ """insert a new revision in a block
+
+ index: the index we are adding revision for
+ level: the depth of the current block in the trie
+ block: the block currently being considered
+ current_rev: the revision number we are adding
+ current_hex: the hexadecimal representation of the of that revision
+ """
+ entry = block.get(_to_int(current_hex[level]))
+ if entry is None:
+ # no entry, simply store the revision number
+ block[_to_int(current_hex[level])] = current_rev
+ elif isinstance(entry, dict):
+ # need to recurse to an underlying block
+ _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+ else:
+ # collision with a previously unique prefix, inserting new
+ # vertices to fit both entry.
+ other_hex = nodemod.hex(index[entry][7])
+ other_rev = entry
+ while current_hex[level] == other_hex[level]:
+ new = {}
+ block[_to_int(current_hex[level])] = new
+ block = new
+ level += 1
+ block[_to_int(current_hex[level])] = current_rev
+ block[_to_int(other_hex[level])] = other_rev
+
+
+def _dump_trie(root):
+ """serialise a nodemap trie
+
+ See `_build_trie` for nodemap trie structure"""
+ block_map = {}
+ chunks = []
+ for tn in _walk_trie(root):
+ block_map[id(tn)] = len(chunks)
+ chunks.append(_dump_block(tn, block_map))
+ return ''.join(chunks)
+
+
+def _walk_trie(block):
+ """yield all the block in a trie
+
+ Children blocks are always yield before their parent block.
+ """
+ for (_, item) in sorted(block.items()):
+ if isinstance(item, dict):
+ for sub_block in _walk_trie(item):
+ yield sub_block
+ yield block
+
+
+def _dump_block(block_node, block_map):
+ """serialise a single block
+
+ Children block are assumed to be already serialized and present in
+ block_map.
+ """
+ data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
+ return S_BLOCK.pack(*data)
+
+
+def _to_value(item, block_map):
+ """serialize any value as an integer"""
+ if item is None:
+ return NO_ENTRY
+ elif isinstance(item, dict):
+ return block_map[id(item)]
+ else:
+ return _transform_rev(item)
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -93,7 +93,10 @@
stringutil,
)
-from .revlogutils import deltas as deltautil
+from .revlogutils import (
+ deltas as deltautil,
+ nodemap,
+)
release = lockmod.release
@@ -2075,6 +2078,20 @@
@command(
+ b'debugnodemap',
+ [('', b'dump', False, _(b'write a (binary) serialised nodemap on stdin'))],
+)
+def debugnodemap(ui, repo, **args):
+ """write and inspect on disk nodemap
+ """
+ if args['dump']:
+ unfi = repo.unfiltered()
+ cl = unfi.changelog
+ data = nodemap.persistent_data(cl.index)
+ ui.write(data)
+
+
+ at command(
b'debugobsolete',
[
(b'', b'flags', 0, _(b'markers flag')),
To: marmoute, #hg-reviewers
Cc: mjpieters, mercurial-devel
More information about the Mercurial-devel
mailing list