D7889: nodemap: track the total and unused amount of data in the rawdata file
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Wed Jan 15 14:58:27 UTC 2020
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
We need to keep that information around:
- The total amount of data will allow a transaction to start appending new information without confusing other readers.
- The amount of unused data will allow us to detect when we should regenerate a new rawdata file.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D7889
AFFECTED FILES
mercurial/debugcommands.py
mercurial/pure/parsers.py
mercurial/revlogutils/nodemap.py
tests/test-persistent-nodemap.t
CHANGE DETAILS
diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
$ hg debugnodemap --metadata
uid: ???????????????? (glob)
tip-rev: 5000
+ data-length: 245760
+ data-unused: 0
$ f --size .hg/store/00changelog.n
- .hg/store/00changelog.n: size=26
+ .hg/store/00changelog.n: size=42
$ f --sha256 .hg/store/00changelog-*.nd
.hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
$ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
$ echo foo > foo
$ hg add foo
$ hg ci -m 'foo'
+
+#if pure
$ hg debugnodemap --metadata
uid: ???????????????? (glob)
tip-rev: 5001
+ data-length: 246144
+ data-unused: 384
+#else
+ $ hg debugnodemap --metadata
+ uid: ???????????????? (glob)
+ tip-rev: 5001
+ data-length: 245760
+ data-unused: 0
+#endif
$ f --size .hg/store/00changelog.n
- .hg/store/00changelog.n: size=26
+ .hg/store/00changelog.n: size=42
(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
return None
offset += S_VERSION.size
headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
- uid_size, tip_rev = headers
+ uid_size, tip_rev, data_length, data_unused = headers
offset += S_HEADER.size
docket = NodeMapDocket(pdata[offset : offset + uid_size])
docket.tip_rev = tip_rev
+ docket.data_length = data_length
+ docket.data_unused = data_unused
filename = _rawdata_filepath(revlog, docket)
return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
# first attemp an incremental update of the data
if can_incremental and ondisk_docket is not None:
target_docket = revlog._nodemap_docket.copy()
- data = revlog.index.nodemap_data_incremental()
+ data_changed_count, data = revlog.index.nodemap_data_incremental()
datafile = _rawdata_filepath(revlog, target_docket)
# EXP-TODO: if this is a cache, this should use a cache vfs, not a
# store vfs
with revlog.opener(datafile, 'a') as fd:
fd.write(data)
+ target_docket.data_length += len(data)
+ target_docket.data_unused += data_changed_count
else:
# otherwise fallback to a full new export
target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
# store vfs
with revlog.opener(datafile, 'w') as fd:
fd.write(data)
+ target_docket.data_length = len(data)
target_docket.tip_rev = revlog.tiprev()
# EXP-TODO: if this is a cache, this should use a cache vfs, not a
# store vfs
@@ -143,9 +148,8 @@
# version 0 is experimental, no BC garantee, do no use outside of tests.
ONDISK_VERSION = 0
-
S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
ID_SIZE = 8
@@ -168,17 +172,26 @@
uid = _make_uid()
self.uid = uid
self.tip_rev = None
+ self.data_length = None
+ self.data_unused = 0
def copy(self):
new = NodeMapDocket(uid=self.uid)
new.tip_rev = self.tip_rev
+ new.data_length = self.data_length
+ new.data_unused = self.data_unused
return new
def serialize(self):
"""return serialized bytes for a docket using the passed uid"""
data = []
data.append(S_VERSION.pack(ONDISK_VERSION))
- headers = (len(self.uid), self.tip_rev)
+ headers = (
+ len(self.uid),
+ self.tip_rev,
+ self.data_length,
+ self.data_unused,
+ )
data.append(S_HEADER.pack(*headers))
data.append(self.uid)
return b''.join(data)
@@ -236,8 +249,8 @@
def update_persistent_data(index, root, max_idx, last_rev):
"""return the serialised data of a nodemap for a given index
"""
- trie = _update_trie(index, root, last_rev)
- return _dump_trie(trie, existing_idx=max_idx)
+ changed_block, trie = _update_trie(index, root, last_rev)
+ return changed_block * S_BLOCK.size, _dump_trie(trie, existing_idx=max_idx)
S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -293,10 +306,11 @@
def _update_trie(index, root, last_rev):
"""consume"""
+ changed = 0
for rev in range(last_rev + 1, len(index)):
hex = nodemod.hex(index[rev][7])
- _insert_into_block(index, 0, root, rev, hex)
- return root
+ changed += _insert_into_block(index, 0, root, rev, hex)
+ return changed, root
def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -308,6 +322,7 @@
current_rev: the revision number we are adding
current_hex: the hexadecimal representation of the of that revision
"""
+ changed = 1
if block.ondisk_id is not None:
block.ondisk_id = None
entry = block.get(_to_int(current_hex[level]))
@@ -316,7 +331,9 @@
block[_to_int(current_hex[level])] = current_rev
elif isinstance(entry, dict):
# need to recurse to an underlying block
- _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+ changed += _insert_into_block(
+ index, level + 1, entry, current_rev, current_hex
+ )
else:
# collision with a previously unique prefix, inserting new
# vertices to fit both entry.
@@ -329,6 +346,7 @@
level += 1
block[_to_int(current_hex[level])] = current_rev
block[_to_int(other_hex[level])] = other_rev
+ return changed
def _dump_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
"""
if self._nm_root is None:
return None
- data = nodemaputil.update_persistent_data(
+ changed, data = nodemaputil.update_persistent_data(
self, self._nm_root, self._nm_max_idx, self._nm_rev
)
self._nm_root = self._nm_max_idx = self._nm_rev = None
- return data
+ return changed, data
def update_nodemap_data(self, docket, nm_data):
"""provide full serialiazed data from a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2131,6 +2131,8 @@
docket, data = nm_data
ui.write((b"uid: %s\n") % docket.uid)
ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+ ui.write((b"data-length: %s\n") % docket.data_length)
+ ui.write((b"data-unused: %s\n") % docket.data_unused)
@command(
To: marmoute, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list