D7894: nodemap: introduce an option to use mmap to read the nodemap mapping

marmoute (Pierre-Yves David) phabricator at mercurial-scm.org
Wed Jan 15 09:59:52 EST 2020


marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The performance and memory benefit is much greater if we don't have to copy all
  the data in memory for each information. So we introduce an option (on by
  default) to read the data using mmap.
  
  This changeset is the last one definition the API for index support nodemap
  data. (they have to be able to use the mmaping).

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7894

AFFECTED FILES
  mercurial/configitems.py
  mercurial/debugcommands.py
  mercurial/localrepo.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -84,3 +84,37 @@
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
+
+Test code path without mmap
+---------------------------
+
+  $ echo bar > bar
+  $ hg add bar
+  $ hg ci -m 'bar' --config experimental.exp-persistent-nodemap.mmap=no
+
+  $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=yes
+  revision in index:   5003
+  revision in nodemap: 5003
+  $ hg debugnodemap --check --config experimental.exp-persistent-nodemap.mmap=no
+  revision in index:   5003
+  revision in nodemap: 5003
+
+
+#if pure
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5002
+  data-length: 246656
+  data-unused: 768
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=246656, sha256=8221807a0860a7a65002d2d3e0d33512d28aa6db2433db966e56aa17dcf6329f (glob)
+
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5002
+  data-length: 245888
+  data-unused: 0
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=245888, sha256=42233b63e5567fd362fb7847fa7a9f4d40ad93c28f8571197b356a69fe8bd271 (glob)
+#endif
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -8,6 +8,7 @@
 
 from __future__ import absolute_import
 
+import errno
 import os
 import re
 import struct
@@ -46,10 +47,17 @@
 
     filename = _rawdata_filepath(revlog, docket)
     data = revlog.opener.tryread(filename)
+    try:
+        with revlog.opener(filename) as fd:
+            if revlog.opener.options.get("exp-persistent-nodemap.mmap"):
+                data = fd.read(data_length)
+            else:
+                data = util.buffer(util.mmapread(fd, data_length))
+    except OSError as e:
+        if e.errno != errno.ENOENT:
+            raise
     if len(data) < data_length:
         return None
-    elif len(data) > data_length:
-        data = data[:data_length]
     return docket, data
 
 
@@ -81,6 +89,8 @@
 
     can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
     ondisk_docket = revlog._nodemap_docket
+    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
+    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")
 
     data = None
     # first attemp an incremental update of the data
@@ -97,12 +107,18 @@
             datafile = _rawdata_filepath(revlog, target_docket)
             # EXP-TODO: if this is a cache, this should use a cache vfs, not a
             # store vfs
+            new_length = target_docket.data_length + len(data)
             with revlog.opener(datafile, 'r+') as fd:
                 fd.seek(target_docket.data_length)
                 fd.write(data)
-                fd.seek(0)
-                new_data = fd.read(target_docket.data_length + len(data))
-            target_docket.data_length += len(data)
+                if feed_data:
+                    if use_mmap:
+                        fd.seek(0)
+                        new_data = fd.read(new_length)
+                    else:
+                        fd.flush()
+                        new_data = util.buffer(util.mmapread(fd, new_length))
+            target_docket.data_length = new_length
             target_docket.data_unused += data_changed_count
 
     if data is None:
@@ -115,9 +131,14 @@
             data = persistent_data(revlog.index)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
-        new_data = data
-        with revlog.opener(datafile, 'w') as fd:
+        with revlog.opener(datafile, 'w+') as fd:
             fd.write(data)
+            if feed_data:
+                if use_mmap:
+                    new_data = data
+                else:
+                    fd.flush()
+                    new_data = util.buffer(util.mmapread(fd, len(data)))
         target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
@@ -125,7 +146,7 @@
     with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
         fp.write(target_docket.serialize())
     revlog._nodemap_docket = target_docket
-    if util.safehasattr(revlog.index, "update_nodemap_data"):
+    if feed_data:
         revlog.index.update_nodemap_data(target_docket, new_data)
 
     # EXP-TODO: if the transaction abort, we should remove the new data and
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -931,6 +931,8 @@
         options[b'rust.index'] = True
     if ui.configbool('experimental', 'exp-persistent-nodemap'):
         options[b'exp-persistent-nodemap'] = True
+    if ui.configbool('experimental', 'exp-persistent-nodemap.mmap'):
+        options[b'exp-persistent-nodemap.mmap'] = True
     if ui.configbool('devel', 'persistent-nodemap'):
         options[b'devel-force-nodemap'] = True
 
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2115,7 +2115,7 @@
         nm_data = nodemap.persisted_data(cl)
         if nm_data is not None:
             docket, data = nm_data
-            ui.write(data)
+            ui.write(data[:])
     elif args['check']:
         unfi = repo.unfiltered()
         cl = unfi.changelog
diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -666,6 +666,9 @@
     b'experimental', b'exp-persistent-nodemap', default=False,
 )
 coreconfigitem(
+    b'experimental', b'exp-persistent-nodemap.mmap', default=True,
+)
+coreconfigitem(
     b'experimental', b'server.filesdata.recommended-batch-size', default=50000,
 )
 coreconfigitem(



To: marmoute, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list