D477: revlog: add option to mmap revlog index

mbthomas (Mark Thomas) phabricator at mercurial-scm.org
Wed Sep 13 13:28:27 EDT 2017


mbthomas updated this revision to Diff 1786.
mbthomas added a comment.


  Rebase and remove mmapindexthreshold from revlog object

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D477?vs=1297&id=1786

BRANCH
  mmap-revlog (bookmark) on default (branch)

REVISION DETAIL
  https://phab.mercurial-scm.org/D477

AFFECTED FILES
  mercurial/changelog.py
  mercurial/localrepo.py
  mercurial/manifest.py
  mercurial/revlog.py
  tests/test-revlog-mmapindex.t

CHANGE DETAILS

diff --git a/tests/test-revlog-mmapindex.t b/tests/test-revlog-mmapindex.t
new file mode 100644
--- /dev/null
+++ b/tests/test-revlog-mmapindex.t
@@ -0,0 +1,54 @@
+create verbosemmap.py
+  $ cat << EOF > verbosemmap.py
+  > # extension to make util.mmapread verbose
+  > 
+  > from __future__ import absolute_import
+  > 
+  > from mercurial import (
+  >     extensions,
+  >     util,
+  > )
+  > 
+  > def mmapread(orig, fp):
+  >     print "mmapping %s" % fp.name
+  >     return orig(fp)
+  > 
+  > def extsetup(ui):
+  >     extensions.wrapfunction(util, 'mmapread', mmapread)
+  > EOF
+
+setting up base repo
+  $ hg init a
+  $ cd a
+  $ touch a
+  $ hg add a
+  $ hg commit -qm base
+  $ for i in `seq 1 100` ; do
+  > echo $i > a
+  > hg commit -qm $i
+  > done
+
+set up verbosemmap extension
+  $ cat << EOF >> $HGRCPATH
+  > [extensions]
+  > verbosemmap=$TESTTMP/verbosemmap.py
+  > EOF
+
+mmap index which is now more than 4k long
+  $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=4k
+  mmapping $TESTTMP/a/.hg/store/00changelog.i (glob)
+  100
+  99
+  98
+  97
+  96
+
+do not mmap index which is still less than 32k
+  $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=32k
+  100
+  99
+  98
+  97
+  96
+
+  $ cd ..
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -268,8 +268,13 @@
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
+
+    If mmaplargeindex is True, and an mmapindexthreshold is set, the
+    index will be mmapped rather than read if its size is at least the
+    configured threshold.
     """
-    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
+    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
+                 mmaplargeindex=False):
         """
         create a revlog object
 
@@ -301,6 +306,7 @@
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
 
+        mmapindexthreshold = None
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
@@ -323,6 +329,8 @@
                 self._compengine = opts['compengine']
             if 'maxdeltachainspan' in opts:
                 self._maxdeltachainspan = opts['maxdeltachainspan']
+            if mmaplargeindex and 'mmapindexthreshold' in opts:
+                mmapindexthreshold = opts['mmapindexthreshold']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
@@ -335,7 +343,11 @@
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
-            indexdata = f.read()
+            if (mmapindexthreshold is not None and
+                    self.opener.fstat(f).st_size >= mmapindexthreshold):
+                indexdata = util.buffer(util.mmapread(f))
+            else:
+                indexdata = f.read()
             f.close()
             if len(indexdata) > 0:
                 v = versionformat_unpack(indexdata[:4])[0]
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -1231,7 +1231,8 @@
 
         super(manifestrevlog, self).__init__(opener, indexfile,
                                              # only root indexfile is cached
-                                             checkambig=not bool(dir))
+                                             checkambig=not bool(dir),
+                                             mmaplargeindex=True)
 
     @property
     def fulltextcache(self):
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -599,6 +599,10 @@
         chainspan = self.ui.configbytes('experimental', 'maxdeltachainspan', -1)
         if 0 <= chainspan:
             self.svfs.options['maxdeltachainspan'] = chainspan
+        mmapindexthreshold = self.ui.configbytes('experimental',
+                                                 'mmapindexthreshold', None)
+        if mmapindexthreshold is not None:
+            self.svfs.options['mmapindexthreshold'] = mmapindexthreshold
 
         for r in self.requirements:
             if r.startswith('exp-compression-'):
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -275,7 +275,7 @@
 
         datafile = '00changelog.d'
         revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
-                               checkambig=True)
+                               checkambig=True, mmaplargeindex=True)
 
         if self._initempty:
             # changelogs don't benefit from generaldelta



To: mbthomas, #fbhgext, indygreg, #hg-reviewers, durin42, quark
Cc: quark, durin42, simonfar, mercurial-devel


More information about the Mercurial-devel mailing list