[PATCH 3 of 2] treemanifest: store sub-manifests revlog per directory

Martin von Zweigbergk martinvonz at google.com
Wed Apr 15 15:34:52 CDT 2015


# HG changeset patch
# User Martin von Zweigbergk <martinvonz at google.com>
# Date 1428968680 25200
#      Mon Apr 13 16:44:40 2015 -0700
# Node ID 71ffbbae4096ca945a3106fd9a2297137ddad168
# Parent  f739948917053db6732e30c62a1089390f4ba06f
treemanifest: store sub-manifests revlog per directory

With this change, when experiment.treemanifest=True, commits will be
written with one manifest revlog per directory. The manifest revlogs
are stored in .hg/store/metadata/$dir/00manifest.[id].

Flat manifests can still be read and interacted with as usual (they
are also read into treemanifest instances). The functionality for
writing treemanifest as trees to disk is in addition to the current
functionality of writing treemanifest as a flat manifest to disk;
tests still pass with '_treeinmem=True' hardcoded.

Important functionality that is not yet done:

 * .hg/requires is not updated, so running e.g. 'hg diff --change .'
   will crash if '.' is stored as a tree

 * Exchange is not implemented. It will probably involve a
   changegroupv3 that allows multiple manifest revlogs.

diff -r f73994891705 -r 71ffbbae4096 mercurial/manifest.py
--- a/mercurial/manifest.py	Sun Apr 12 23:01:18 2015 -0700
+++ b/mercurial/manifest.py	Mon Apr 13 16:44:40 2015 -0700
@@ -444,11 +444,15 @@
 class treemanifest(object):
     def __init__(self, dir='', text=''):
         self._dir = dir
+        self._node = revlog.nullid
         self._dirs = {}
         # Using _lazymanifest here is a little slower than plain old dicts
         self._files = {}
         self._flags = {}
-        self.parse(text)
+        def readsubtree(subdir, subm):
+            raise AssertionError('treemanifest constructor only accepts '
+                                 'flat manifests')
+        self.parse(text, readsubtree)
 
     def _subpath(self, path):
         return self._dir + path
@@ -464,7 +468,22 @@
                 util.all(m._isempty() for m in self._dirs.values())))
 
     def __str__(self):
-        return '<treemanifest dir=%s>' % self._dir
+        return ('<treemanifest dir=%s, node=%s>' %
+                (self._dir, revlog.hex(self._node)))
+
+    def dir(self):
+        '''The directory that this tree manifest represents, including a
+        trailing '/'. Empty string for the repo root directory.'''
+        return self._dir
+
+    def node(self):
+        '''This node of this instance. nullid for unsaved instances. Should
+        be updated when the instance is read or written from a revlog.
+        '''
+        return self._node
+
+    def setnode(self, node):
+        self._node = node
 
     def iteritems(self):
         for p, n in sorted(self._dirs.items() + self._files.items()):
@@ -557,6 +576,7 @@
 
     def setflag(self, f, flags):
         """Set the flags (symlink, executable) for path f."""
+        assert 'd' not in flags
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -567,6 +587,7 @@
 
     def copy(self):
         copy = treemanifest(self._dir)
+        copy._node = self._node
         for d in self._dirs:
             copy._dirs[d] = self._dirs[d].copy()
         copy._files = dict.copy(self._files)
@@ -737,11 +758,18 @@
         _diff(self, m2)
         return result
 
-    def parse(self, text):
+    def parse(self, text, readsubtree):
         for f, n, fl in _parse(text):
-            self[f] = n
-            if fl:
-                self.setflag(f, fl)
+            if fl == 'd':
+                f = f + '/'
+                self._dirs[f] = readsubtree(self._subpath(f), n)
+            else:
+                # Use __setitem__ and setflag rather than assigning directly
+                # to _files and _flags, thereby letting us parse flat manifests
+                # as well as tree manifests.
+                self[f] = n
+                if fl:
+                    self.setflag(f, fl)
 
     def text(self, usemanifestv2=False):
         """Get the full data of this manifest as a bytestring."""
@@ -749,8 +777,26 @@
         return _text(((f, self[f], flags(f)) for f in self.keys()),
                      usemanifestv2)
 
+    def dirtext(self, usemanifestv2=False):
+        """Get the full data of this directory as a bytestring. Make sure that
+        any submanifests have been written first, so their nodeids are correct.
+        """
+        flags = self.flags
+        dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
+        files = [(f, self._files[f], flags(f)) for f in self._files]
+        return _text(sorted(dirs + files), usemanifestv2)
+
+    def writesubtrees(self, m1, m2, writesubtree):
+        emptytree = treemanifest()
+        for d, subm in self._dirs.iteritems():
+            subp1 = m1._dirs.get(d, emptytree)._node
+            subp2 = m2._dirs.get(d, emptytree)._node
+            if subp1 == revlog.nullid:
+                subp1, subp2 = subp2, subp1
+            writesubtree(subm, subp1, subp2)
+
 class manifest(revlog.revlog):
-    def __init__(self, opener):
+    def __init__(self, opener, dir=''):
         # During normal operations, we expect to deal with not more than four
         # revs at a time (such as during commit --amend). When rebasing large
         # stacks of commits, the number can go up, hence the config knob below.
@@ -763,14 +809,19 @@
             usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
             usemanifestv2 = opts.get('manifestv2', usemanifestv2)
         self._mancache = util.lrucachedict(cachesize)
-        revlog.revlog.__init__(self, opener, "00manifest.i")
         self._treeinmem = usetreemanifest
         self._treeondisk = usetreemanifest
         self._usemanifestv2 = usemanifestv2
+        indexfile = "00manifest.i"
+        if dir:
+            assert self._treeondisk
+            indexfile = "metadata/" + dir + "00manifest.i"
+        revlog.revlog.__init__(self, opener, indexfile)
+        self._dir = dir
 
     def _newmanifest(self, data=''):
         if self._treeinmem:
-            return treemanifest('', data)
+            return treemanifest(self._dir, data)
         return manifestdict(data)
 
     def _slowreaddelta(self, node):
@@ -806,8 +857,17 @@
         if node in self._mancache:
             return self._mancache[node][0]
         text = self.revision(node)
-        arraytext = array.array('c', text)
-        m = self._newmanifest(text)
+        if self._treeondisk:
+            def readsubtree(dir, subm):
+                sublog = manifest(self.opener, dir)
+                return sublog.read(subm)
+            m = self._newmanifest()
+            m.parse(text, readsubtree)
+            m.setnode(node)
+            arraytext = None
+        else:
+            m = self._newmanifest(text)
+            arraytext = array.array('c', text)
         self._mancache[node] = (m, arraytext)
         return m
 
@@ -845,10 +905,34 @@
             # just encode a fulltext of the manifest and pass that
             # through to the revlog layer, and let it handle the delta
             # process.
-            text = m.text(self._usemanifestv2)
-            arraytext = array.array('c', text)
-            n = self.addrevision(text, transaction, link, p1, p2)
+            if self._treeondisk:
+                m1 = self.read(p1)
+                m2 = self.read(p2)
+                n = self._addtree(m, transaction, link, m1, m2)
+                arraytext = None
+            else:
+                text = m.text(self._usemanifestv2)
+                n = self.addrevision(text, transaction, link, p1, p2)
+                arraytext = array.array('c', text)
 
         self._mancache[n] = (m, arraytext)
 
         return n
+
+    def _addtree(self, m, transaction, link, m1, m2):
+        def writesubtree(subm, subp1, subp2):
+            sublog = manifest(self.opener, subm.dir())
+            sublog.add(subm, transaction, link, subp1, subp2, None, None)
+        m.writesubtrees(m1, m2, writesubtree)
+        text = m.dirtext(self._usemanifestv2)
+        # If the manifest is unchanged compared to one parent,
+        # don't write a new revision
+        if text == m1.dirtext(self._usemanifestv2):
+            n = m1.node()
+        elif text == m2.dirtext(self._usemanifestv2):
+            n = m2.node()
+        else:
+            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
+        # Save nodeid so parent manifest can calculate its nodeid
+        m.setnode(n)
+        return n
diff -r f73994891705 -r 71ffbbae4096 tests/test-manifest-tree.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-manifest-tree.t	Mon Apr 13 16:44:40 2015 -0700
@@ -0,0 +1,158 @@
+
+Set up repo
+
+  $ hg init repo
+  $ cd repo
+
+  $ cat >> .hg/hgrc <<EOF
+  > [experimental]
+  > treemanifest = True
+  > EOF
+
+Without directories, looks like any other repo
+
+  $ echo 0 > a
+  $ echo 0 > b
+  $ hg ci -Aqm initial
+  $ hg debugdata -m 0
+  a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+
+Submanifest is stored in separate revlog
+
+  $ mkdir dir1
+  $ echo 1 > dir1/a
+  $ echo 1 > dir1/b
+  $ echo 1 > e
+  $ hg ci -Aqm 'add dir1'
+  $ hg debugdata -m 1
+  a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
+  dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc)
+  e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+  $ hg debugdata .hg/store/metadata/dir1/00manifest.i 0
+  a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+  b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
+
+Can add nested directories
+
+  $ mkdir dir1/dir1
+  $ echo 2 > dir1/dir1/a
+  $ echo 2 > dir1/dir1/b
+  $ mkdir dir1/dir2
+  $ echo 2 > dir1/dir2/a
+  $ echo 2 > dir1/dir2/b
+  $ hg ci -Aqm 'add dir1/dir1'
+  $ hg files -r .
+  a
+  b
+  dir1/a
+  dir1/b
+  dir1/dir1/a
+  dir1/dir1/b
+  dir1/dir2/a
+  dir1/dir2/b
+  e
+
+Revision is not created for unchanged directory
+
+  $ mkdir dir2
+  $ echo 3 > dir2/a
+  $ hg ci -Aqm 'add dir2'
+  $ hg debugdata -m 2 | grep -v dir1/ > before
+  $ hg debugdata -m 3 | grep -v dir2/ > after
+  $ diff before after
+  $ rm before after
+
+Removing directory does not create an revlog entry
+
+  $ hg rm dir1/dir1
+  removing dir1/dir1/a
+  removing dir1/dir1/b
+  $ hg debugindex .hg/store/metadata/dir1/dir1/00manifest.i > before
+  $ hg ci -qm 'remove dir1/dir1'
+  $ hg debugindex .hg/store/metadata/dir1/dir1/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Check that hg files (calls treemanifest.walk()) works
+
+  $ hg co 'desc("add dir2")'
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg files -r . dir1
+  dir1/a
+  dir1/b
+  dir1/dir1/a
+  dir1/dir1/b
+  dir1/dir2/a
+  dir1/dir2/b
+
+Check that status between revisions works (calls treemanifest.matches())
+
+  $ hg status --rev 'desc("add dir1")' --rev . dir1
+  A dir1/dir1/a
+  A dir1/dir1/b
+  A dir1/dir2/a
+  A dir1/dir2/b
+
+Merge creates 2-parent revision of directory revlog
+
+  $ echo 5 > dir1/a
+  $ hg ci -Aqm 'modify dir1/a'
+  $ hg co '.^'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 6 > dir1/b
+  $ hg ci -Aqm 'modify dir1/b'
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg ci -m 'conflict-free merge involving dir1/'
+  $ cat dir1/a
+  5
+  $ cat dir1/b
+  6
+  $ hg debugindex .hg/store/metadata/dir1/00manifest.i
+     rev    offset  length   base linkrev nodeid       p1           p2
+       0         0      54      0       1 8b3ffd73f901 000000000000 000000000000
+       1        54      68      0       2 b66d046c644f 8b3ffd73f901 000000000000
+       2       122      12      0       4 b87265673c8a b66d046c644f 000000000000
+       3       134      95      0       5 aa5d3adcec72 b66d046c644f 000000000000
+       4       229      81      0       6 e29b066b91ad b66d046c644f 000000000000
+       5       310     107      5       7 a120ce2b83f5 e29b066b91ad aa5d3adcec72
+
+Merge keeping directory from parent 1 does not create revlog entry. (Note that
+dir1's manifest does change, but only because dir1/a's filelog changes.)
+
+  $ hg co 'desc("add dir2")'
+  2 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo 8 > dir2/a
+  $ hg ci -m 'modify dir2/a'
+  created new head
+
+  $ hg debugindex .hg/store/metadata/dir2/00manifest.i > before
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg revert -r 'desc("modify dir2/a")' .
+  reverting dir1/a (glob)
+  $ hg ci -m 'merge, keeping parent 1'
+  $ hg debugindex .hg/store/metadata/dir2/00manifest.i > after
+  $ diff before after
+  $ rm before after
+
+Merge keeping directory from parent 2 does not create revlog entry. (Note that
+dir2's manifest does change, but only because dir2/a's filelog changes.)
+
+  $ hg co 'desc("modify dir2/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ hg debugindex .hg/store/metadata/dir1/00manifest.i > before
+  $ hg merge 'desc("modify dir1/a")'
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  (branch merge, don't forget to commit)
+  $ hg revert -r 'desc("modify dir1/a")' .
+  reverting dir2/a (glob)
+  $ hg ci -m 'merge, keeping parent 2'
+  created new head
+  $ hg debugindex .hg/store/metadata/dir1/00manifest.i > after
+  $ diff before after
+  $ rm before after


More information about the Mercurial-devel mailing list