[PATCH 3 of 2] treemanifest: store sub-manifests revlog per directory
Martin von Zweigbergk
martinvonz at google.com
Wed Apr 15 15:45:42 CDT 2015
On Wed, Apr 15, 2015 at 1:37 PM Drew Gottlieb <drgott at google.com> wrote:
> What if we had different flags for using in-memory tree manifests and actually
> using tree revlogs?
>
I don't think there's any reason for the user to want treemanifests in
memory if the on-disk format is flat, so there's no separate config.
>
> On Wed, Apr 15, 2015 at 4:35 PM Martin von Zweigbergk <
> martinvonz at google.com> wrote:
>
>> # HG changeset patch
>> # User Martin von Zweigbergk <martinvonz at google.com>
>> # Date 1428968680 25200
>> # Mon Apr 13 16:44:40 2015 -0700
>> # Node ID 71ffbbae4096ca945a3106fd9a2297137ddad168
>> # Parent f739948917053db6732e30c62a1089390f4ba06f
>> treemanifest: store sub-manifests revlog per directory
>>
>> With this change, when experimental.treemanifest=True, commits will be
>> written with one manifest revlog per directory. The manifest revlogs
>> are stored in .hg/store/metadata/$dir/00manifest.[id].
>>
>> Flat manifests can still be read and interacted with as usual (they
>> are also read into treemanifest instances). The functionality for
>> writing treemanifest as trees to disk is in addition to the current
>> functionality of writing treemanifest as a flat manifest to disk;
>> tests still pass with '_treeinmem=True' hardcoded.
>>
>> Important functionality that is not yet done:
>>
>> * .hg/requires is not updated, so running e.g. 'hg diff --change .'
>> will crash if '.' is stored as a tree
>>
>> * Exchange is not implemented. It will probably involve a
>> changegroupv3 that allows multiple manifest revlogs.
>>
>> diff -r f73994891705 -r 71ffbbae4096 mercurial/manifest.py
>> --- a/mercurial/manifest.py Sun Apr 12 23:01:18 2015 -0700
>> +++ b/mercurial/manifest.py Mon Apr 13 16:44:40 2015 -0700
>> @@ -444,11 +444,15 @@
>> class treemanifest(object):
>> def __init__(self, dir='', text=''):
>> self._dir = dir
>> + self._node = revlog.nullid
>> self._dirs = {}
>> # Using _lazymanifest here is a little slower than plain old
>> dicts
>> self._files = {}
>> self._flags = {}
>> - self.parse(text)
>> + def readsubtree(subdir, subm):
>> + raise AssertionError('treemanifest constructor only accepts '
>> + 'flat manifests')
>> + self.parse(text, readsubtree)
>>
>> def _subpath(self, path):
>> return self._dir + path
>> @@ -464,7 +468,22 @@
>> util.all(m._isempty() for m in self._dirs.values())))
>>
>> def __str__(self):
>> - return '<treemanifest dir=%s>' % self._dir
>> + return ('<treemanifest dir=%s, node=%s>' %
>> + (self._dir, revlog.hex(self._node)))
>> +
>> + def dir(self):
>> + '''The directory that this tree manifest represents, including a
>> + trailing '/'. Empty string for the repo root directory.'''
>> + return self._dir
>> +
>> + def node(self):
>> + '''This node of this instance. nullid for unsaved instances.
>> Should
>> + be updated when the instance is read or written from a revlog.
>> + '''
>> + return self._node
>> +
>> + def setnode(self, node):
>> + self._node = node
>>
>> def iteritems(self):
>> for p, n in sorted(self._dirs.items() + self._files.items()):
>> @@ -557,6 +576,7 @@
>>
>> def setflag(self, f, flags):
>> """Set the flags (symlink, executable) for path f."""
>> + assert 'd' not in flags
>> dir, subpath = _splittopdir(f)
>> if dir:
>> if dir not in self._dirs:
>> @@ -567,6 +587,7 @@
>>
>> def copy(self):
>> copy = treemanifest(self._dir)
>> + copy._node = self._node
>> for d in self._dirs:
>> copy._dirs[d] = self._dirs[d].copy()
>> copy._files = dict.copy(self._files)
>> @@ -737,11 +758,18 @@
>> _diff(self, m2)
>> return result
>>
>> - def parse(self, text):
>> + def parse(self, text, readsubtree):
>> for f, n, fl in _parse(text):
>> - self[f] = n
>> - if fl:
>> - self.setflag(f, fl)
>> + if fl == 'd':
>> + f = f + '/'
>> + self._dirs[f] = readsubtree(self._subpath(f), n)
>> + else:
>> + # Use __setitem__ and setflag rather than assigning
>> directly
>> + # to _files and _flags, thereby letting us parse flat
>> manifests
>> + # as well as tree manifests.
>> + self[f] = n
>> + if fl:
>> + self.setflag(f, fl)
>>
>> def text(self, usemanifestv2=False):
>> """Get the full data of this manifest as a bytestring."""
>> @@ -749,8 +777,26 @@
>> return _text(((f, self[f], flags(f)) for f in self.keys()),
>> usemanifestv2)
>>
>> + def dirtext(self, usemanifestv2=False):
>> + """Get the full data of this directory as a bytestring. Make
>> sure that
>> + any submanifests have been written first, so their nodeids are
>> correct.
>> + """
>> + flags = self.flags
>> + dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
>> + files = [(f, self._files[f], flags(f)) for f in self._files]
>> + return _text(sorted(dirs + files), usemanifestv2)
>> +
>> + def writesubtrees(self, m1, m2, writesubtree):
>> + emptytree = treemanifest()
>> + for d, subm in self._dirs.iteritems():
>> + subp1 = m1._dirs.get(d, emptytree)._node
>> + subp2 = m2._dirs.get(d, emptytree)._node
>> + if subp1 == revlog.nullid:
>> + subp1, subp2 = subp2, subp1
>> + writesubtree(subm, subp1, subp2)
>> +
>> class manifest(revlog.revlog):
>> - def __init__(self, opener):
>> + def __init__(self, opener, dir=''):
>> # During normal operations, we expect to deal with not more than
>> four
>> # revs at a time (such as during commit --amend). When rebasing
>> large
>> # stacks of commits, the number can go up, hence the config knob
>> below.
>> @@ -763,14 +809,19 @@
>> usetreemanifest = opts.get('usetreemanifest',
>> usetreemanifest)
>> usemanifestv2 = opts.get('manifestv2', usemanifestv2)
>> self._mancache = util.lrucachedict(cachesize)
>> - revlog.revlog.__init__(self, opener, "00manifest.i")
>> self._treeinmem = usetreemanifest
>> self._treeondisk = usetreemanifest
>> self._usemanifestv2 = usemanifestv2
>> + indexfile = "00manifest.i"
>> + if dir:
>> + assert self._treeondisk
>> + indexfile = "metadata/" + dir + "00manifest.i"
>> + revlog.revlog.__init__(self, opener, indexfile)
>> + self._dir = dir
>>
>> def _newmanifest(self, data=''):
>> if self._treeinmem:
>> - return treemanifest('', data)
>> + return treemanifest(self._dir, data)
>> return manifestdict(data)
>>
>> def _slowreaddelta(self, node):
>> @@ -806,8 +857,17 @@
>> if node in self._mancache:
>> return self._mancache[node][0]
>> text = self.revision(node)
>> - arraytext = array.array('c', text)
>> - m = self._newmanifest(text)
>> + if self._treeondisk:
>> + def readsubtree(dir, subm):
>> + sublog = manifest(self.opener, dir)
>> + return sublog.read(subm)
>> + m = self._newmanifest()
>> + m.parse(text, readsubtree)
>> + m.setnode(node)
>> + arraytext = None
>> + else:
>> + m = self._newmanifest(text)
>> + arraytext = array.array('c', text)
>> self._mancache[node] = (m, arraytext)
>> return m
>>
>> @@ -845,10 +905,34 @@
>> # just encode a fulltext of the manifest and pass that
>> # through to the revlog layer, and let it handle the delta
>> # process.
>> - text = m.text(self._usemanifestv2)
>> - arraytext = array.array('c', text)
>> - n = self.addrevision(text, transaction, link, p1, p2)
>> + if self._treeondisk:
>> + m1 = self.read(p1)
>> + m2 = self.read(p2)
>> + n = self._addtree(m, transaction, link, m1, m2)
>> + arraytext = None
>> + else:
>> + text = m.text(self._usemanifestv2)
>> + n = self.addrevision(text, transaction, link, p1, p2)
>> + arraytext = array.array('c', text)
>>
>> self._mancache[n] = (m, arraytext)
>>
>> return n
>> +
>> + def _addtree(self, m, transaction, link, m1, m2):
>> + def writesubtree(subm, subp1, subp2):
>> + sublog = manifest(self.opener, subm.dir())
>> + sublog.add(subm, transaction, link, subp1, subp2, None, None)
>> + m.writesubtrees(m1, m2, writesubtree)
>> + text = m.dirtext(self._usemanifestv2)
>> + # If the manifest is unchanged compared to one parent,
>> + # don't write a new revision
>> + if text == m1.dirtext(self._usemanifestv2):
>> + n = m1.node()
>> + elif text == m2.dirtext(self._usemanifestv2):
>> + n = m2.node()
>> + else:
>> + n = self.addrevision(text, transaction, link, m1.node(),
>> m2.node())
>> + # Save nodeid so parent manifest can calculate its nodeid
>> + m.setnode(n)
>> + return n
>> diff -r f73994891705 -r 71ffbbae4096 tests/test-manifest-tree.t
>> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
>> +++ b/tests/test-manifest-tree.t Mon Apr 13 16:44:40 2015 -0700
>> @@ -0,0 +1,158 @@
>> +
>> +Set up repo
>> +
>> + $ hg init repo
>> + $ cd repo
>> +
>> + $ cat >> .hg/hgrc <<EOF
>> + > [experimental]
>> + > treemanifest = True
>> + > EOF
>> +
>> +Without directories, looks like any other repo
>> +
>> + $ echo 0 > a
>> + $ echo 0 > b
>> + $ hg ci -Aqm initial
>> + $ hg debugdata -m 0
>> + a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
>> + b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
>> +
>> +Submanifest is stored in separate revlog
>> +
>> + $ mkdir dir1
>> + $ echo 1 > dir1/a
>> + $ echo 1 > dir1/b
>> + $ echo 1 > e
>> + $ hg ci -Aqm 'add dir1'
>> + $ hg debugdata -m 1
>> + a\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
>> + b\x00362fef284ce2ca02aecc8de6d5e8a1c3af0556fe (esc)
>> + dir1\x008b3ffd73f901e83304c83d33132c8e774ceac44ed (esc)
>> + e\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
>> + $ hg debugdata .hg/store/metadata/dir1/00manifest.i 0
>> + a\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
>> + b\x00b8e02f6433738021a065f94175c7cd23db5f05be (esc)
>> +
>> +Can add nested directories
>> +
>> + $ mkdir dir1/dir1
>> + $ echo 2 > dir1/dir1/a
>> + $ echo 2 > dir1/dir1/b
>> + $ mkdir dir1/dir2
>> + $ echo 2 > dir1/dir2/a
>> + $ echo 2 > dir1/dir2/b
>> + $ hg ci -Aqm 'add dir1/dir1'
>> + $ hg files -r .
>> + a
>> + b
>> + dir1/a
>> + dir1/b
>> + dir1/dir1/a
>> + dir1/dir1/b
>> + dir1/dir2/a
>> + dir1/dir2/b
>> + e
>> +
>> +Revision is not created for unchanged directory
>> +
>> + $ mkdir dir2
>> + $ echo 3 > dir2/a
>> + $ hg ci -Aqm 'add dir2'
>> + $ hg debugdata -m 2 | grep -v dir1/ > before
>> + $ hg debugdata -m 3 | grep -v dir2/ > after
>> + $ diff before after
>> + $ rm before after
>> +
>> +Removing directory does not create a revlog entry
>> +
>> + $ hg rm dir1/dir1
>> + removing dir1/dir1/a
>> + removing dir1/dir1/b
>> + $ hg debugindex .hg/store/metadata/dir1/dir1/00manifest.i > before
>> + $ hg ci -qm 'remove dir1/dir1'
>> + $ hg debugindex .hg/store/metadata/dir1/dir1/00manifest.i > after
>> + $ diff before after
>> + $ rm before after
>> +
>> +Check that hg files (calls treemanifest.walk()) works
>> +
>> + $ hg co 'desc("add dir2")'
>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + $ hg files -r . dir1
>> + dir1/a
>> + dir1/b
>> + dir1/dir1/a
>> + dir1/dir1/b
>> + dir1/dir2/a
>> + dir1/dir2/b
>> +
>> +Check that status between revisions works (calls treemanifest.matches())
>> +
>> + $ hg status --rev 'desc("add dir1")' --rev . dir1
>> + A dir1/dir1/a
>> + A dir1/dir1/b
>> + A dir1/dir2/a
>> + A dir1/dir2/b
>> +
>> +Merge creates 2-parent revision of directory revlog
>> +
>> + $ echo 5 > dir1/a
>> + $ hg ci -Aqm 'modify dir1/a'
>> + $ hg co '.^'
>> + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + $ echo 6 > dir1/b
>> + $ hg ci -Aqm 'modify dir1/b'
>> + $ hg merge 'desc("modify dir1/a")'
>> + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + (branch merge, don't forget to commit)
>> + $ hg ci -m 'conflict-free merge involving dir1/'
>> + $ cat dir1/a
>> + 5
>> + $ cat dir1/b
>> + 6
>> + $ hg debugindex .hg/store/metadata/dir1/00manifest.i
>> + rev offset length base linkrev nodeid p1 p2
>> + 0 0 54 0 1 8b3ffd73f901 000000000000
>> 000000000000
>> + 1 54 68 0 2 b66d046c644f 8b3ffd73f901
>> 000000000000
>> + 2 122 12 0 4 b87265673c8a b66d046c644f
>> 000000000000
>> + 3 134 95 0 5 aa5d3adcec72 b66d046c644f
>> 000000000000
>> + 4 229 81 0 6 e29b066b91ad b66d046c644f
>> 000000000000
>> + 5 310 107 5 7 a120ce2b83f5 e29b066b91ad
>> aa5d3adcec72
>> +
>> +Merge keeping directory from parent 1 does not create revlog entry.
>> (Note that
>> +dir1's manifest does change, but only because dir1/a's filelog changes.)
>> +
>> + $ hg co 'desc("add dir2")'
>> + 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + $ echo 8 > dir2/a
>> + $ hg ci -m 'modify dir2/a'
>> + created new head
>> +
>> + $ hg debugindex .hg/store/metadata/dir2/00manifest.i > before
>> + $ hg merge 'desc("modify dir1/a")'
>> + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + (branch merge, don't forget to commit)
>> + $ hg revert -r 'desc("modify dir2/a")' .
>> + reverting dir1/a (glob)
>> + $ hg ci -m 'merge, keeping parent 1'
>> + $ hg debugindex .hg/store/metadata/dir2/00manifest.i > after
>> + $ diff before after
>> + $ rm before after
>> +
>> +Merge keeping directory from parent 2 does not create revlog entry.
>> (Note that
>> +dir2's manifest does change, but only because dir2/a's filelog changes.)
>> +
>> + $ hg co 'desc("modify dir2/a")'
>> + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + $ hg debugindex .hg/store/metadata/dir1/00manifest.i > before
>> + $ hg merge 'desc("modify dir1/a")'
>> + 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
>> + (branch merge, don't forget to commit)
>> + $ hg revert -r 'desc("modify dir1/a")' .
>> + reverting dir2/a (glob)
>> + $ hg ci -m 'merge, keeping parent 2'
>> + created new head
>> + $ hg debugindex .hg/store/metadata/dir1/00manifest.i > after
>> + $ diff before after
>> + $ rm before after
>>
> _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel at selenic.com
>> http://selenic.com/mailman/listinfo/mercurial-devel
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20150415/8ca6147c/attachment.html>
More information about the Mercurial-devel
mailing list