[PATCH 1 of 5 v2] verify: check directory manifests

Martin von Zweigbergk martinvonz at google.com
Mon Feb 22 07:12:07 UTC 2016


# HG changeset patch
# User Martin von Zweigbergk <martinvonz at google.com>
# Date 1454908404 28800
#      Sun Feb 07 21:13:24 2016 -0800
# Node ID ec503337bacbd39a4b23a91a4d8b074e3f8a9c6d
# Parent  3db70405779e6da52bd6fd45ced2e3cc669fce4d
# Available At http://42.netv6.net/martinvonz-wip/mercurial/
#              hg pull http://42.netv6.net/martinvonz-wip/mercurial/
-r ec503337bacb
verify: check directory manifests

In repos with treemanifests, there is no specific verification of
directory manifest revlogs. It simply collects all file nodes by
reading each manifest delta. With treemanifests, that's means calling
the manifest._slowreaddelta(). If there are missing revlog entries in
a subdirectory revlog, 'hg verify' will simply report the exception
that occurred while trying to read the root manifest:


  manifest at 0: reading delta 1700e2e92882:
meta/b/00manifest.i at 67688a370455: no node

This patch changes the verify code to load only the root manifest at
first and verify all revisions of it, then verify all revisions of
each direct subdirectory, and so on, recursively. The above message
becomes

  b/@0: parent-directory manifest refers to unknown revision 67688a370455

Since the new algorithm reads a single revlog at a time and in order,
'hg verify' on a treemanifest version of the hg core repo goes from
~50s to ~14s. As expected, there is no significant difference on a
repo with flat manifests.

diff -r 3db70405779e -r ec503337bacb mercurial/manifest.py
--- a/mercurial/manifest.py     Fri Feb 12 18:39:48 2016 +0900
+++ b/mercurial/manifest.py     Sun Feb 07 21:13:24 2016 -0800
@@ -325,6 +325,9 @@
     def iteritems(self):
         return (x[:2] for x in self._lm.iterentries())

+    def iterentries(self):
+        return self._lm.iterentries()
+
     def text(self, usemanifestv2=False):
         if usemanifestv2:
             return _textv2(self._lm.iterentries())
@@ -920,7 +923,8 @@
         return manifestdict(data)

     def dirlog(self, dir):
-        assert self._treeondisk
+        if dir:
+            assert self._treeondisk
         if dir not in self._dirlogcache:
             self._dirlogcache[dir] = manifest(self.opener, dir,
                                               self._dirlogcache)
@@ -945,6 +949,22 @@
         d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
         return self._newmanifest(d)

+    def readshallowdelta(self, node):
+        '''For flat manifests, this is the same as readdelta(). For
+        treemanifests, this will read the delta for this revlog's directory,
+        without recursively reading subdirectory manifests. Instead, any
+        subdirectory entry will be reported as it appears in the
manifests, i.e.
+        the subdirectory will be reported among files and distinguished only by
+        its 't' flag.'''
+        if not self._treeondisk:
+            return self.readdelta(node)
+        if self._usemanifestv2:
+            raise error.Abort(
+                "readshallowdelta() not implemented for manifestv2")
+        r = self.rev(node)
+        d = mdiff.patchtext(self.revdiff(self.deltaparent(r), r))
+        return manifestdict(d)
+
     def readfast(self, node):
         '''use the faster of readdelta or read

diff -r 3db70405779e -r ec503337bacb mercurial/verify.py
--- a/mercurial/verify.py       Fri Feb 12 18:39:48 2016 +0900
+++ b/mercurial/verify.py       Sun Feb 07 21:13:24 2016 -0800
@@ -197,46 +197,73 @@
         ui.progress(_('checking'), None)
         return mflinkrevs, filelinkrevs

-    def _verifymanifest(self, mflinkrevs):
+    def _verifymanifest(self, mflinkrevs, dir=""):
         repo = self.repo
         ui = self.ui
-        mf = self.repo.manifest
+        mf = self.repo.manifest.dirlog(dir)

-        ui.status(_("checking manifests\n"))
+        if not dir:
+            self.ui.status(_("checking manifests\n"))
+
         filenodes = {}
+        subdirnodes = {}
         seen = {}
         label = "manifest"
+        if dir:
+            label = dir
         if self.refersmf:
             # Do not check manifest if there are only changelog entries with
             # null manifests.
             self.checklog(mf, label, 0)
         total = len(mf)
         for i in mf:
-            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
+            if not dir:
+                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
             n = mf.node(i)
             lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
             if n in mflinkrevs:
                 del mflinkrevs[n]
+            elif dir:
+                self.err(lr, _("%s not in parent-directory manifest") %
+                         short(n), label)
             else:
                 self.err(lr, _("%s not in changesets") % short(n), label)

             try:
-                for f, fn in mf.readdelta(n).iteritems():
+                for f, fn, fl in mf.readshallowdelta(n).iterentries():
                     if not f:
-                        self.err(lr, _("file without name in manifest"))
-                    elif f != "/dev/null": # ignore this in very old repos
-                        if _validpath(repo, f):
-                            filenodes.setdefault(
-                                _normpath(f), {}).setdefault(fn, lr)
+                        self.err(lr, _("entry without name in manifest"))
+                    elif f == "/dev/null":  # ignore this in very old repos
+                        continue
+                    fullpath = dir + _normpath(f)
+                    if not _validpath(repo, fullpath):
+                        continue
+                    if fl == 't':
+                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
+                            fn, []).append(lr)
+                    else:
+                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self.exc(lr, _("reading delta %s") % short(n), inst, label)
-        ui.progress(_('checking'), None)
+        if not dir:
+            ui.progress(_('checking'), None)

         if self.havemf:
             for c, m in sorted([(c, m) for m in mflinkrevs
                         for c in mflinkrevs[m]]):
-                self.err(c, _("changeset refers to unknown revision %s") %
-                         short(m), label)
+                if dir:
+                    self.err(c, _("parent-directory manifest refers
to unknown "
+                                  "revision %s") % short(m), label)
+                else:
+                    self.err(c, _("changeset refers to unknown revision %s") %
+                             short(m), label)
+
+        if not dir and subdirnodes:
+            self.ui.status(_("checking directory manifests\n"))
+        for subdir, linkrevs in subdirnodes.iteritems():
+            subdirfilenodes = self._verifymanifest(linkrevs, subdir)
+            for f, onefilenodes in subdirfilenodes.iteritems():
+                filenodes.setdefault(f, {}).update(onefilenodes)

         return filenodes

diff -r 3db70405779e -r ec503337bacb tests/test-treemanifest.t
--- a/tests/test-treemanifest.t Fri Feb 12 18:39:48 2016 +0900
+++ b/tests/test-treemanifest.t Sun Feb 07 21:13:24 2016 -0800
@@ -471,6 +471,7 @@
   $ hg verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -503,6 +504,56 @@
 Finish first server
   $ killdaemons.py

+Back up the recently added revlogs
+  $ cp -r .hg/store .hg/store-newcopy
+
+Verify reports missing dirlog
+  $ rm .hg/store/meta/b/00manifest.*
+  $ hg verify
+  checking changesets
+  checking manifests
+  checking directory manifests
+   0: empty or missing b/
+   b/@0: parent-directory manifest refers to unknown revision 67688a370455
+   b/@1: parent-directory manifest refers to unknown revision f38e85d334c5
+   b/@2: parent-directory manifest refers to unknown revision 99c9792fd4b0
+  crosschecking files in changesets and manifests
+   b/bar/fruits.txt at 0: in changeset but not in manifest
+   b/bar/orange/fly/gnat.py at 0: in changeset but not in manifest
+   b/bar/orange/fly/housefly.txt at 0: in changeset but not in manifest
+   b/foo/apple/bees/flower.py at 0: in changeset but not in manifest
+  checking files
+  8 files, 3 changesets, 10 total revisions
+  8 integrity errors encountered!
+  (first damaged changeset appears to be 0)
+  [1]
+  $ cp -rT .hg/store-newcopy .hg/store
+
+Verify reports missing dirlog entry
+  $ mv -f .hg/store-copy/meta/b/00manifest.* .hg/store/meta/b/
+  $ hg verify
+  checking changesets
+  checking manifests
+  checking directory manifests
+   b/@1: parent-directory manifest refers to unknown revision f38e85d334c5
+   b/@2: parent-directory manifest refers to unknown revision 99c9792fd4b0
+   b/bar/@?: rev 1 points to unexpected changeset 1
+   b/bar/@?: 5e03c4ee5e4a not in parent-directory manifest
+   b/bar/@?: rev 2 points to unexpected changeset 2
+   b/bar/@?: 1b16940d66d6 not in parent-directory manifest
+   b/bar/orange/@?: rev 1 points to unexpected changeset 2
+   (expected None)
+   b/bar/orange/fly/@?: rev 1 points to unexpected changeset 2
+   (expected None)
+  crosschecking files in changesets and manifests
+  checking files
+  8 files, 3 changesets, 10 total revisions
+  2 warnings encountered!
+  8 integrity errors encountered!
+  (first damaged changeset appears to be 1)
+  [1]
+  $ cp -rT .hg/store-newcopy .hg/store
+
 Test cloning a treemanifest repo over http.
   $ hg serve -p $HGPORT -d --pid-file=hg.pid --errorlog=errors.log
   $ cat hg.pid >> $DAEMON_PIDS
@@ -547,6 +598,7 @@
   $ hg verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -591,6 +643,7 @@
   $ hg -R local-clone-basicstore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -600,6 +653,7 @@
   $ hg -R local-clone-encodedstore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -609,6 +663,7 @@
   $ hg -R local-clone-fncachestore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -624,6 +679,7 @@
   $ hg -R stream-clone-basicstore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -639,6 +695,7 @@
   $ hg -R stream-clone-encodedstore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions
@@ -654,6 +711,7 @@
   $ hg -R stream-clone-fncachestore verify
   checking changesets
   checking manifests
+  checking directory manifests
   crosschecking files in changesets and manifests
   checking files
   8 files, 3 changesets, 10 total revisions


More information about the Mercurial-devel mailing list