D4745: revlog: move revision verification out of verify

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Sep 26 15:37:36 EDT 2018


indygreg updated this revision to Diff 11417.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4745?vs=11400&id=11417

REVISION DETAIL
  https://phab.mercurial-scm.org/D4745

AFFECTED FILES
  mercurial/filelog.py
  mercurial/repository.py
  mercurial/revlog.py
  mercurial/verify.py

CHANGE DETAILS

diff --git a/mercurial/verify.py b/mercurial/verify.py
--- a/mercurial/verify.py
+++ b/mercurial/verify.py
@@ -343,7 +343,10 @@
 
         state = {
             # TODO this assumes revlog storage for changelog.
-            'expectedversion': self.repo.changelog.version & 0xFFFF
+            'expectedversion': self.repo.changelog.version & 0xFFFF,
+            'skipflags': self.skipflags,
+            # experimental config: censor.policy
+            'erroroncensored': ui.config('censor', 'policy') == 'abort',
         }
 
         files = sorted(set(filenodes) | set(filelinkrevs))
@@ -381,18 +384,25 @@
             if not len(fl) and (self.havecl or self.havemf):
                 self.err(lr, _("empty or missing %s") % f)
             else:
+                # Guard against implementations not setting this.
+                state['skipread'] = set()
                 for problem in fl.verifyintegrity(state):
+                    if problem.node is not None:
+                        linkrev = fl.linkrev(fl.rev(problem.node))
+                    else:
+                        linkrev = None
+
                     if problem.warning:
                         self.warn(problem.warning)
                     elif problem.error:
-                        self.err(lr, problem.error, f)
+                        self.err(linkrev if linkrev is not None else lr,
+                                 problem.error, f)
                     else:
                         raise error.ProgrammingError(
                             'problem instance does not set warning or error '
                             'attribute: %s' % problem.msg)
 
             seen = {}
-            rp = None
             for i in fl:
                 revisions += 1
                 n = fl.node(i)
@@ -403,75 +413,15 @@
                     else:
                         del filenodes[f][n]
 
-                # Verify contents. 4 cases to care about:
-                #
-                #   common: the most common case
-                #   rename: with a rename
-                #   meta: file content starts with b'\1\n', the metadata
-                #         header defined in filelog.py, but without a rename
-                #   ext: content stored externally
-                #
-                # More formally, their differences are shown below:
-                #
-                #                       | common | rename | meta  | ext
-                #  -------------------------------------------------------
-                #   flags()             | 0      | 0      | 0     | not 0
-                #   renamed()           | False  | True   | False | ?
-                #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
-                #
-                # "rawtext" means the raw text stored in revlog data, which
-                # could be retrieved by "revision(rev, raw=True)". "text"
-                # mentioned below is "revision(rev, raw=False)".
-                #
-                # There are 3 different lengths stored physically:
-                #  1. L1: rawsize, stored in revlog index
-                #  2. L2: len(rawtext), stored in revlog data
-                #  3. L3: len(text), stored in revlog data if flags==0, or
-                #     possibly somewhere else if flags!=0
-                #
-                # L1 should be equal to L2. L3 could be different from them.
-                # "text" may or may not affect commit hash depending on flag
-                # processors (see revlog.addflagprocessor).
-                #
-                #              | common  | rename | meta  | ext
-                # -------------------------------------------------
-                #    rawsize() | L1      | L1     | L1    | L1
-                #       size() | L1      | L2-LM  | L1(*) | L1 (?)
-                # len(rawtext) | L2      | L2     | L2    | L2
-                #    len(text) | L2      | L2     | L2    | L3
-                #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
-                #
-                # LM:  length of metadata, depending on rawtext
-                # (*): not ideal, see comment in filelog.size
-                # (?): could be "- len(meta)" if the resolved content has
-                #      rename metadata
-                #
-                # Checks needed to be done:
-                #  1. length check: L1 == L2, in all cases.
-                #  2. hash check: depending on flag processor, we may need to
-                #     use either "text" (external), or "rawtext" (in revlog).
-                try:
-                    skipflags = self.skipflags
-                    if skipflags:
-                        skipflags &= fl.flags(i)
-                    if not skipflags:
-                        fl.read(n) # side effect: read content and do checkhash
-                        rp = fl.renamed(n)
-                    # the "L1 == L2" check
-                    l1 = fl.rawsize(i)
-                    l2 = len(fl.revision(n, raw=True))
-                    if l1 != l2:
-                        self.err(lr, _("unpacked size is %s, %s expected") %
-                                 (l2, l1), f)
-                except error.CensoredNodeError:
-                    # experimental config: censor.policy
-                    if ui.config("censor", "policy") == "abort":
-                        self.err(lr, _("censored file data"), f)
-                except Exception as inst:
-                    self.exc(lr, _("unpacking %s") % short(n), inst, f)
+                if n in state['skipread']:
+                    continue
 
                 # check renames
                 try:
+                    # This requires resolving fulltext (at least on revlogs). We
+                    # may want ``verifyintegrity()`` to pass a set of nodes with
+                    # rename metadata as an optimization.
+                    rp = fl.renamed(n)
                     if rp:
                         if lr is not None and ui.verbose:
                             ctx = lrugetctx(lr)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -29,6 +29,7 @@
     nullhex,
     nullid,
     nullrev,
+    short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
@@ -260,6 +261,7 @@
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
+    node = attr.ib(default=None)
 
 # index v0:
 #  4 bytes: offset
@@ -2644,6 +2646,89 @@
                 warning=_("warning: '%s' uses revlog format %d; expected %d") %
                         (self.indexfile, version, state['expectedversion']))
 
+        state['skipread'] = set()
+
+        for rev in self:
+            node = self.node(rev)
+
+            # Verify contents. 4 cases to care about:
+            #
+            #   common: the most common case
+            #   rename: with a rename
+            #   meta: file content starts with b'\1\n', the metadata
+            #         header defined in filelog.py, but without a rename
+            #   ext: content stored externally
+            #
+            # More formally, their differences are shown below:
+            #
+            #                       | common | rename | meta  | ext
+            #  -------------------------------------------------------
+            #   flags()             | 0      | 0      | 0     | not 0
+            #   renamed()           | False  | True   | False | ?
+            #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
+            #
+            # "rawtext" means the raw text stored in revlog data, which
+            # could be retrieved by "revision(rev, raw=True)". "text"
+            # mentioned below is "revision(rev, raw=False)".
+            #
+            # There are 3 different lengths stored physically:
+            #  1. L1: rawsize, stored in revlog index
+            #  2. L2: len(rawtext), stored in revlog data
+            #  3. L3: len(text), stored in revlog data if flags==0, or
+            #     possibly somewhere else if flags!=0
+            #
+            # L1 should be equal to L2. L3 could be different from them.
+            # "text" may or may not affect commit hash depending on flag
+            # processors (see revlog.addflagprocessor).
+            #
+            #              | common  | rename | meta  | ext
+            # -------------------------------------------------
+            #    rawsize() | L1      | L1     | L1    | L1
+            #       size() | L1      | L2-LM  | L1(*) | L1 (?)
+            # len(rawtext) | L2      | L2     | L2    | L2
+            #    len(text) | L2      | L2     | L2    | L3
+            #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
+            #
+            # LM:  length of metadata, depending on rawtext
+            # (*): not ideal, see comment in filelog.size
+            # (?): could be "- len(meta)" if the resolved content has
+            #      rename metadata
+            #
+            # Checks needed to be done:
+            #  1. length check: L1 == L2, in all cases.
+            #  2. hash check: depending on flag processor, we may need to
+            #     use either "text" (external), or "rawtext" (in revlog).
+
+            try:
+                skipflags = state.get('skipflags', 0)
+                if skipflags:
+                    skipflags &= self.flags(rev)
+
+                if skipflags:
+                    state['skipread'].add(node)
+                else:
+                    # Side-effect: read content and verify hash.
+                    self.revision(node)
+
+                l1 = self.rawsize(rev)
+                l2 = len(self.revision(node, raw=True))
+
+                if l1 != l2:
+                    yield revlogproblem(
+                        error=_('unpacked size is %d, %d expected') % (l2, l1),
+                        node=node)
+
+            except error.CensoredNodeError:
+                if state['erroroncensored']:
+                    yield revlogproblem(error=_('censored file data'),
+                                        node=node)
+                    state['skipread'].add(node)
+            except Exception as e:
+                yield revlogproblem(
+                    error=_('unpacking %s: %s') % (short(node), e),
+                    node=node)
+                state['skipread'].add(node)
+
     def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                     revisionscount=False, trackedsize=False,
                     storedsize=False):
diff --git a/mercurial/repository.py b/mercurial/repository.py
--- a/mercurial/repository.py
+++ b/mercurial/repository.py
@@ -341,6 +341,12 @@
     error = interfaceutil.Attribute(
         """Message indicating a fatal problem.""")
 
+    node = interfaceutil.Attribute(
+        """Revision encountering the problem.
+
+        ``None`` means the problem doesn't apply to a single revision.
+        """)
+
 class irevisiondelta(interfaceutil.Interface):
     """Represents a delta between one revision and another.
 
@@ -790,6 +796,10 @@
         used to communicate data between invocations of multiple storage
         primitives.
 
+        If individual revisions cannot have their revision content resolved,
+        the method is expected to set the ``skipread`` key to a set of nodes
+        that encountered problems.
+
         The method yields objects conforming to the ``iverifyproblem``
         interface.
         """
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -53,7 +53,7 @@
     def linkrev(self, rev):
         return self._revlog.linkrev(rev)
 
-    # Used by verify.
+    # Unused.
     def flags(self, rev):
         return self._revlog.flags(rev)
 
@@ -77,7 +77,7 @@
     def iscensored(self, rev):
         return self._revlog.iscensored(rev)
 
-    # Used by repo verify.
+    # Unused.
     def rawsize(self, rev):
         return self._revlog.rawsize(rev)
 



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list