D4701: verify: start to abstract file verification

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Mon Sep 24 16:12:47 UTC 2018


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Currently, the file storage interface has a handful of attributes
  that are exclusively or near-exclusively used by repo verification
  code. In order to support verification on non-revlog/alternate
  storage backends, we'll need to abstract verification so it can
  be performed in a storage-agnostic way.
  
  This commit starts that process.
  
  We establish a new verifyintegrity() method on revlogs and expose
  it to the file storage interface. Most of verify.verifier.checklog()
  has been ported to this new method.
  
  We need a way to represent verification problems. So we invent an
  interface to represent a verification problem, invent a revlog type
  to implement that interface, and use it.
  
  The arguments to verifyintegrity() will almost certainly change in
  the future, once more functionality is ported from the verify code.
  And the "revlogv1" version check is very hacky. (The code in verify
  is actually buggy because it is comparing the full 32-bit header
  integer instead of just the revlog version short. I'll likely fix
  this in a subsequent commit.)

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4701

AFFECTED FILES
  mercurial/filelog.py
  mercurial/repository.py
  mercurial/revlog.py
  mercurial/verify.py
  tests/test-check-interfaces.py

CHANGE DETAILS

diff --git a/tests/test-check-interfaces.py b/tests/test-check-interfaces.py
--- a/tests/test-check-interfaces.py
+++ b/tests/test-check-interfaces.py
@@ -229,4 +229,8 @@
         basenode=b'')
     checkzobject(rdr)
 
+    ziverify.verifyClass(repository.iverifyproblem,
+                         revlog.revlogproblem)
+    checkzobject(revlog.revlogproblem())
+
 main()
diff --git a/mercurial/verify.py b/mercurial/verify.py
--- a/mercurial/verify.py
+++ b/mercurial/verify.py
@@ -341,6 +341,10 @@
             elif (size > 0 or not revlogv1) and f.startswith('data/'):
                 storefiles.add(_normpath(f))
 
+        state = {
+            'revlogv1': self.revlogv1,
+        }
+
         files = sorted(set(filenodes) | set(filelinkrevs))
         revisions = 0
         progress = ui.makeprogress(_('checking'), unit=_('files'),
@@ -373,7 +377,19 @@
                                   ff)
                         self.fncachewarned = True
 
-            self.checklog(fl, f, lr)
+            if not len(fl) and (self.havecl or self.havemf):
+                self.err(lr, _("empty or missing %s") % f)
+            else:
+                for problem in fl.verifyintegrity(state):
+                    if problem.warning:
+                        self.warn(problem.warning)
+                    elif problem.error:
+                        self.err(lr, problem.error, f)
+                    else:
+                        raise error.ProgrammingError(
+                            'problem instance does not set warning or error '
+                            'attribute: %s' % problem.msg)
+
             seen = {}
             rp = None
             for i in fl:
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -254,6 +254,12 @@
     revision = attr.ib()
     delta = attr.ib()
 
+ at interfaceutil.implementer(repository.iverifyproblem)
+ at attr.s(frozen=True)
+class revlogproblem(object):
+    warning = attr.ib(default=None)
+    error = attr.ib(default=None)
+
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
@@ -2581,3 +2587,23 @@
         if dataread is not idxread:
             dataread.close()
             datawrite.close()
+
+    def verifyintegrity(self, state):
+        """Verifies the integrity of the revlog.
+
+        Yields ``revlogproblem`` instances describing problems that are
+        found.
+        """
+        dd, di = self.checksize()
+        if dd:
+            yield revlogproblem(error=_('data length off by %d bytes') % dd)
+        if di:
+            yield revlogproblem(error=_('index contains %d extra bytes') % di)
+
+        if self.version != REVLOGV0:
+            if not state['revlogv1']:
+                yield revlogproblem(warning=_("warning: `%s' uses revlog "
+                                             "format 1") % self.indexfile)
+        elif state['revlogv1']:
+            yield revlogproblem(warning=_("warning: `%s' uses revlog "
+                                          "format 0") % self.indexfile)
diff --git a/mercurial/repository.py b/mercurial/repository.py
--- a/mercurial/repository.py
+++ b/mercurial/repository.py
@@ -318,6 +318,20 @@
             _('cannot %s; remote repository does not support the %r '
               'capability') % (purpose, name))
 
+class iverifyproblem(interfaceutil.Interface):
+    """Represents a problem with the integrity of the repository.
+
+    Instances of this interface are emitted to describe an integrity issue
+    with a repository (e.g. corrupt storage, missing data, etc).
+
+    Instances are essentially messages associated with severity.
+    """
+    warning = interfaceutil.Attribute(
+        """Message indicating a non-fatal problem.""")
+
+    error = interfaceutil.Attribute(
+        """Message indicating a fatal problem.""")
+
 class irevisiondelta(interfaceutil.Interface):
     """Represents a delta between one revision and another.
 
@@ -749,6 +763,17 @@
         TODO this is used by verify and it should not be part of the interface.
         """
 
+    def verifyintegrity(state):
+        """Verifies the integrity of file storage.
+
+        ``state`` is a dict holding state of the verifier process. It can be
+        used to communicate data between invocations of multiple storage
+        primitives.
+
+        The method yields objects conforming to the ``iverifyproblem``
+        interface.
+        """
+
 class idirs(interfaceutil.Interface):
     """Interface representing a collection of directories from paths.
 
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -189,6 +189,9 @@
 
         return True
 
+    def verifyintegrity(self, state):
+        return self._revlog.verifyintegrity(state)
+
     # TODO these aren't part of the interface and aren't internal methods.
     # Callers should be fixed to not use them.
 



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list