[PATCH 2 of 3] verify store entries

Adrian Buehlmann adrian at cadifra.com
Sun Jul 27 13:08:02 CDT 2008


# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1217181626 -7200
# Node ID 977c8bb9760998964da2344c7fa1d573422f16dc
# Parent  4a78146f1df80549da42973905f3d4e76f09514e
verify store entries

* reports decode failures
* warns about orphaned non-empty files

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1332,3 +1332,9 @@
             di = 0
 
         return (dd, di)
+
+    def files(self):
+        '''return [indexfile], plus datafile when the revlog is not inline'''
+        if self._inline:
+            return [self.indexfile]
+        return [self.indexfile, self.datafile]
diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -5,6 +5,7 @@
 # This software may be used and distributed according to the terms
 # of the GNU General Public License, incorporated herein by reference.
 
+from i18n import _
 import os, stat, osutil, util
 
 def _buildencodefun():
@@ -59,6 +60,7 @@
         except OSError:
             mode = None
         self.createmode = mode
+        self.encodefn = lambda x: x
 
     def join(self, f):
         return os.path.join(self.path, f)
@@ -77,14 +79,14 @@
             if (len(f) > 2) and f[-2:] in filetypes:
                 yield util.pconvert(f[striplen:]), size
 
-    def _datafiles(self):
-        for x in self._revlogfiles('data', True):
-            yield x
+    def datafiles(self, reporterror=None):
+        for f, size in self._revlogfiles('data', True):
+            yield f, size
 
     def walk(self):
         '''yields (direncoded filename, size)'''
         # yield data files first
-        for x in self._datafiles():
+        for x in self.datafiles():
             yield x
         # yield manifest before changelog
         meta = util.sort(self._revlogfiles())
@@ -95,7 +97,6 @@
 class directstore(_store):
     def __init__(self, path):
         _store.__init__(self, path)
-        self.encodefn = lambda x: x
         self.opener = util.opener(self.path)
         self.opener.createmode = self.createmode
 
@@ -107,9 +108,14 @@
         op.createmode = self.createmode
         self.opener = lambda f, *args, **kw: op(self.encodefn(f), *args, **kw)
 
-    def _datafiles(self):
+    def datafiles(self, reporterror=None):
         for f, size in self._revlogfiles('data', True):
-            yield decodefilename(f), size
+            try:
+                yield decodefilename(f), size
+            except KeyError:
+                if not reporterror:
+                    raise
+                reporterror(_("cannot decode filename '%s'") % f)
 
     def join(self, f):
         return os.path.join(self.path, self.encodefn(f))
diff --git a/mercurial/verify.py b/mercurial/verify.py
--- a/mercurial/verify.py
+++ b/mercurial/verify.py
@@ -93,6 +93,35 @@
         seen[n] = i
         return lr
 
+    class checkstore:
+        '''cross-check non-empty store data files against filelog files'''
+        def err(self, text):
+            ui.warn(_(" store: %s\n") % text)
+            errors[0] += 1
+
+        def __init__(self, store):
+            self.store = store
+            self.datafiles = [f for f, size in store.datafiles(self.err)
+                              if size > 0]
+            self.filelogfiles = []
+
+        def checkfilelog(self, fl, f):
+            for ff in fl.files():
+                if not ff in self.datafiles:
+                    eff = self.store.encodefn(ff)
+                    self.err(_("missing file '%s' for '%s'") % (eff, f))
+                self.filelogfiles.append(ff)
+
+        def warnorphans(self):
+            for f in self.datafiles:
+                if not f in self.filelogfiles:
+                    ef = self.store.encodefn(f)
+                    # format each part on its own: f may contain '%'
+                    t = _("warning: orphaned store file '%s'") % ef
+                    if f != ef:
+                        t += _(" (decodes to '%s')") % f
+                    warn(t)
+
     revlogv1 = cl.version != revlog.REVLOGV0
     if ui.verbose or not revlogv1:
         ui.status(_("repository uses revlog format %d\n") %
@@ -158,10 +187,12 @@
                 err(lr, _("in manifest but not in changeset"), f)
 
     ui.status(_("checking files\n"))
+    cs = checkstore(repo.store)
     files = util.sort(util.unique(filenodes.keys() + filelinkrevs.keys()))
     for f in files:
         fl = repo.file(f)
         checklog(fl, f)
+        cs.checkfilelog(fl, f)
         seen = {}
         for i in fl:
             revisions += 1
@@ -204,6 +235,8 @@
             fns = [(mf.linkrev(l), n) for n,l in filenodes[f].items()]
             for lr, node in util.sort(fns):
                 err(lr, _("%s in manifests not found") % short(node), f)
+    
+    cs.warnorphans()
 
     ui.status(_("%d files, %d changesets, %d total revisions\n") %
                    (len(files), len(cl), revisions))


More information about the Mercurial-devel mailing list