D4869: revlog: rewrite censoring logic

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Thu Oct 4 18:52:37 EDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rHGdf59040a0fbb: revlog: rewrite censoring logic (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4869?vs=11650&id=11689

REVISION DETAIL
  https://phab.mercurial-scm.org/D4869

AFFECTED FILES
  mercurial/filelog.py
  mercurial/localrepo.py
  mercurial/revlog.py
  mercurial/testing/storage.py
  tests/test-storage.py

CHANGE DETAILS

diff --git a/tests/test-storage.py b/tests/test-storage.py
--- a/tests/test-storage.py
+++ b/tests/test-storage.py
@@ -30,7 +30,7 @@
     return fl
 
 def maketransaction(self):
-    vfsmap = {'plain': STATE['vfs']}
+    vfsmap = {'plain': STATE['vfs'], 'store': STATE['vfs']}
 
     return transaction.transaction(STATE['ui'].warn, STATE['vfs'], vfsmap,
                                    b'journal', b'undo')
diff --git a/mercurial/testing/storage.py b/mercurial/testing/storage.py
--- a/mercurial/testing/storage.py
+++ b/mercurial/testing/storage.py
@@ -1175,14 +1175,9 @@
         self.assertEqual(list(f.revs()), [0, 1, 2])
 
         self.assertEqual(f.read(node0), b'foo\n' * 30)
+        self.assertEqual(f.read(node2), b'foo\n' * 32)
 
-        # TODO revlog can't resolve revision after censor. Probably due to a
-        # cache on the revlog instance.
-        with self.assertRaises(error.StorageError):
-            self.assertEqual(f.read(node2), b'foo\n' * 32)
-
-        # TODO should raise CensoredNodeError, but fallout from above prevents.
-        with self.assertRaises(error.StorageError):
+        with self.assertRaises(error.CensoredNodeError):
             f.read(node1)
 
     def testgetstrippointnoparents(self):
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -2341,94 +2341,69 @@
             destrevlog._lazydeltabase = oldlazydeltabase
             destrevlog._deltabothparents = oldamd
 
-    def censorrevision(self, node, tombstone=b''):
+    def censorrevision(self, tr, censornode, tombstone=b''):
         if (self.version & 0xFFFF) == REVLOGV0:
             raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                     self.version)
 
-        rev = self.rev(node)
+        censorrev = self.rev(censornode)
         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
 
-        if len(tombstone) > self.rawsize(rev):
+        if len(tombstone) > self.rawsize(censorrev):
             raise error.Abort(_('censor tombstone must be no longer than '
                                 'censored data'))
 
-        # Using two files instead of one makes it easy to rewrite entry-by-entry
-        idxread = self.opener(self.indexfile, 'r')
-        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
-        if self.version & FLAG_INLINE_DATA:
-            dataread, datawrite = idxread, idxwrite
-        else:
-            dataread = self.opener(self.datafile, 'r')
-            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
+        # Rewriting the revlog in place is hard. Our strategy for censoring is
+        # to create a new revlog, copy all revisions to it, then replace the
+        # revlogs on transaction close.
 
-        # Copy all revlog data up to the entry to be censored.
-        offset = self.start(rev)
-
-        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
-            idxwrite.write(chunk)
-        for chunk in util.filechunkiter(dataread, limit=offset):
-            datawrite.write(chunk)
+        newindexfile = self.indexfile + b'.tmpcensored'
+        newdatafile = self.datafile + b'.tmpcensored'
 
-        def rewriteindex(r, newoffs, newdata=None):
-            """Rewrite the index entry with a new data offset and new data.
+        # This is a bit dangerous. We could easily have a mismatch of state.
+        newrl = revlog(self.opener, newindexfile, newdatafile,
+                       censorable=True)
+        newrl.version = self.version
+        newrl._generaldelta = self._generaldelta
+        newrl._io = self._io
 
-            The newdata argument, if given, is a tuple of three positive
-            integers: (new compressed, new uncompressed, added flag bits).
-            """
-            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
-            flags = gettype(offlags)
-            if newdata:
-                comp, uncomp, nflags = newdata
-                flags |= nflags
-            offlags = offset_type(newoffs, flags)
-            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
-            idxwrite.write(self._io.packentry(e, None, self.version, r))
-            idxread.seek(self._io.size, 1)
+        for rev in self.revs():
+            node = self.node(rev)
+            p1, p2 = self.parents(node)
 
-        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
-            """Write the given fulltext with the given data offset.
+            if rev == censorrev:
+                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
+                                     p1, p2, censornode, REVIDX_ISCENSORED)
 
-            Returns:
-                The integer number of data bytes written, for tracking data
-                offsets.
-            """
-            flag, compdata = self.compress(data)
-            newcomp = len(flag) + len(compdata)
-            rewriteindex(r, offs, (newcomp, len(data), nflags))
-            datawrite.write(flag)
-            datawrite.write(compdata)
-            dataread.seek(self.length(r), 1)
-            return newcomp
-
-        # Rewrite censored entry with (padded) tombstone data.
-        pad = ' ' * (self.rawsize(rev) - len(tombstone))
-        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
+                if newrl.deltaparent(rev) != nullrev:
+                    raise error.Abort(_('censored revision stored as delta; '
+                                        'cannot censor'),
+                                      hint=_('censoring of revlogs is not '
+                                             'fully implemented; please report '
+                                             'this bug'))
+                continue
 
-        # Rewrite all following filelog revisions fixing up offsets and deltas.
-        for srev in pycompat.xrange(rev + 1, len(self)):
-            if rev in self.parentrevs(srev):
-                # Immediate children of censored node must be re-added as
-                # fulltext.
-                try:
-                    revdata = self.revision(srev)
-                except error.CensoredNodeError as e:
-                    revdata = e.tombstone
-                dlen = rewrite(srev, offset, revdata)
+            if self.iscensored(rev):
+                if self.deltaparent(rev) != nullrev:
+                    raise error.Abort(_('cannot censor due to censored '
+                                        'revision having delta stored'))
+                rawtext = self._chunk(rev)
             else:
-                # Copy any other revision data verbatim after fixing up the
-                # offset.
-                rewriteindex(srev, offset)
-                dlen = self.length(srev)
-                for chunk in util.filechunkiter(dataread, limit=dlen):
-                    datawrite.write(chunk)
-            offset += dlen
+                rawtext = self.revision(rev, raw=True)
+
+            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
+                                 self.flags(rev))
 
-        idxread.close()
-        idxwrite.close()
-        if dataread is not idxread:
-            dataread.close()
-            datawrite.close()
+        tr.addbackup(self.indexfile, location='store')
+        if not self._inline:
+            tr.addbackup(self.datafile, location='store')
+
+        self.opener.rename(newrl.indexfile, self.indexfile)
+        if not self._inline:
+            self.opener.rename(newrl.datafile, self.datafile)
+
+        self.clearcaches()
+        self._loadindex(self.version, None)
 
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1682,7 +1682,7 @@
             rp = report
         else:
             rp = self.ui.warn
-        vfsmap = {'plain': self.vfs} # root of .hg/
+        vfsmap = {'plain': self.vfs, 'store': self.svfs} # root of .hg/
         # we must avoid cyclic reference between repo and transaction.
         reporef = weakref.ref(self)
         # Code to track tag movement
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -101,7 +101,7 @@
         return self._revlog.strip(minlink, transaction)
 
     def censorrevision(self, tr, node, tombstone=b''):
-        return self._revlog.censorrevision(node, tombstone=tombstone)
+        return self._revlog.censorrevision(tr, node, tombstone=tombstone)
 
     def files(self):
         return self._revlog.files()



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list