[PATCH 6 of 6 censor RFC] revlog: addgroup checks if incoming deltas add censored revs, sets flag bit
Mike Edgar
adgar at google.com
Fri Jan 23 17:53:26 CST 2015
# HG changeset patch
# User Mike Edgar <adgar at google.com>
# Date 1421266568 18000
# Wed Jan 14 15:16:08 2015 -0500
# Node ID 9bc824ed68e8f9b726d5d227c263b62bdebda6cb
# Parent a047ca8ca035158ecd9d034b9b4232be8b8080d4
revlog: addgroup checks if incoming deltas add censored revs, sets flag bit
A censored revision stored in a revlog should have the censored revlog index
flag bit set. This implies we must know if a revision is censored before we
add it to the revlog.
Censor metadata is stored in the revision text. When adding a changegroup to
a revlog, we have a sequence of deltas, not a sequence of full revision texts.
This means we won't always have easy access to the censor metadata, so we
don't have an easy way to know when to set the relevant index flag bit.
This change introduces a heuristic based on assumptions around the Mercurial
delta format and filelog metadata. Since filelog metadata is at the start of
the revision text, we can be sure that when a delta produces a censored
revision, the first patch in that delta will introduce the censor metadata
line. There are only two possible such patches: one which also adds the
"\1\n" line delimiting the start of filelog metadata, and one which doesn't.
See more at http://mercurial.selenic.com/wiki/CensorPlan
diff -r a047ca8ca035 -r 9bc824ed68e8 mercurial/filelog.py
--- a/mercurial/filelog.py Wed Jan 21 22:09:32 2015 -0500
+++ b/mercurial/filelog.py Wed Jan 14 15:16:08 2015 -0500
@@ -6,7 +6,7 @@
# GNU General Public License version 2 or any later version.
import error, revlog
-import re
+import re, struct
_mdre = re.compile('\1\n')
def parsemeta(text):
@@ -110,3 +110,40 @@
def _file(self, f):
return filelog(self.opener, f)
+
+ def _peek_iscensored(self, baserev, delta, flush):
+ """Quickly check if a delta produces a censored revision.
+
+ Only the first patch header in ``delta`` and the bytes immediately
+ following it are inspected. ``baserev`` is the revision the delta is
+ checked against; it is decoded only in case 3 below. ``flush`` is a
+ zero-argument callable that is invoked before ``baserev`` is read.
+ Returns True or False.
+ """
+ # Fragile heuristic: unless new file meta keys are added alphabetically
+ # preceding "censored", all censored revisions are prefixed by
+ # "\1\ncensored:". A delta producing such a prefix either:
+ #
+ # 1. Has no \1\n prefix in its base, and must start with at least three
+ # new lines to add "\1\ncensored:<data>\n\1\n" (more if the censored
+ # revision was a copy).
+ # 2. Has a censored base and must replace the whole revision, adding
+ # at least three lines as in case 1.
+ # 3. Has a \1\n prefix in its base, perhaps because it was copied but
+ # not necessarily. The "censored:<data>\n" line will be added at
+ # position 2.
+ # The patch header is three big-endian signed 32-bit integers.
+ hlen = struct.calcsize(">lll")
+ # Nothing follows the first header, so there are no inserted bytes to
+ # inspect.
+ if len(delta) <= hlen:
+ return False
+ # NOTE(review): "copy" is compared against the length of the expected
+ # prefix below, so it presumably holds the byte length of the text this
+ # patch inserts; "skip" is not used in this method. Confirm both against
+ # the delta format.
+ start, skip, copy = struct.unpack(">lll", delta[:hlen])
+ # Cases 1, 2: first patch must insert metadata at position 0.
+ add = "\1\ncensored:"
+ addlen = len(add)
+ if (start == 0 and copy >= addlen and len(delta) >= hlen + addlen
+ and delta[hlen:hlen + addlen] == add):
+ return True
+ # Case 3: first patch starts after "\1\n" of base, position 2.
+ ins = "censored:"
+ inslen = len(ins)
+ if (start == 2 and copy >= inslen and len(delta) >= hlen + inslen
+ and delta[hlen:hlen + inslen] == ins):
+ # Likely censored, but we need to verify the base really does start
+ # with "\1\n". False positives should be rare enough to justify
+ # flushing file handles and decoding the base.
+ flush()
+ return self.revision(baserev)[0:2] == "\1\n"
+
+ # The first patch matches none of the three cases above.
+ return False
diff -r a047ca8ca035 -r 9bc824ed68e8 mercurial/revlog.py
--- a/mercurial/revlog.py Wed Jan 21 22:09:32 2015 -0500
+++ b/mercurial/revlog.py Wed Jan 14 15:16:08 2015 -0500
@@ -1374,7 +1374,10 @@
transaction.add(self.indexfile, isize, r)
transaction.add(self.datafile, end)
dfh = self.opener(self.datafile, "a")
-
+ def flush():
+ # Flush the data file handle when there is one, then the index file
+ # handle. This helper is handed to self._peek_iscensored() further down.
+ if dfh:
+ dfh.flush()
+ ifh.flush()
try:
# loop through our set of deltas
chain = None
@@ -1418,9 +1421,13 @@
raise error.CensoredBaseError(self.indexfile,
self.node(baserev))
+ flags = REVIDX_DEFAULT_FLAGS
+ if self._peek_iscensored(baserev, delta, flush):
+ flags |= REVIDX_ISCENSORED
+
chain = self._addrevision(node, None, transaction, link,
- p1, p2, REVIDX_DEFAULT_FLAGS,
- (baserev, delta), ifh, dfh)
+ p1, p2, flags, (baserev, delta),
+ ifh, dfh)
if not dfh and not self._inline:
# addrevision switched from inline to conventional
# reopen the index
@@ -1438,6 +1445,10 @@
"""Check if a file revision is censored."""
return False
+ def _peek_iscensored(self, baserev, delta, flush):
+ """Quickly check if a delta produces a censored revision.
+
+ This implementation ignores its arguments and always returns False.
+ """
+ # NOTE(review): the filelog.py part of this patch adds a method with the
+ # same name and signature; presumably it overrides this one -- confirm.
+ return False
+
def getstrippoint(self, minlink):
"""find the minimum rev that must be stripped to strip the linkrev
More information about the Mercurial-devel
mailing list