[PATCH 1 of 4 v5] revlog: add ability to apply transforms on flags

Pierre-Yves David pierre-yves.david at ens-lyon.org
Sat Dec 17 03:09:20 EST 2016



On 12/14/2016 12:58 PM, Remi Chaintron wrote:
> # HG changeset patch
> # User Remi Chaintron <remi at fb.com>
> # Date 1481716603 0
> #      Wed Dec 14 11:56:43 2016 +0000
> # Branch stable
> # Node ID 7599256f2800bbacc3b478ac8d2c4a5746eb4cd1
> # Parent  d34c04da52a8f2eb61b51bb0aa9cedf1e4b43474
> revlog: add ability to apply transforms on flags
>
> Add a mechanism for extensions/subclasses of revlog to register transforms on
> read/write operations on revlog, based on revision flags.
>
> Due to some operations being non-commutative, transforms applied in a specific
> order when writing the contents of the revlog need to be applied on the reverse
> order when reading.
> In order to allow the composition of such operation, the 'processflags' method
> applies transforms on flags in the order defined by REVIDX_FLAGS_ORDER.
>
> Extensions can use the `localrepository.registerflagtransform()` method to
> register default transforms on flags in `reposetup()` in order to ensure their
> transform will not override/be overridden by other extensions' transforms.

As a general principle, we don't add new methods on localrepo. Its 
tempting to add stuff to that central object but this lead to horrible 
horrible class bloat in the past. In addition, your method does not 
seems to make any use of the repo itself. (if we did not wrote that down 
somewhere, we should)

Instead, you should go for one of these two options:
- a small function defined next to the dictionary
- our shiny and nice "registrar" mechanism. See 
https://www.mercurial-scm.org/repo/hg/file/tip/hgext/rebase.py#l109 for 
an example.

I would probably pick registrar because the code exists, but given the 
low amount of registrees it might be overkill.

Couple of feedback inlined


> diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
> --- a/mercurial/bundlerepo.py
> +++ b/mercurial/bundlerepo.py
> @@ -147,7 +147,9 @@
>              delta = self._chunk(chain.pop())
>              text = mdiff.patches(text, [delta])
>
> -        self.checkhash(text, node, rev=rev)
> +        text, validatehash = self.processflags(text, self.flags(rev))
> +        if validatehash is True:

Should be:

   if validatehash:

> +            self.checkhash(text, node, rev=rev)
>          self._cache = (node, rev, text)
>          return text
>
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -48,6 +48,7 @@
>      phases,
>      pushkey,
>      repoview,
> +    revlog,
>      revset,
>      scmutil,
>      store,
> @@ -1959,6 +1960,21 @@
>              fp.close()
>          return self.pathto(fp.name[len(self.root) + 1:])
>
> +    def registerflagtransform(self, flag, transform):
> +        """Register transform on revlog flags.
> +
> +        Extensions can rely on this method to set default transforms on revlog
> +        flags, and ensure they are not overwritten by/overwriting other
> +        extensions'.
> +        """
> +        if not flag & revlog.REVIDX_KNOWN_FLAGS:
> +            raise error.Abort(_("Cannot register transform on unknown flag."))

note that we neither capitalize nor punctuate our error messages.

> +        transform = revlog.REVIDX_FLAGS_TRANSFORMS.get(flag, None)
> +        if transform is not None:
> +            raise error.Abort(
> +                _("Cannot register multiple transforms on flags."))

ditto

> +        revlog.REVIDX_FLAGS_TRANSFORMS[flag] = transform
> +
>  # used to avoid circular references so destructors work
>  def aftertrans(files):
>      renamefiles = [tuple(t) for t in files]
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -56,6 +56,12 @@
>  REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
>  REVIDX_DEFAULT_FLAGS = 0
>  REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
> +# stable order in which flags need to be processed and their transforms applied
> +REVIDX_FLAGS_ORDER = [
> +    REVIDX_ISCENSORED,
> +]
> +REVIDX_FLAGS_TRANSFORMS = { }

Small nits: as we have no "public" accessors of this, we could add a '_' 
in front of it to discourage people to access it directly.

> +
>
>  # max size of revlog with inline data
>  _maxinline = 131072
> @@ -1203,11 +1209,6 @@
>          if rev is None:
>              rev = self.rev(node)
>
> -        # check rev flags
> -        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
> -            raise RevlogError(_('incompatible revision flag %x') %
> -                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
> -
>          chain, stopped = self._deltachain(rev, stoprev=cachedrev)
>          if stopped:
>              text = self._cache[2]
> @@ -1221,7 +1222,11 @@
>              bins = bins[1:]
>
>          text = mdiff.patches(text, bins)
> -        self.checkhash(text, node, rev=rev)
> +
> +        text, validatehash = self.processflags(text, self.flags(rev))
> +        if validatehash:
> +            self.checkhash(text, node, rev=rev)
> +
>          self._cache = (node, rev, text)
>          return text
>
> @@ -1233,6 +1238,45 @@
>          """
>          return hash(text, p1, p2)
>
> +    def processflags(self, text, flags, reverse=False):
> +        """Process revision flags and apply registered transforms.
> +
> +        ``text`` - the revision data to process
> +        ``flags`` - the revision flags
> +        ``reverse`` - an option argument describing whether the flags should be
> +        processed in order or reverse order.
> +
> +        This method processes the flags in the order (or reverse order if
> +        ``reverse`` is False) defined by REVIDX_FLAGS_TRANSFORMS, applying the
> +        transforms registered for present flags. The order of flags defined in
> +        REVIDX_FLAGS_TRANSFORMS needs to be stable for non-commutative
> +        operations.
> +
> +        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
> +        processed text and ``validatehash`` is a bool indicating whether the
> +        returned text should be checked for hash integrity.
> +        """
> +        # Check all flags are known.
> +        if flags & ~REVIDX_KNOWN_FLAGS:
> +            raise RevlogError(_('incompatible revision flag %x') %
> +                              (flags & ~REVIDX_KNOWN_FLAGS))
> +
> +        validatehash = True
> +        # Depending on the operation (read or write), the order might be
> +        # reversed due to non-commutative transforms.
> +        orderedflags = REVIDX_FLAGS_ORDER
> +        if reverse:
> +            orderedflags = reversed(orderedflags)
> +
> +        for flag in orderedflags:
> +            # If a transform has been registered for a known flag, apply and
> +            # update result tuple.
> +            if flag & flags:
> +                f = REVIDX_FLAGS_TRANSFORMS.get(flag, None)
> +                if f is not None:
> +                    text, validatehash = f(self, text)
> +        return text, validatehash
> +
>      def checkhash(self, text, node, p1=None, p2=None, rev=None):
>          """Check node hash integrity.
>
> @@ -1415,7 +1459,10 @@
>                  basetext = self.revision(self.node(baserev), _df=fh)
>                  btext[0] = mdiff.patch(basetext, delta)
>              try:
> -                self.checkhash(btext[0], node, p1=p1, p2=p2)
> +                btext[0], validatehash = self.processflags(btext[0], flags,
> +                                                           reverse=True)

Should be:

  if validatehash:


> +                if validatehash is True:
> +                    self.checkhash(btext[0], node, p1=p1, p2=p2)
>                  if flags & REVIDX_ISCENSORED:
>                      raise RevlogError(_('node %s is not censored') % node)
>              except CensoredNodeError:

Cheers,

-- 
Pierre-Yves David


More information about the Mercurial-devel mailing list