[PATCH 07 of 11 V2] revlogdelta: always return a delta info object in finddeltainfo

Gregory Szorc gregory.szorc at gmail.com
Wed Aug 29 12:52:32 EDT 2018


On Mon, Aug 27, 2018 at 3:06 AM Boris Feld <boris.feld at octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld at octobus.net>
> # Date 1534387137 -7200
> #      Thu Aug 16 04:38:57 2018 +0200
> # Node ID 2c73ef87329fd8f0a515dad9e2cd6de938615aa7
> # Parent  050101b1b6db5bb9a6d6e887ec3145f8bff4188b
> # EXP-Topic sparse-snapshot
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 2c73ef87329f
> revlogdelta: always return a delta info object in finddeltainfo
>
> Previously, the method returned `None` when a full snapshot was needed. The
> caller had to determine how to produce one itself.
>
> In practice, building a `_deltainfo` object for a full snapshot is simple.
> So
> we build it at the `finddeltainfo` level and always return a `_deltainfo`
> object.
>
> The caller can now simply process the `_deltainfo` return in all cases.
>
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -1967,31 +1967,23 @@ class revlog(object):
>
>          deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
>
> -        if deltainfo is not None:
> -            base = deltainfo.base
> -            chainbase = deltainfo.chainbase
> -            data = deltainfo.data
> -            l = deltainfo.deltalen
> -        else:
> -            rawtext = deltacomputer.buildtext(revinfo, fh)
> -            data = self.compress(rawtext)
> -            l = len(data[1]) + len(data[0])
> -            base = chainbase = curr
> -
> -        e = (offset_type(offset, flags), l, textlen,
> -             base, link, p1r, p2r, node)
> +        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
> +             deltainfo.base, link, p1r, p2r, node)
>          self.index.append(e)
>          self.nodemap[node] = curr
>
>          entry = self._io.packentry(e, self.node, self.version, curr)
> -        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
> +        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
> +                         link, offset)
> +
> +        rawtext = btext[0]
>

This line gave me trouble during review. But that's only because the
existing code is hard to follow (we pass an array around then possibly
replace its first element so we can later use it in a cache). I would
happily review a follow-up patch that explicitly returns the rawtext from a
called function instead of mutating an array as a side-effect.


>
>          if alwayscache and rawtext is None:
>              rawtext = deltacomputer.buildtext(revinfo, fh)
>
>          if type(rawtext) == bytes: # only accept immutable objects
>              self._cache = (node, curr, rawtext)
> -        self._chainbasecache[curr] = chainbase
> +        self._chainbasecache[curr] = deltainfo.chainbase
>          return node
>
>      def _writeentry(self, transaction, ifh, dfh, entry, data, link,
> offset):
> diff --git a/mercurial/revlogutils/deltas.py
> b/mercurial/revlogutils/deltas.py
> --- a/mercurial/revlogutils/deltas.py
> +++ b/mercurial/revlogutils/deltas.py
> @@ -691,6 +691,19 @@ class deltacomputer(object):
>                            chainbase, chainlen, compresseddeltalen,
>                            snapshotdepth)
>
> +    def _fullsnapshotinfo(self, fh, revinfo):
> +        curr = len(self.revlog)
> +        rawtext = self.buildtext(revinfo, fh)
> +        data = self.revlog.compress(rawtext)
> +        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
> +        deltabase = chainbase = curr
> +        snapshotdepth = 0
> +        chainlen = 1
> +
> +        return _deltainfo(dist, deltalen, data, deltabase,
> +                          chainbase, chainlen, compresseddeltalen,
> +                          snapshotdepth)
> +
>      def finddeltainfo(self, revinfo, fh):
>          """Find an acceptable delta against a candidate revision
>
> @@ -700,15 +713,18 @@ class deltacomputer(object):
>
>          Returns the first acceptable candidate revision, as ordered by
>          _getcandidaterevs
> +
> +        If no suitable deltabase is found, we return delta info for a full
> +        snapshot.
>          """
>          if not revinfo.textlen:
> -            return None # empty file do not need delta
> +            return self._fullsnapshotinfo(fh, revinfo)
>
>          # no delta for flag processor revision (see "candelta" for why)
>          # not calling candelta since only one revision needs test, also to
>          # avoid overhead fetching flags again.
>          if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
> -            return None
> +            return self._fullsnapshotinfo(fh, revinfo)
>
>          cachedelta = revinfo.cachedelta
>          p1 = revinfo.p1
> @@ -743,4 +759,6 @@ class deltacomputer(object):
>                  deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
>                  break
>
> +        if deltainfo is None:
> +            deltainfo = self._fullsnapshotinfo(fh, revinfo)
>          return deltainfo
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20180829/147c53b8/attachment.html>


More information about the Mercurial-devel mailing list