[PATCH] obsolete: introduce caches for all meaningful sets

Steven Brown stevengbrown at gmail.com
Tue Aug 28 08:34:41 CDT 2012


On Aug 28, 2012 12:31 AM, <pierre-yves.david at logilab.fr> wrote:
>
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david at logilab.fr>
> # Date 1346084195 -7200
> # Node ID e8e6662faf370bc9209a067cf2d951338bbbe584
> # Parent  99a2a4ae35e2180b7f825ef2677c36d538eac4ba
> obsolete: introduce caches for all meaningful sets
>
> This changeset introduces caches on the `obsstore` that keeps track of
sets of
> revisions meaningful for obsolescence related logics. For now they are:
>
> - obsolete: changesets used as precursors (and not public),
> - extinct:  obsolete changesets with osbolete descendants only,
> - unstable: non obsolete changesets with obsolete ancestors.
>
> The cache is accessed using the `getobscache(repo, '<set-name>')`
function which
> builds the cache on demand. The `clearobscaches(repo)` function takes
care of
> clearing the caches if any.
>
> Caches are cleared when one of these events happens:
>
> - a new marker is added,
> - a new changeset is added,
> - some changesets are made public,
> - some public changesets are demoted to draft or secret.
>
> Declaration of more sets is made easy because we will have to handle at
least
> two other "troubles" (latecomer and conflicting).
>
> Caches are now used by revset and changectx. It is usually not much more
> expensive to compute the whole set than to check the property of a few
elements.
> The performance boost is welcome in case we apply obsolescence logic on a
lot of
> revisions. This makes the feature usable!
>
> diff --git a/mercurial/context.py b/mercurial/context.py
> --- a/mercurial/context.py
> +++ b/mercurial/context.py
> @@ -9,10 +9,11 @@ from node import nullid, nullrev, short,
>  from i18n import _
>  import ancestor, mdiff, error, util, scmutil, subrepo, patch, encoding,
phases
>  import copies
>  import match as matchmod
>  import os, errno, stat
> +import obsolete as obsmod
>
>  propertycache = util.propertycache
>
>  class changectx(object):
>      """A changecontext object makes access to data related to a
particular
> @@ -230,42 +231,27 @@ class changectx(object):
>          for d in self._repo.changelog.descendants([self._rev]):
>              yield changectx(self._repo, d)
>
>      def obsolete(self):
>          """True if the changeset is obsolete"""
> -        return (self.node() in self._repo.obsstore.precursors
> -                and self.phase() > phases.public)
> +        return self.rev() in obsmod.getobscache(self._repo, 'obsolete')
>
>      def extinct(self):
>          """True if the changeset is extinct"""
>          # We should just compute a cache a check againts it.
>          # see revset implementation for details
>          #
>          # But this naive implementation does not require cache

Looks like this comment should be removed.

> -        if self.phase() <= phases.public:
> -            return False
> -        if not self.obsolete():
> -            return False
> -        for desc in self.descendants():
> -            if not desc.obsolete():
> -                return False
> -        return True
> +        return self.rev() in obsmod.getobscache(self._repo, 'extinct')
>
>      def unstable(self):
>          """True if the changeset is not obsolete but it's ancestor are"""
>          # We should just compute /(obsolete()::) - obsolete()/
>          # and keep it in a cache.
>          #
>          # But this naive implementation does not require cache

And this one.

> -        if self.phase() <= phases.public:
> -            return False
> -        if self.obsolete():
> -            return False
> -        for anc in self.ancestors():
> -            if anc.obsolete():
> -                return True
> -        return False
> +        return self.rev() in obsmod.getobscache(self._repo, 'unstable')
>
>      def _fileinfo(self, path):
>          if '_manifest' in self.__dict__:
>              try:
>                  return self._manifest[path], self._manifest.flags(path)
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -1038,10 +1038,11 @@ class localrepository(object):
>
>          delcache('_tagscache')
>
>          self._branchcache = None # in UTF-8
>          self._branchcachetip = None
> +        obsolete.clearobscaches(self)
>
>      def invalidatedirstate(self):
>          '''Invalidates the dirstate, causing the next call to dirstate
>          to check if it was modified since the last time it was read,
>          rereading it if it has.
> @@ -2400,10 +2401,11 @@ class localrepository(object):
>                  htext = _(" (%+d heads)") % dh
>
>              self.ui.status(_("added %d changesets"
>                               " with %d changes to %d files%s\n")
>                               % (changesets, revisions, files, htext))
> +            obsolete.clearobscaches(self)
>
>              if changesets > 0:
>                  p = lambda: cl.writepending() and self.root or ""
>                  self.hook('pretxnchangegroup', throw=True,
>                            node=hex(cl.node(clstart)), source=srctype,
> diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
> --- a/mercurial/obsolete.py
> +++ b/mercurial/obsolete.py
> @@ -161,10 +161,12 @@ class obsstore(object):
>      - precursors: old -> set(new)
>      - successors: new -> set(old)
>      """
>
>      def __init__(self, sopener):
> +        # caches for various obsolescence related cache
> +        self.caches = {}
>          self._all = []
>          # new markers to serialize
>          self.precursors = {}
>          self.successors = {}
>          self.sopener = sopener
> @@ -220,10 +222,12 @@ class obsstore(object):
>              finally:
>                  # XXX: f.close() == filecache invalidation == obsstore
rebuilt.
>                  # call 'filecacheentry.refresh()'  here
>                  f.close()
>              self._load(new)
> +            # new marker *may* have changed several set. invalidate the
cache.
> +            self.caches.clear()
>          return len(new)
>
>      def mergemarkers(self, transation, data):
>          markers = _readmarkers(data)
>          self.add(transation, markers)
> @@ -327,5 +331,69 @@ def anysuccessors(obsstore, node):
>          for mark in obsstore.precursors.get(current, ()):
>              for suc in mark[1]:
>                  if suc not in seen:
>                      seen.add(suc)
>                      remaining.add(suc)
> +
> +# mapping of 'set-name' -> <function to computer this set>
> +cachefuncs = {}
> +def cachefor(name):
> +    """Decorator to register a function as computing the cache for a
set"""
> +    def decorator(func):
> +        assert name not in cachefuncs
> +        cachefuncs[name] = func
> +        return func
> +    return decorator
> +
> +def getobscache(repo, name):
> +    """Return the set of revision that belong to the <name> set
> +
> +    Such access may compute the set and cache it for future use"""
> +    if not repo.obsstore:
> +        return ()
> +    if name not in repo.obsstore.caches:
> +        repo.obsstore.caches[name] = cachefuncs[name](repo)
> +    return repo.obsstore.caches[name]
> +
> +# To be simple we need to invalidate obsolescence cache when:
> +#
> +# - new changeset is added:
> +# - public phase is changed
> +# - obsolescence marker are added
> +# - strip is used a repo
> +def clearobscaches(repo):
> +    """Remove all obsolescence related cache from a repo
> +
> +    This remove all cache in obsstore is the obsstore already exist on
the
> +    repo.
> +
> +    (We could be smarter here given the exact event that trigger the
cache
> +    clearing)"""
> +    # only clear cache is there is obsstore data in this repo
> +    if 'obsstore' in repo._filecache:
> +        repo.obsstore.caches.clear()
> +
> + at cachefor('obsolete')
> +def _computeobsoleteset(repo):
> +    """the set of obsolete revisions"""
> +    obs = set()
> +    nm = repo.changelog.nodemap
> +    for prec in repo.obsstore.precursors:
> +        rev = nm.get(prec)
> +        if rev is not None:
> +            obs.add(rev)
> +    return set(repo.revs('%ld - public()', obs))
> +
> + at cachefor('unstable')
> +def _computeunstableset(repo):
> +    """the set of non obsolete revisions with obsolete parents"""
> +    return set(repo.revs('(obsolete()::) - obsolete()'))
> +
> + at cachefor('suspended')
> +def _computesuspendedset(repo):
> +    """the set of obsolete parents with non obsolete descendants"""
> +    return set(repo.revs('obsolete() and obsolete()::unstable()'))
> +
> + at cachefor('extinct')
> +def _computeextinctset(repo):
> +    """the set of obsolete parents without non obsolete descendants"""
> +    return set(repo.revs('obsolete() - obsolete()::unstable()'))
> diff --git a/mercurial/phases.py b/mercurial/phases.py
> --- a/mercurial/phases.py
> +++ b/mercurial/phases.py
> @@ -102,10 +102,11 @@ Note: old client behave as a publishing
>
>  import errno
>  from node import nullid, nullrev, bin, hex, short
>  from i18n import _
>  import util
> +import obsolete
>
>  allphases = public, draft, secret = range(3)
>  trackedphases = allphases[1:]
>  phasenames = ['public', 'draft', 'secret']
>
> @@ -242,10 +243,11 @@ class phasecache(object):
>                  # some roots may need to be declared for lower phases
>                  delroots.extend(olds - roots)
>              # declare deleted root in the target phase
>              if targetphase != 0:
>                  self.retractboundary(repo, targetphase, delroots)
> +        obsolete.clearobscaches(repo)
>
>      def retractboundary(self, repo, targetphase, nodes):
>          # Be careful to preserve shallow-copied values: do not update
>          # phaseroots values, replace them.
>
> @@ -258,10 +260,11 @@ class phasecache(object):
>              currentroots = currentroots.copy()
>              currentroots.update(newroots)
>              ctxs = repo.set('roots(%ln::)', currentroots)
>              currentroots.intersection_update(ctx.node() for ctx in ctxs)
>              self._updateroots(targetphase, currentroots)
> +        obsolete.clearobscaches(repo)
>
>  def advanceboundary(repo, targetphase, nodes):
>      """Add nodes to a phase changing other nodes phases if necessary.
>
>      This function move boundary *forward* this means that all nodes
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -10,10 +10,11 @@ import parser, util, error, discovery, h
>  import node
>  import bookmarks as bookmarksmod
>  import match as matchmod
>  from i18n import _
>  import encoding
> +import obsolete as obsmod
>
>  def _revancestors(repo, revs, followfirst):
>      """Like revlog.ancestors(), but supports followfirst."""
>      cut = followfirst and 1 or None
>      cl = repo.changelog
> @@ -619,12 +620,12 @@ def extinct(repo, subset, x):
>      """``extinct()``
>      Obsolete changesets with obsolete descendants only.
>      """
>      # i18n: "extinct" is a keyword
>      getargs(x, 0, 0, _("extinct takes no arguments"))
> -    extinctset = set(repo.revs('(obsolete()::) - (::(not obsolete()))'))
> -    return [r for r in subset if r in extinctset]
> +    extincts = obsmod.getobscache(repo, 'extinct')
> +    return [r for r in subset if r in extincts]
>
>  def extra(repo, subset, x):
>      """``extra(label, [value])``
>      Changesets with the given label in the extra metadata, with the given
>      optional value.
> @@ -957,11 +958,12 @@ def node_(repo, subset, x):
>  def obsolete(repo, subset, x):
>      """``obsolete()``
>      Mutable changeset with a newer version."""
>      # i18n: "obsolete" is a keyword
>      getargs(x, 0, 0, _("obsolete takes no arguments"))
> -    return [r for r in subset if repo[r].obsolete()]
> +    obsoletes = obsmod.getobscache(repo, 'obsolete')
> +    return [r for r in subset if r in obsoletes]
>
>  def origin(repo, subset, x):
>      """``origin([set])``
>      Changesets that were specified as a source for the grafts,
transplants or
>      rebases that created the given revisions.  Omitting the optional set
is the
> @@ -1435,12 +1437,12 @@ def unstable(repo, subset, x):
>      """``unstable()``
>      Non-obsolete changesets with obsolete ancestors.
>      """
>      # i18n: "unstable" is a keyword
>      getargs(x, 0, 0, _("unstable takes no arguments"))
> -    unstableset = set(repo.revs('(obsolete()::) - obsolete()'))
> -    return [r for r in subset if r in unstableset]
> +    unstables = obsmod.getobscache(repo, 'unstable')
> +    return [r for r in subset if r in unstables]
>
>
>  def user(repo, subset, x):
>      """``user(string)``
>      User name contains string. The match is case-insensitive.
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://selenic.com/pipermail/mercurial-devel/attachments/20120828/f8cb445b/attachment.html>


More information about the Mercurial-devel mailing list