[PATCH] obsolete: introduce caches for all meaningful sets

pierre-yves.david at logilab.fr pierre-yves.david at logilab.fr
Mon Aug 27 11:30:40 CDT 2012


# HG changeset patch
# User Pierre-Yves David <pierre-yves.david at logilab.fr>
# Date 1346084195 -7200
# Node ID e8e6662faf370bc9209a067cf2d951338bbbe584
# Parent  99a2a4ae35e2180b7f825ef2677c36d538eac4ba
obsolete: introduce caches for all meaningful sets

This changeset introduces caches on the `obsstore` that keeps track of sets of
revisions meaningful for obsolescence related logics. For now they are:

- obsolete: changesets used as precursors (and not public),
- extinct:  obsolete changesets with osbolete descendants only,
- unstable: non obsolete changesets with obsolete ancestors.

The cache is accessed using the `getobscache(repo, '<set-name>')` function which
builds the cache on demand. The `clearobscaches(repo)` function takes care of
clearing the caches if any.

Caches are cleared when one of these events happens:

- a new marker is added,
- a new changeset is added,
- some changesets are made public,
- some public changesets are demoted to draft or secret.

Declaration of more sets is made easy because we will have to handle at least
two other "troubles" (latecomer and conflicting).

Caches are now used by revset and changectx. It is usually not much more
expensive to compute the whole set than to check the property of a few elements.
The performance boost is welcome in case we apply obsolescence logic on a lot of
revisions. This makes the feature usable!

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -9,10 +9,11 @@ from node import nullid, nullrev, short,
 from i18n import _
 import ancestor, mdiff, error, util, scmutil, subrepo, patch, encoding, phases
 import copies
 import match as matchmod
 import os, errno, stat
+import obsolete as obsmod
 
 propertycache = util.propertycache
 
 class changectx(object):
     """A changecontext object makes access to data related to a particular
@@ -230,42 +231,27 @@ class changectx(object):
         for d in self._repo.changelog.descendants([self._rev]):
             yield changectx(self._repo, d)
 
     def obsolete(self):
         """True if the changeset is obsolete"""
-        return (self.node() in self._repo.obsstore.precursors
-                and self.phase() > phases.public)
+        return self.rev() in obsmod.getobscache(self._repo, 'obsolete')
 
     def extinct(self):
         """True if the changeset is extinct"""
         # We should just compute a cache a check againts it.
         # see revset implementation for details
         #
         # But this naive implementation does not require cache
-        if self.phase() <= phases.public:
-            return False
-        if not self.obsolete():
-            return False
-        for desc in self.descendants():
-            if not desc.obsolete():
-                return False
-        return True
+        return self.rev() in obsmod.getobscache(self._repo, 'extinct')
 
     def unstable(self):
         """True if the changeset is not obsolete but it's ancestor are"""
         # We should just compute /(obsolete()::) - obsolete()/
         # and keep it in a cache.
         #
         # But this naive implementation does not require cache
-        if self.phase() <= phases.public:
-            return False
-        if self.obsolete():
-            return False
-        for anc in self.ancestors():
-            if anc.obsolete():
-                return True
-        return False
+        return self.rev() in obsmod.getobscache(self._repo, 'unstable')
 
     def _fileinfo(self, path):
         if '_manifest' in self.__dict__:
             try:
                 return self._manifest[path], self._manifest.flags(path)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1038,10 +1038,11 @@ class localrepository(object):
 
         delcache('_tagscache')
 
         self._branchcache = None # in UTF-8
         self._branchcachetip = None
+        obsolete.clearobscaches(self)
 
     def invalidatedirstate(self):
         '''Invalidates the dirstate, causing the next call to dirstate
         to check if it was modified since the last time it was read,
         rereading it if it has.
@@ -2400,10 +2401,11 @@ class localrepository(object):
                 htext = _(" (%+d heads)") % dh
 
             self.ui.status(_("added %d changesets"
                              " with %d changes to %d files%s\n")
                              % (changesets, revisions, files, htext))
+            obsolete.clearobscaches(self)
 
             if changesets > 0:
                 p = lambda: cl.writepending() and self.root or ""
                 self.hook('pretxnchangegroup', throw=True,
                           node=hex(cl.node(clstart)), source=srctype,
diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -161,10 +161,12 @@ class obsstore(object):
     - precursors: old -> set(new)
     - successors: new -> set(old)
     """
 
     def __init__(self, sopener):
+        # caches for various obsolescence related cache
+        self.caches = {}
         self._all = []
         # new markers to serialize
         self.precursors = {}
         self.successors = {}
         self.sopener = sopener
@@ -220,10 +222,12 @@ class obsstore(object):
             finally:
                 # XXX: f.close() == filecache invalidation == obsstore rebuilt.
                 # call 'filecacheentry.refresh()'  here
                 f.close()
             self._load(new)
+            # new marker *may* have changed several set. invalidate the cache.
+            self.caches.clear()
         return len(new)
 
     def mergemarkers(self, transation, data):
         markers = _readmarkers(data)
         self.add(transation, markers)
@@ -327,5 +331,69 @@ def anysuccessors(obsstore, node):
         for mark in obsstore.precursors.get(current, ()):
             for suc in mark[1]:
                 if suc not in seen:
                     seen.add(suc)
                     remaining.add(suc)
+
+# mapping of 'set-name' -> <function to computer this set>
+cachefuncs = {}
+def cachefor(name):
+    """Decorator to register a function as computing the cache for a set"""
+    def decorator(func):
+        assert name not in cachefuncs
+        cachefuncs[name] = func
+        return func
+    return decorator
+
+def getobscache(repo, name):
+    """Return the set of revision that belong to the <name> set
+
+    Such access may compute the set and cache it for future use"""
+    if not repo.obsstore:
+        return ()
+    if name not in repo.obsstore.caches:
+        repo.obsstore.caches[name] = cachefuncs[name](repo)
+    return repo.obsstore.caches[name]
+
+# To be simple we need to invalidate obsolescence cache when:
+#
+# - new changeset is added:
+# - public phase is changed
+# - obsolescence marker are added
+# - strip is used a repo
+def clearobscaches(repo):
+    """Remove all obsolescence related cache from a repo
+
+    This remove all cache in obsstore is the obsstore already exist on the
+    repo.
+
+    (We could be smarter here given the exact event that trigger the cache
+    clearing)"""
+    # only clear cache is there is obsstore data in this repo
+    if 'obsstore' in repo._filecache:
+        repo.obsstore.caches.clear()
+
+ at cachefor('obsolete')
+def _computeobsoleteset(repo):
+    """the set of obsolete revisions"""
+    obs = set()
+    nm = repo.changelog.nodemap
+    for prec in repo.obsstore.precursors:
+        rev = nm.get(prec)
+        if rev is not None:
+            obs.add(rev)
+    return set(repo.revs('%ld - public()', obs))
+
+ at cachefor('unstable')
+def _computeunstableset(repo):
+    """the set of non obsolete revisions with obsolete parents"""
+    return set(repo.revs('(obsolete()::) - obsolete()'))
+
+ at cachefor('suspended')
+def _computesuspendedset(repo):
+    """the set of obsolete parents with non obsolete descendants"""
+    return set(repo.revs('obsolete() and obsolete()::unstable()'))
+
+ at cachefor('extinct')
+def _computeextinctset(repo):
+    """the set of obsolete parents without non obsolete descendants"""
+    return set(repo.revs('obsolete() - obsolete()::unstable()'))
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -102,10 +102,11 @@ Note: old client behave as a publishing 
 
 import errno
 from node import nullid, nullrev, bin, hex, short
 from i18n import _
 import util
+import obsolete
 
 allphases = public, draft, secret = range(3)
 trackedphases = allphases[1:]
 phasenames = ['public', 'draft', 'secret']
 
@@ -242,10 +243,11 @@ class phasecache(object):
                 # some roots may need to be declared for lower phases
                 delroots.extend(olds - roots)
             # declare deleted root in the target phase
             if targetphase != 0:
                 self.retractboundary(repo, targetphase, delroots)
+        obsolete.clearobscaches(repo)
 
     def retractboundary(self, repo, targetphase, nodes):
         # Be careful to preserve shallow-copied values: do not update
         # phaseroots values, replace them.
 
@@ -258,10 +260,11 @@ class phasecache(object):
             currentroots = currentroots.copy()
             currentroots.update(newroots)
             ctxs = repo.set('roots(%ln::)', currentroots)
             currentroots.intersection_update(ctx.node() for ctx in ctxs)
             self._updateroots(targetphase, currentroots)
+        obsolete.clearobscaches(repo)
 
 def advanceboundary(repo, targetphase, nodes):
     """Add nodes to a phase changing other nodes phases if necessary.
 
     This function move boundary *forward* this means that all nodes
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -10,10 +10,11 @@ import parser, util, error, discovery, h
 import node
 import bookmarks as bookmarksmod
 import match as matchmod
 from i18n import _
 import encoding
+import obsolete as obsmod
 
 def _revancestors(repo, revs, followfirst):
     """Like revlog.ancestors(), but supports followfirst."""
     cut = followfirst and 1 or None
     cl = repo.changelog
@@ -619,12 +620,12 @@ def extinct(repo, subset, x):
     """``extinct()``
     Obsolete changesets with obsolete descendants only.
     """
     # i18n: "extinct" is a keyword
     getargs(x, 0, 0, _("extinct takes no arguments"))
-    extinctset = set(repo.revs('(obsolete()::) - (::(not obsolete()))'))
-    return [r for r in subset if r in extinctset]
+    extincts = obsmod.getobscache(repo, 'extinct')
+    return [r for r in subset if r in extincts]
 
 def extra(repo, subset, x):
     """``extra(label, [value])``
     Changesets with the given label in the extra metadata, with the given
     optional value.
@@ -957,11 +958,12 @@ def node_(repo, subset, x):
 def obsolete(repo, subset, x):
     """``obsolete()``
     Mutable changeset with a newer version."""
     # i18n: "obsolete" is a keyword
     getargs(x, 0, 0, _("obsolete takes no arguments"))
-    return [r for r in subset if repo[r].obsolete()]
+    obsoletes = obsmod.getobscache(repo, 'obsolete')
+    return [r for r in subset if r in obsoletes]
 
 def origin(repo, subset, x):
     """``origin([set])``
     Changesets that were specified as a source for the grafts, transplants or
     rebases that created the given revisions.  Omitting the optional set is the
@@ -1435,12 +1437,12 @@ def unstable(repo, subset, x):
     """``unstable()``
     Non-obsolete changesets with obsolete ancestors.
     """
     # i18n: "unstable" is a keyword
     getargs(x, 0, 0, _("unstable takes no arguments"))
-    unstableset = set(repo.revs('(obsolete()::) - obsolete()'))
-    return [r for r in subset if r in unstableset]
+    unstables = obsmod.getobscache(repo, 'unstable')
+    return [r for r in subset if r in unstables]
 
 
 def user(repo, subset, x):
     """``user(string)``
     User name contains string. The match is case-insensitive.


More information about the Mercurial-devel mailing list