[PATCH 1 of 3] phases: introduce phasecache

Fri May 11 17:49:48 CDT 2012

# HG changeset patch
# User Patrick Mezard <patrick at mezard.eu>
# Date 1336775047 -7200
# Node ID 7cd39b49030833bec83b23c25871a2a18d80ce2a
# Parent  5cf18921bb7b6cdb4cb299a8f26908a3a39b2996
phases: introduce phasecache

The original motivation was changectx.phase() had special logic to
correctly lookup in repo._phaserev, including invalidating it when
necessary. And at other places, repo._phaserev was accessed directly.

This led to the discovery that phases state including _phaseroots,
_phaserev and _dirtyphase was manipulated in localrepository.py,
phases.py, repair.py, etc. phasecache helps encapsulating that.

This patch replaces all phase state in localrepo with phasecache and
adjust related code except for advance/retractboundary() in phases.
These still access to phasecache internals directly. This will be
addressed in a followup.

diff --git a/hgext/mq.py b/hgext/mq.py
--- a/hgext/mq.py
+++ b/hgext/mq.py
@@ -862,7 +862,7 @@
     def finish(self, repo, revs):
         # Manually trigger phase computation to ensure phasedefaults is
         # executed before we remove the patches.
-        repo._phaserev
+        repo._phasecache
         patches = self._revpatches(repo, sorted(revs))
         qfinished = self._cleanup(patches, len(patches))
         if qfinished and repo.ui.configbool('mq', 'secret', False):
diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -4352,7 +4352,7 @@
             nodes = [ctx.node() for ctx in repo.set('%ld', revs)]
             if not nodes:
                 raise util.Abort(_('empty revision set'))
-            olddata = repo._phaserev[:]
+            olddata = repo._phasecache.getphaserevs(repo)[:]
             phases.advanceboundary(repo, targetphase, nodes)
             if opts['force']:
                 phases.retractboundary(repo, targetphase, nodes)
@@ -4360,7 +4360,7 @@
             lock.release()
         if olddata is not None:
             changes = 0
-            newdata = repo._phaserev
+            newdata = repo._phasecache.getphaserevs(repo)
             changes = sum(o != newdata[i] for i, o in enumerate(olddata))
             rejected = [n for n in nodes
                         if newdata[repo[n].rev()] < targetphase]
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -191,12 +191,7 @@
     def bookmarks(self):
         return self._repo.nodebookmarks(self._node)
     def phase(self):
-        if self._rev == -1:
-            return phases.public
-        if self._rev >= len(self._repo._phaserev):
-            # outdated cache
-            del self._repo._phaserev
-        return self._repo._phaserev[self._rev]
+        return self._repo._phasecache.phase(self._repo, self._rev)
     def phasestr(self):
         return phases.phasenames[self.phase()]
     def mutable(self):
diff --git a/mercurial/discovery.py b/mercurial/discovery.py
--- a/mercurial/discovery.py
+++ b/mercurial/discovery.py
@@ -105,7 +105,7 @@
     og.commonheads, _any, _hds = commoninc
 
     # compute outgoing
-    if not repo._phaseroots[phases.secret]:
+    if not repo._phasecache.phaseroots[phases.secret]:
         og.missingheads = onlyheads or repo.heads()
     elif onlyheads is None:
         # use visible heads as it should be cached
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -41,7 +41,6 @@
         self.wopener = scmutil.opener(self.root)
         self.baseui = baseui
         self.ui = baseui.copy()
-        self._dirtyphases = False
         # A list of callback to shape the phase if no data were found.
         # Callback are in the form: func(repo, roots) --> processed root.
         # This list it to be filled by extension during repo setup
@@ -182,22 +181,8 @@
       bookmarks.write(self)
 
     @storecache('phaseroots')
-    def _phaseroots(self):
-        phaseroots, self._dirtyphases = phases.readroots(
-            self, self._phasedefaults)
-        return phaseroots
-
-    @propertycache
-    def _phaserev(self):
-        cache = [phases.public] * len(self)
-        for phase in phases.trackedphases:
-            roots = map(self.changelog.rev, self._phaseroots[phase])
-            if roots:
-                for rev in roots:
-                    cache[rev] = phase
-                for rev in self.changelog.descendants(*roots):
-                    cache[rev] = phase
-        return cache
+    def _phasecache(self):
+        return phases.phasecache(self, self._phasedefaults)
 
     @storecache('00changelog.i')
     def changelog(self):
@@ -604,10 +589,11 @@
 
     def known(self, nodes):
         nm = self.changelog.nodemap
+        pc = self._phasecache
         result = []
         for n in nodes:
             r = nm.get(n)
-            resp = not (r is None or self._phaserev[r] >= phases.secret)
+            resp = not (r is None or pc.phase(self, r) >= phases.secret)
             result.append(resp)
         return result
 
@@ -863,7 +849,6 @@
                 pass
 
         delcache('_tagscache')
-        delcache('_phaserev')
 
         self._branchcache = None # in UTF-8
         self._branchcachetip = None
@@ -931,9 +916,8 @@
 
         def unlock():
             self.store.write()
-            if self._dirtyphases:
-                phases.writeroots(self, self._phaseroots)
-                self._dirtyphases = False
+            if '_phasecache' in vars(self):
+                self._phasecache.write()
             for k, ce in self._filecache.items():
                 if k == 'dirstate':
                     continue
diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -99,7 +99,7 @@
 """
 
 import errno
-from node import nullid, bin, hex, short
+from node import nullid, nullrev, bin, hex, short
 from i18n import _
 
 allphases = public, draft, secret = range(3)
@@ -124,7 +124,7 @@
             updated = True
     return updated
 
-def readroots(repo, phasedefaults=None):
+def _readroots(repo, phasedefaults=None):
     """Read phase roots from disk
 
     phasedefaults is a list of fn(repo, roots) callable, which are
@@ -156,15 +156,51 @@
         dirty = True
     return roots, dirty
 
-def writeroots(repo, phaseroots):
-    """Write phase roots from disk"""
-    f = repo.sopener('phaseroots', 'w', atomictemp=True)
-    try:
-        for phase, roots in enumerate(phaseroots):
-            for h in roots:
-                f.write('%i %s\n' % (phase, hex(h)))
-    finally:
-        f.close()
+class phasecache(object):
+    def __init__(self, repo, phasedefaults):
+        self.phaseroots, self.dirty = _readroots(repo, phasedefaults)
+        self.opener = repo.sopener
+        self._phaserevs = None
+
+    def getphaserevs(self, repo, rebuild=False):
+        if rebuild or self._phaserevs is None:
+            revs = [public] * len(repo.changelog)
+            for phase in trackedphases:
+                roots = map(repo.changelog.rev, self.phaseroots[phase])
+                if roots:
+                    for rev in roots:
+                        revs[rev] = phase
+                    for rev in repo.changelog.descendants(*roots):
+                        revs[rev] = phase
+            self._phaserevs = revs
+        return self._phaserevs
+
+    def invalidatephaserevs(self):
+        self._phaserevs = None
+
+    def phase(self, repo, rev):
+        # We need a repo argument here to be able to build _phaserev
+        # if necessary. The repository instance is not stored in
+        # phasecache to avoid reference cycles. The changelog instance
+        # is not stored because it is a filecache() property and can
+        # be replaced without us being notified.
+        if rev == nullrev:
+            return public
+        if self._phaserevs is None or rev >= len(self._phaserevs):
+            self._phaserevs = self.getphaserevs(repo, rebuild=True)
+        return self._phaserevs[rev]
+
+    def write(self):
+        if not self.dirty:
+            return
+        f = self.opener('phaseroots', 'w', atomictemp=True)
+        try:
+            for phase, roots in enumerate(self.phaseroots):
+                for h in roots:
+                    f.write('%i %s\n' % (phase, hex(h)))
+        finally:
+            f.close()
+        self.dirty = False
 
 def advanceboundary(repo, targetphase, nodes):
     """Add nodes to a phase changing other nodes phases if necessary.
@@ -173,6 +209,8 @@
     in the target phase or kept in a *lower* phase.
 
     Simplify boundary to contains phase roots only."""
+    phcache = repo._phasecache
+
     delroots = [] # set of root deleted by this path
     for phase in xrange(targetphase + 1, len(allphases)):
         # filter nodes that are not in a compatible phase already
@@ -181,16 +219,15 @@
         nodes = [n for n in nodes if repo[n].phase() >= phase]
         if not nodes:
             break # no roots to move anymore
-        roots = repo._phaseroots[phase]
+        roots = phcache.phaseroots[phase]
         olds = roots.copy()
         ctxs = list(repo.set('roots((%ln::) - (%ln::%ln))', olds, olds, nodes))
         roots.clear()
         roots.update(ctx.node() for ctx in ctxs)
         if olds != roots:
             # invalidate cache (we probably could be smarter here
-            if '_phaserev' in vars(repo):
-                del repo._phaserev
-            repo._dirtyphases = True
+            phcache.invalidatephaserevs()
+            phcache.dirty = True
             # some roots may need to be declared for lower phases
             delroots.extend(olds - roots)
         # declare deleted root in the target phase
@@ -205,22 +242,23 @@
     in the target phase or kept in a *higher* phase.
 
     Simplify boundary to contains phase roots only."""
-    currentroots = repo._phaseroots[targetphase]
+    phcache = repo._phasecache
+
+    currentroots = phcache.phaseroots[targetphase]
     newroots = [n for n in nodes if repo[n].phase() < targetphase]
     if newroots:
         currentroots.update(newroots)
         ctxs = repo.set('roots(%ln::)', currentroots)
         currentroots.intersection_update(ctx.node() for ctx in ctxs)
-        if '_phaserev' in vars(repo):
-            del repo._phaserev
-        repo._dirtyphases = True
+        phcache.invalidatephaserevs()
+        phcache.dirty = True
 
 
 def listphases(repo):
     """List phases root for serialisation over pushkey"""
     keys = {}
     value = '%i' % draft
-    for root in repo._phaseroots[draft]:
+    for root in repo._phasecache.phaseroots[draft]:
         keys[hex(root)] = value
 
     if repo.ui.configbool('phases', 'publish', True):
@@ -263,7 +301,7 @@
 def visibleheads(repo):
     """return the set of visible head of this repo"""
     # XXX we want a cache on this
-    sroots = repo._phaseroots[secret]
+    sroots = repo._phasecache.phaseroots[secret]
     if sroots:
         # XXX very slow revset. storing heads or secret "boundary" would help.
         revset = repo.set('heads(not (%ln::))', sroots)
@@ -279,7 +317,7 @@
     """return a branchmap for the visible set"""
     # XXX Recomputing this data on the fly is very slow.  We should build a
     # XXX cached version while computin the standard branchmap version.
-    sroots = repo._phaseroots[secret]
+    sroots = repo._phasecache.phaseroots[secret]
     if sroots:
         vbranchmap = {}
         for branch, nodes in  repo.branchmap().iteritems():
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -462,7 +462,8 @@
     """``draft()``
     Changeset in draft phase."""
     getargs(x, 0, 0, _("draft takes no arguments"))
-    return [r for r in subset if repo._phaserev[r] == phases.draft]
+    pc = repo._phasecache
+    return [r for r in subset if pc.phase(repo, r) == phases.draft]
 
 def filelog(repo, subset, x):
     """``filelog(pattern)``
@@ -851,7 +852,8 @@
     """``public()``
     Changeset in public phase."""
     getargs(x, 0, 0, _("public takes no arguments"))
-    return [r for r in subset if repo._phaserev[r] == phases.public]
+    pc = repo._phasecache
+    return [r for r in subset if pc.phase(repo, r) == phases.public]
 
 def remote(repo, subset, x):
     """``remote([id [,path]])``
@@ -1030,7 +1032,8 @@
     """``secret()``
     Changeset in secret phase."""
     getargs(x, 0, 0, _("secret takes no arguments"))
-    return [r for r in subset if repo._phaserev[r] == phases.secret]
+    pc = repo._phasecache
+    return [r for r in subset if pc.phase(repo, r) == phases.secret]
 
 def sort(repo, subset, x):
     """``sort(set[, [-]key...])``