[PATCH 1 of 2] context: don't use util.cachefunc due to cycle creation (issue5043)

Gregory Szorc gregory.szorc at gmail.com
Sun Jan 17 14:47:33 CST 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1453061430 28800
#      Sun Jan 17 12:10:30 2016 -0800
# Node ID 0723bc793d3bd2d4b52a748d68561e8ea0c99e05
# Parent  add2ba16430ea5d31ee26e84e1b4c66dc3a6ee15
context: don't use util.cachefunc due to cycle creation (issue5043)

util.cachefunc stores all arguments as the cache key. For filectxfn
functions, the arguments include the memctx instance. This creates a
reference cycle: memctx._filectxfn holds the cache, and the cache keys
hold the memctx. This causes a memory leak.

We break the cycle by implementing our own memoizing function that
only uses the path as the cache key. Since each memctx has its own
cache instance, there is no concern about invalid cache hits.

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1774,16 +1774,32 @@ class workingcommitctx(workingctx):
     def _changedset(self):
         """Return the set of files changed in this context
         """
         changed = set(self._status.modified)
         changed.update(self._status.added)
         changed.update(self._status.removed)
         return changed
 
+def makecachingfilectxfn(func):
+    """Create a filectxfn that caches based on the path.
+
+    We can't use util.cachefunc because it uses all arguments as the cache
+    key, and this creates a reference cycle since the arguments include the
+    repo and memctx. Each returned function has its own cache.
+    """
+    cache = {}
+
+    def getfilectx(repo, memctx, path):
+        if path not in cache:
+            cache[path] = func(repo, memctx, path)
+        return cache[path]
+
+    return getfilectx
+
 class memctx(committablectx):
     """Use memctx to perform in-memory commits via localrepo.commitctx().
 
     Revision information is supplied at initialization time while
     related files data and is made available through a callback
     mechanism.  'repo' is the current localrepo, 'parents' is a
     sequence of two parent revisions identifiers (pass None for every
     missing parent), 'text' is the commit message and 'files' lists
@@ -1833,19 +1849,18 @@ class memctx(committablectx):
                 copied = fctx.renamed()
                 if copied:
                     copied = copied[0]
                 return memfilectx(repo, path, fctx.data(),
                                   islink=fctx.islink(), isexec=fctx.isexec(),
                                   copied=copied, memctx=memctx)
             self._filectxfn = getfilectx
         else:
-            # "util.cachefunc" reduces invocation of possibly expensive
-            # "filectxfn" for performance (e.g. converting from another VCS)
-            self._filectxfn = util.cachefunc(filectxfn)
+            # memoizing increases performance for e.g. vcs convert scenarios.
+            self._filectxfn = makecachingfilectxfn(filectxfn)
 
         if extra:
             self._extra = extra.copy()
         else:
             self._extra = {}
 
         if self._extra.get('branch', '') == '':
             self._extra['branch'] = 'default'


More information about the Mercurial-devel mailing list