[PATCH 02 of 11 v2] scmutil: introduce filecache

Idan Kamara idankk86 at gmail.com
Mon Jul 25 07:09:09 CDT 2011


# HG changeset patch
# User Idan Kamara <idankk86 at gmail.com>
# Date 1310227619 -10800
# Node ID 3d8973ae522c5c4196b923a102fd14d7f98936ea
# Parent  43e4a9f7c3176922b7c16c83565261ce7d7fc07d
scmutil: introduce filecache

The idea is to be able to associate a file with a property, and watch
that file's stat info for modifications when we decide it's important for it to
be up-to-date. Once the file changes, we recreate the object.

On filesystems that can't uniquely identify a file, we always recreate.

As a consequence, localrepo.invalidate() will become much less expensive in the
case where nothing changed on-disk.

diff -r 43e4a9f7c317 -r 3d8973ae522c mercurial/scmutil.py
--- a/mercurial/scmutil.py	Mon Jul 25 15:03:02 2011 +0300
+++ b/mercurial/scmutil.py	Sat Jul 09 19:06:59 2011 +0300
@@ -709,3 +709,95 @@
         raise error.RequirementError(_("unknown repository format: "
             "requires features '%s' (upgrade Mercurial)") % "', '".join(missings))
     return requirements
+
+class filecacheentry(object):
+    def __init__(self, path):
+        self.path = path
+        self.cachestat = filecacheentry.stat(self.path)
+
+        if self.cachestat:
+            self._cacheable = self.cachestat.cacheable()
+        else:
+            # None means we don't know yet
+            self._cacheable = None
+
+    def refresh(self):
+        if self.cacheable():
+            self.cachestat = filecacheentry.stat(self.path)
+
+    def cacheable(self):
+        if self._cacheable is not None:
+            return self._cacheable
+
+        # we don't know yet, assume it is for now
+        return True
+
+    def changed(self):
+        # no point in going further if we can't cache it
+        if not self.cacheable():
+            return True
+
+        newstat = filecacheentry.stat(self.path)
+
+        # we may not know if it's cacheable yet, check again now
+        if newstat and self._cacheable is None:
+            self._cacheable = newstat.cacheable()
+
+            # check again
+            if not self._cacheable:
+                return True
+
+        if self.cachestat != newstat:
+            self.cachestat = newstat
+            return True
+        else:
+            return False
+
+    @staticmethod
+    def stat(path):
+        try:
+            return util.cachestat(path)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                raise
+
+class filecache(object):
+    '''A property-like decorator that tracks a file under .hg/ for updates.
+
+    Records stat info when called in _filecache.
+
+    On subsequent calls, compares old stat info with new info, and recreates
+    the object when needed, updating the new stat info in _filecache.
+
+    Mercurial either atomically renames or appends to files under .hg,
+    so to ensure the cache is reliable we need the filesystem to be able
+    to tell us if a file has been replaced. If it can't, we fall back to
+    recreating the object on every call (essentially the same behaviour as
+    propertycache).'''
+    def __init__(self, path, instore=False):
+        self.path = path
+        self.instore = instore
+
+    def __call__(self, func):
+        self.func = func
+        self.name = func.__name__
+        return self
+
+    def __get__(self, obj, type=None):
+        entry = obj._filecache.get(self.name)
+
+        if entry:
+            if entry.changed():
+                entry.obj = self.func(obj)
+        else:
+            path = self.instore and obj.sjoin(self.path) or obj.join(self.path)
+
+            # We stat -before- creating the object so our cache doesn't lie if
+            # a writer modified between the time we read and stat
+            entry = filecacheentry(path)
+            entry.obj = self.func(obj)
+
+            obj._filecache[self.name] = entry
+
+        setattr(obj, self.name, entry.obj)
+        return entry.obj
diff -r 43e4a9f7c317 -r 3d8973ae522c tests/test-filecache.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-filecache.py	Sat Jul 09 19:06:59 2011 +0300
@@ -0,0 +1,95 @@
+import sys, os, subprocess
+
+try:
+    subprocess.check_call(['%s/hghave' % os.environ['TESTDIR'], 'cacheable'])
+except subprocess.CalledProcessError:
+    sys.exit(80)
+
+from mercurial import util, scmutil, extensions
+
+filecache = scmutil.filecache
+
+class fakerepo(object):
+    def __init__(self):
+        self._filecache = {}
+
+    def join(self, p):
+        return p
+
+    def sjoin(self, p):
+        return p
+
+    @filecache('x')
+    def cached(self):
+        print 'creating'
+
+    def invalidate(self):
+        for k in self._filecache:
+            try:
+                delattr(self, k)
+            except AttributeError:
+                pass
+
+def basic(repo):
+    # file doesn't exist, calls function
+    repo.cached
+
+    repo.invalidate()
+    # file still doesn't exist, uses cache
+    repo.cached
+
+    # create empty file
+    f = open('x', 'w')
+    f.close()
+    repo.invalidate()
+    # should recreate the object
+    repo.cached
+
+    f = open('x', 'w')
+    f.write('a')
+    f.close()
+    repo.invalidate()
+    # should recreate the object
+    repo.cached
+
+    repo.invalidate()
+    # stats file again, nothing changed, reuses object
+    repo.cached
+
+    # atomic replace file, size doesn't change
+    # hopefully st_mtime doesn't change as well so this doesn't use the cache
+    # because of inode change
+    f = scmutil.opener('.')('x', 'w', atomictemp=True)
+    f.write('b')
+    f.rename()
+
+    repo.invalidate()
+    repo.cached
+
+def fakeuncacheable():
+    def wrapcacheable(orig, *args, **kwargs):
+        return False
+
+    def wrapinit(orig, *args, **kwargs):
+        pass
+
+    originit = extensions.wrapfunction(util.cachestat, '__init__', wrapinit)
+    origcacheable = extensions.wrapfunction(util.cachestat, 'cacheable', wrapcacheable)
+
+    try:
+        os.remove('x')
+    except:
+        pass
+
+    basic(fakerepo())
+
+    util.cachestat.cacheable = origcacheable
+    util.cachestat.__init__ = originit
+
+print 'basic:'
+print
+basic(fakerepo())
+print
+print 'fakeuncacheable:'
+print
+fakeuncacheable()
diff -r 43e4a9f7c317 -r 3d8973ae522c tests/test-filecache.py.out
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-filecache.py.out	Sat Jul 09 19:06:59 2011 +0300
@@ -0,0 +1,15 @@
+basic:
+
+creating
+creating
+creating
+creating
+
+fakeuncacheable:
+
+creating
+creating
+creating
+creating
+creating
+creating


More information about the Mercurial-devel mailing list