[PATCH 2 of 3 V3] chgcache: implement a smartcache layer

Wed Mar 8 01:35:58 EST 2017

# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1488949878 28800
#      Tue Mar 07 21:11:18 2017 -0800
# Node ID f0bded8d53c5c9a5cfb25d29dd99cf4eb3fb79b2
# Parent  60eb2c2b5196a62d635dbe0eb1e29fdd945d5058
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r f0bded8d53c5
chgcache: implement a smartcache layer

See the docstring of smartcache. Basically it sets up a pattern where
everything in the cache has a corresponding hash value to help test if it's
valid quickly.

It's mainly designed to be used with the repo state. See the next patch.

diff --git a/mercurial/chgcache.py b/mercurial/chgcache.py
--- a/mercurial/chgcache.py
+++ b/mercurial/chgcache.py
@@ -21,2 +21,61 @@ def set(key, value):
     else:
         _cache[key] = value
+
+class smartcache(object):
+    """cache knowing how to load and invalidate values, for predefined keys
+
+    The cache object will only answer to a key who is in loadfunctable. The
+    loadfunctable stores load functions which will do hashing and loading.
+    smartcache will update or invalidate entries according to the hash, and
+    provide the hash and value to load functions being called next time.
+
+    There is no "set" method. To pre-populate the cache, call "get" instead.
+    This will make sure the hash values are always correctly set.
+
+    The end users using smartcache.get will only notice the values, the hashes
+    and the cache is transparent to them.
+
+    A load function has the signature:
+
+        (state, oldhash, oldvalue) -> (newhash, newvalue)
+
+    Where state is where the load function reads information. oldhash, oldvalue
+    is what currently being stored in the cache. The returned hash and value
+    will be used to update the cache.
+
+    A load function usually looks like:
+
+        def loadfunc(state, oldhash, oldvalue):
+            hash = state.quickhash()
+            if hash == oldhash:
+                return oldhash, oldvalue
+            value = state.loadvalue()
+            hash = hashvalue(value)
+            # or, if hashvalue is expensive
+            hashagain = state.quickhash()
+            if hashagain != hash:
+                # invalidate the cache entry without filling a new one
+                hash = None
+            return hash, value
+
+    If predefined keys are not flexible enough, loadfunctable could be an
+    object implementing "get" which generates load functions dynamically.
+    """
+
+    def __init__(self, keyprefix, state, loadfunctable):
+        self.keyprefix = keyprefix
+        self.state = state
+        self.loadfunctable = loadfunctable
+
+    def get(self, key):
+        loadfunc = self.loadfunctable.get(key)
+        if loadfunc is None:
+            return None
+        fullkey = self.keyprefix + key
+        oldhash, oldvalue = get(fullkey) or [None, None]
+        newhash, newvalue = loadfunc(self.state, oldhash, oldvalue)
+        if newhash is None:
+            set(fullkey, None)
+        elif newvalue is not oldvalue or newhash != oldhash:
+            set(fullkey, (newhash, newvalue))
+        return newvalue
diff --git a/tests/test-chgcache.py b/tests/test-chgcache.py
new file mode 100644
--- /dev/null
+++ b/tests/test-chgcache.py
@@ -0,0 +1,56 @@
+from __future__ import absolute_import, print_function
+
+import os
+
+from mercurial import (
+    chgcache,
+    scmutil,
+)
+
+filename = 'foo'
+
+def readfoo(vfs, oldhash, oldvalue):
+    # NOTE: st_size is intentional for this test. Do not use it in real code if
+    # the file could be rewritten (not append-only).
+    try:
+        newhash = vfs.stat(filename).st_size
+    except OSError:
+        return None, None
+    if oldhash == newhash:
+        print('cache hit')
+        return oldhash, oldvalue
+    else:
+        print('cache miss')
+        value = vfs.read(filename)
+        # NOTE: This is wrong. In reality, we need to calculate the hash again,
+        # and return None as the "newhash" if hashes mismatch, to mitigate
+        # filesystem race conditions.
+        # That said, in this test we do know nobody else will touch the file,
+        # so it's fine.
+        return newhash, value
+
+loadfuncs = {'foo': readfoo}
+vfs = scmutil.vfs(os.environ['TESTTMP'])
+
+cache = chgcache.smartcache('vfs', vfs, loadfuncs)
+
+def printcache():
+    print('cache["foo"] = %r' % cache.get('foo'))
+
+printcache() # None, because the file does not exist
+
+vfs.write(filename, 'a')
+printcache() # cache miss, 'a'
+printcache() # cache hit, 'a'
+
+vfs.write(filename, 'ab')
+printcache() # cache miss, 'ab'
+
+vfs.write(filename, 'cd')
+printcache() # cache hit, 'ab'
+
+vfs.unlink('foo')
+printcache() # None, will invalidate the cache
+
+vfs.write(filename, 'ef')
+printcache() # cache miss, 'ef'
diff --git a/tests/test-chgcache.py.out b/tests/test-chgcache.py.out
new file mode 100644
--- /dev/null
+++ b/tests/test-chgcache.py.out
@@ -0,0 +1,12 @@
+cache["foo"] = None
+cache miss
+cache["foo"] = 'a'
+cache hit
+cache["foo"] = 'a'
+cache miss
+cache["foo"] = 'ab'
+cache hit
+cache["foo"] = 'ab'
+cache["foo"] = None
+cache miss
+cache["foo"] = 'ef'