[PATCH 2 of 3 V3] chgcache: implement a smartcache layer
Jun Wu
quark at fb.com
Wed Mar 8 01:35:58 EST 2017
# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1488949878 28800
# Tue Mar 07 21:11:18 2017 -0800
# Node ID f0bded8d53c5c9a5cfb25d29dd99cf4eb3fb79b2
# Parent 60eb2c2b5196a62d635dbe0eb1e29fdd945d5058
# Available At https://bitbucket.org/quark-zju/hg-draft
# hg pull https://bitbucket.org/quark-zju/hg-draft -r f0bded8d53c5
chgcache: implement a smartcache layer
See the docstring of smartcache. Basically it sets up a pattern where
everything in the cache has a corresponding hash value, so that its validity
can be tested quickly.
It's mainly designed to be used with the repo state. See the next patch.
diff --git a/mercurial/chgcache.py b/mercurial/chgcache.py
--- a/mercurial/chgcache.py
+++ b/mercurial/chgcache.py
@@ -21,2 +21,61 @@ def set(key, value):
else:
_cache[key] = value
+
+class smartcache(object):
+ """cache knowing how to load and invalidate values, for predefined keys
+
+ The cache object will only answer to a key that is in loadfunctable. The
+ loadfunctable stores load functions which will do hashing and loading.
+ smartcache will update or invalidate entries according to the hash, and
+ provide the hash and value to the load function the next time it is called.
+
+ There is no "set" method. To pre-populate the cache, call "get" instead.
+ This will make sure the hash values are always correctly set.
+
+ The end users using smartcache.get will only notice the values; the hashes
+ and the cache are transparent to them.
+
+ A load function has the signature:
+
+ (state, oldhash, oldvalue) -> (newhash, newvalue)
+
+ Where state is what the load function reads information from. oldhash and
+ oldvalue are what is currently stored in the cache. The returned hash and
+ value will be used to update the cache.
+
+ A load function usually looks like:
+
+ def loadfunc(state, oldhash, oldvalue):
+ hash = state.quickhash()
+ if hash == oldhash:
+ return oldhash, oldvalue
+ value = state.loadvalue()
+ hash = hashvalue(value)
+ # or, if hashvalue is expensive
+ hashagain = state.quickhash()
+ if hashagain != hash:
+ # invalidate the cache entry without filling a new one
+ hash = None
+ return hash, value
+
+ If predefined keys are not flexible enough, loadfunctable could be an
+ object implementing "get" which generates load functions dynamically.
+ """
+
+ def __init__(self, keyprefix, state, loadfunctable):
+ self.keyprefix = keyprefix
+ self.state = state
+ self.loadfunctable = loadfunctable
+
+ def get(self, key):
+ loadfunc = self.loadfunctable.get(key)
+ if loadfunc is None:
+ return None
+ fullkey = self.keyprefix + key
+ oldhash, oldvalue = get(fullkey) or [None, None]
+ newhash, newvalue = loadfunc(self.state, oldhash, oldvalue)
+ if newhash is None:
+ set(fullkey, None)
+ elif newvalue is not oldvalue or newhash != oldhash:
+ set(fullkey, (newhash, newvalue))
+ return newvalue
diff --git a/tests/test-chgcache.py b/tests/test-chgcache.py
new file mode 100644
--- /dev/null
+++ b/tests/test-chgcache.py
@@ -0,0 +1,56 @@
+from __future__ import absolute_import, print_function
+
+import os
+
+from mercurial import (
+ chgcache,
+ scmutil,
+)
+
+filename = 'foo'
+
+def readfoo(vfs, oldhash, oldvalue):
+ # NOTE: st_size is intentional for this test. Do not use it in real code if
+ # the file could be rewritten (not append-only).
+ try:
+ newhash = vfs.stat(filename).st_size
+ except OSError:
+ return None, None
+ if oldhash == newhash:
+ print('cache hit')
+ return oldhash, oldvalue
+ else:
+ print('cache miss')
+ value = vfs.read(filename)
+ # NOTE: This is wrong. In reality, we need to calculate the hash again,
+ # and return None as the "newhash" if hashes mismatch, to mitigate
+ # filesystem race conditions.
+ # That said, in this test we do know nobody else will touch the file,
+ # so it's fine.
+ return newhash, value
+
+loadfuncs = {'foo': readfoo}
+vfs = scmutil.vfs(os.environ['TESTTMP'])
+
+cache = chgcache.smartcache('vfs', vfs, loadfuncs)
+
+def printcache():
+ print('cache["foo"] = %r' % cache.get('foo'))
+
+printcache() # None, because the file does not exist
+
+vfs.write(filename, 'a')
+printcache() # cache miss, 'a'
+printcache() # cache hit, 'a'
+
+vfs.write(filename, 'ab')
+printcache() # cache miss, 'ab'
+
+vfs.write(filename, 'cd')
+printcache() # cache hit, 'ab'
+
+vfs.unlink('foo')
+printcache() # None, will invalidate the cache
+
+vfs.write(filename, 'ef')
+printcache() # cache miss, 'ef'
diff --git a/tests/test-chgcache.py.out b/tests/test-chgcache.py.out
new file mode 100644
--- /dev/null
+++ b/tests/test-chgcache.py.out
@@ -0,0 +1,12 @@
+cache["foo"] = None
+cache miss
+cache["foo"] = 'a'
+cache hit
+cache["foo"] = 'a'
+cache miss
+cache["foo"] = 'ab'
+cache hit
+cache["foo"] = 'ab'
+cache["foo"] = None
+cache miss
+cache["foo"] = 'ef'
More information about the Mercurial-devel
mailing list