[PATCH 2 of 3 V3] chgcache: implement a smartcache layer
Simon Farnsworth
simonfar at fb.com
Wed Mar 8 13:52:06 EST 2017
On 08/03/2017 06:35, Jun Wu wrote:
> # HG changeset patch
> # User Jun Wu <quark at fb.com>
> # Date 1488949878 28800
> # Tue Mar 07 21:11:18 2017 -0800
> # Node ID f0bded8d53c5c9a5cfb25d29dd99cf4eb3fb79b2
> # Parent 60eb2c2b5196a62d635dbe0eb1e29fdd945d5058
> # Available At https://urldefense.proofpoint.com/v2/url?u=https-3A__bitbucket.org_quark-2Dzju_hg-2Ddraft&d=DwIGaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=mEgSWILcY4c4W3zjApBQLA&m=reQmKdchqeMVRwrhw7ZqWyDvfs90FzZDm_PbdOvq4oo&s=Fii6rsLkxbQqCcmGkO37t_GzC3q7-yPDldCdBtsr2Zg&e=
> # hg pull https://urldefense.proofpoint.com/v2/url?u=https-3A__bitbucket.org_quark-2Dzju_hg-2Ddraft&d=DwIGaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=mEgSWILcY4c4W3zjApBQLA&m=reQmKdchqeMVRwrhw7ZqWyDvfs90FzZDm_PbdOvq4oo&s=Fii6rsLkxbQqCcmGkO37t_GzC3q7-yPDldCdBtsr2Zg&e= -r f0bded8d53c5
> chgcache: implement a smartcache layer
>
> See the docstring of smartcache. Basically it sets up a pattern where
> everything in the cache has a corresponding hash value to help test if it's
> valid quickly.
>
> It's mainly designed to be used with the repo state. See the next patch.
>
> diff --git a/mercurial/chgcache.py b/mercurial/chgcache.py
> --- a/mercurial/chgcache.py
> +++ b/mercurial/chgcache.py
> @@ -21,2 +21,61 @@ def set(key, value):
> else:
> _cache[key] = value
> +
> +class smartcache(object):
> + """cache knowing how to load and invalidate values, for predefined keys
> +
> + The cache object will only answer to a key who is in loadfunctable. The
"which is in", not "who is in", and drop the "The" at the end of this
line, for better English.
> + loadfunctable stores load functions which will do hashing and loading.
> + smartcache will update or invalidate entries according to the hash, and
> + provide the hash and value to load functions being called next time.
> +
> + There is no "set" method. To pre-populate the cache, call "get" instead.
> + This will make sure the hash values are always correctly set.
> +
> + The end users using smartcache.get will only notice the values, the hashes
> + and the cache is transparent to them.
> +
> + A load function has the signature:
> +
> + (state, oldhash, oldvalue) -> (newhash, newvalue)
> +
> + Where state is where the load function reads information. oldhash, oldvalue
> + is what currently being stored in the cache. The returned hash and value
> + will be used to update the cache.
> +
> + A load function usually looks like:
> +
> + def loadfunc(state, oldhash, oldvalue):
> + hash = state.quickhash()
> + if hash == oldhash:
> + return oldhash, oldvalue
> + value = state.loadvalue()
> + hash = hashvalue(value)
> + # or, if hashvalue is expensive
> + hashagain = state.quickhash()
> + if hashagain != hash:
> + # invalidate the cache entry without filling a new one
> + hash = None
> + return hash, value
> +
I don't like this interface - there's a lot for a load function author
to get right. I would prefer to see a split into a pair of functions -
one that gets the hash, one that gets the data - and have the cache code
handle the state management instead.
> + If predefined keys are not flexible enough, loadfunctable could be an
> + object implementing "get" which generates load functions dynamically.
> + """
> +
> + def __init__(self, keyprefix, state, loadfunctable):
> + self.keyprefix = keyprefix
> + self.state = state
> + self.loadfunctable = loadfunctable
> +
> + def get(self, key):
> + loadfunc = self.loadfunctable.get(key)
> + if loadfunc is None:
> + return None
> + fullkey = self.keyprefix + key
> + oldhash, oldvalue = get(fullkey) or [None, None]
> + newhash, newvalue = loadfunc(self.state, oldhash, oldvalue)
> + if newhash is None:
> + set(fullkey, None)
> + elif newvalue is not oldvalue or newhash != oldhash:
> + set(fullkey, (newhash, newvalue))
> + return newvalue
> diff --git a/tests/test-chgcache.py b/tests/test-chgcache.py
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-chgcache.py
> @@ -0,0 +1,56 @@
> +from __future__ import absolute_import, print_function
> +
> +import os
> +
> +from mercurial import (
> + chgcache,
> + scmutil,
> +)
> +
> +filename = 'foo'
> +
> +def readfoo(vfs, oldhash, oldvalue):
> + # NOTE: st_size is intentional for this test. Do not use it in real code if
> + # the file could be rewritten (not append-only).
> + try:
> + newhash = vfs.stat(filename).st_size
> + except OSError:
> + return None, None
> + if oldhash == newhash:
> + print('cache hit')
> + return oldhash, oldvalue
> + else:
> + print('cache miss')
> + value = vfs.read(filename)
> + # NOTE: This is wrong. In reality, we need to calculate the hash again,
> + # and return None as the "newhash" if hashes mismatch, to mitigate
> + # filesystem race conditions.
> + # That said, in this test we do know nobody else will touch the file,
> + # so it's fine.
I'd prefer to avoid the need for this comment - instead:
finalhash = vfs.stat(filename).st_size
if finalhash == newhash:
return newhash, value
else:
return None, value
This has the advantage of being a worked example for people who learn
better from examples than from documentation.
> + return newhash, value
> +
> +loadfuncs = {'foo': readfoo}
> +vfs = scmutil.vfs(os.environ['TESTTMP'])
> +
> +cache = chgcache.smartcache('vfs', vfs, loadfuncs)
> +
> +def printcache():
> + print('cache["foo"] = %r' % cache.get('foo'))
> +
> +printcache() # None, because the file does not exist
> +
> +vfs.write(filename, 'a')
> +printcache() # cache miss, 'a'
> +printcache() # cache hit, 'a'
> +
> +vfs.write(filename, 'ab')
> +printcache() # cache miss, 'ab'
> +
> +vfs.write(filename, 'cd')
> +printcache() # cache hit, 'ab'
> +
> +vfs.unlink('foo')
> +printcache() # None, will invalidate the cache
> +
> +vfs.write(filename, 'ef')
> +printcache() # cache miss, 'ef'
> diff --git a/tests/test-chgcache.py.out b/tests/test-chgcache.py.out
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-chgcache.py.out
> @@ -0,0 +1,12 @@
> +cache["foo"] = None
> +cache miss
> +cache["foo"] = 'a'
> +cache hit
> +cache["foo"] = 'a'
> +cache miss
> +cache["foo"] = 'ab'
> +cache hit
> +cache["foo"] = 'ab'
> +cache["foo"] = None
> +cache miss
> +cache["foo"] = 'ef'
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.mercurial-2Dscm.org_mailman_listinfo_mercurial-2Ddevel&d=DwIGaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=mEgSWILcY4c4W3zjApBQLA&m=reQmKdchqeMVRwrhw7ZqWyDvfs90FzZDm_PbdOvq4oo&s=DZWjoUWpkqSpspZkpl_xGThGgkNdeyF3WiYgAFIIKnM&e=
>
--
Simon Farnsworth
More information about the Mercurial-devel
mailing list