[PATCH 4 of 8] obscache: add an abstract base class for changelog+obstore cache
Augie Fackler
raf at durin42.com
Fri May 19 17:50:22 EDT 2017
On Fri, May 19, 2017 at 04:28:03PM +0200, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david at octobus.net>
> # Date 1495194829 -7200
> # Fri May 19 13:53:49 2017 +0200
> # Node ID b5c984cd0640255b94f22d56a4bfe398b2c5de2e
> # Parent c67d3438f0d1ee56437ab0cffd49b02f441f876e
> # EXP-Topic obscache
> # Available At https://www.mercurial-scm.org/repo/users/marmoute/mercurial/
> # hg pull https://www.mercurial-scm.org/repo/users/marmoute/mercurial/ -r b5c984cd0640
> obscache: add an abstract base class for changelog+obstore cache
>
> There are many useful properties to cache around obsolescence. Some of them
> needs data from both the changelog and the obsstore. We needs logic handle to
> track changes and strip to both data structure are the same time. We also wants
> these to allow incremental update when possible instead of recomputing the whole
> set all the time.
>
> As this is a common needs, we start by adding an abstract class reusable by
> multiple cache.
>
> This code was initially part of the evolve extensions and matured there.
>
> diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
> --- a/mercurial/obsolete.py
> +++ b/mercurial/obsolete.py
> @@ -1141,6 +1141,228 @@ def successorssets(repo, initialnode, ca
> cache[current] = final
> return cache[initialnode]
>
> +class dualsourcecache(object):
I'm not in love with this name, as it sounds more generic than it
is. I might also have some nits about it being an abstract class
depending on the future patches.
> + """An abstract class for cache that needs both changelog and obsstore
> +
> + This class handle the tracking of changelog and obsstore update. It provide
> + data to performs incremental update (see the 'updatefrom' function for
> + details). This class can also detect stripping of the changelog or the
> + obsstore and can reset the cache in this cache (see the 'clear' function
> + for details).
> +
> + For this purpose it use a cache key covering changelog and obsstore
> + content:
> +
> + The cache key parts are:
> + - tip-rev,
> + - tip-node,
> + - obsstore-length (nb markers),
> + - obsstore-file-size (in bytes),
> + - obsstore "cache key"
> + """
> +
> + # default key used for an empty cache
> + emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
> + _cachename = None # used for debug message
> +
> + def __init__(self):
> + super(dualsourcecache, self).__init__()
> + self._cachekey = None
> +
> + @property
> + def _tiprev(self):
> + """small utility to make the code less error prone"""
> + if self._cachekey is None:
> + return None
> + return self._cachekey[0]
> +
> + @property
> + def _obssize(self):
> + """small utility to make the code less error prone"""
> + if self._cachekey is None:
> + return None
> + return self._cachekey[3]
> +
> + def _updatefrom(self, repo, revs, obsmarkers):
> + """override this method to update your cache data incrementally
> +
> + revs: list of new revision in the changelog
> + obsmarker: list of new obsmarkers in the obsstore
> + """
> + raise NotImplementedError
Nit: could we move all the abstract parts of the interface to the top
of the class so it's easier to find what has to be implemented to get
a concrete implementation?
> +
> + def clear(self, reset=False):
> + """invalidate the cache content
> +
> + if 'reset' is passed, we detected a strip and the cache will have to be
> + recomputed.
> +
> + Subclasses MUST overide this method to actually affect the cache data.
> + """
> + if reset:
> + self._cachekey = self.emptykey if reset else None
> + else:
> + self._cachekey = None
> +
> + def load(self, repo):
> + """Load data from disk
> +
> + Subclasses MUST restore the "cachekey" attribute while doing so.
> + """
> + raise NotImplementedError
> +
> + # Useful public function (no need to override them)
> +
> + def uptodate(self, repo):
> + """return True if the cache content is up to date False otherwise
> +
> + This method can be used to detect of the cache is lagging behind new
> + data in either changelog or obsstore.
> + """
> + repo = repo.unfiltered()
> + if self._cachekey is None:
> + self.load(repo)
> + status = self._checkkey(repo.changelog, repo.obsstore)
> + return (status is not None
> + and status[0] == self._tiprev
> + and status[1] == self._obssize)
> +
> + def update(self, repo):
> + """update the cache with new repository data
> +
> + The update will be incremental when possible"""
> + repo = repo.unfiltered()
> + # If we do not have any data, try loading from disk
> + if self._cachekey is None:
> + self.load(repo)
> +
> + cl = repo.changelog
> +
> + upgrade = self._upgradeneeded(repo)
> + if upgrade is None:
> + return
> +
> + reset, revs, obsmarkers, obskeypair = upgrade
> + if reset or self._cachekey is None:
> + repo.ui.log('cache', 'strip detected, %s cache reset\n'
> + % self._cachename)
> + self.clear(reset=True)
> +
> + starttime = util.timer()
> + self._updatefrom(repo, revs, obsmarkers)
> + duration = util.timer() - starttime
> + repo.ui.log('cache', 'updated %s in %.4f seconds (%sr, %so)\n',
> + self._cachename, duration, len(revs), len(obsmarkers))
> +
> + # update the key from the new data
> + key = list(self._cachekey)
> + if revs:
> + # tiprev
> + key[0] = revs[-1]
> + # tipnode
> + key[1] = cl.node(key[0])
> + if obsmarkers:
> + # obsstore length (nb obsmarker)
> + key[2] += len(obsmarkers)
> + # obsstore size + key
> + key[3], key[4] = obskeypair
> + self._cachekey = tuple(key)
> +
> + # from here, there are internal function only
> +
> + def _checkkey(self, changelog, obsstore):
> + """key current key against changelog and obsstore
> +
> + return:
> + - None: when the key is invalid (no key, strip detect, etc),
> + - (current-tiprev, current-obsszie, current-obskey) otherwise.
> + """
> + key = self._cachekey
> + if key is None:
> + return None
> +
> + ### Is the cache valid ?
> + keytiprev, keytipnode, keyobslength, keyobssize, keyobskey = key
> + # check for changelog strip
> + tiprev = len(changelog) - 1
> + if (tiprev < keytiprev
> + or changelog.node(keytiprev) != keytipnode):
> + return None
> + # check for obsstore strip
> + obssize, obskey = obsstore.cachekey(index=keyobssize)
> + if obskey != keyobskey:
> + return None
> + if obssize != keyobssize:
> + # we want to return the obskey for the new size
> + __, obskey = obsstore.cachekey(index=obssize)
> + return tiprev, obssize, obskey
> +
> + def _upgradeneeded(self, repo):
> + """return (reset, revs, markers, (obssize, obskey)) or None
> +
> + If 'None' is returned, the cache is up to date, no upgrade are needed.
> +
> + 'reset': True if the current data must be destroyed (strip detected),
> +
> + 'revs': full list of new revisions that the cache must take in account,
> + 'markers': full list of new markers the cache must take in account,
> + '(obssize, obskey)': the new obs-cachekey that match the new markers.
> +
> + note: the new 'tiprev' can be computed using 'max(revs)' or revs[-1].
> + """
> +
> + # We need to ensure we use the same changelog and obsstore through the
> + # processing. Otherwise some invalidation could update the object and
> + # their content after we computed the cache key.
> + cl = repo.changelog
> + obsstore = repo.obsstore
> +
> + reset = False
> +
> + status = self._checkkey(cl, obsstore)
> + if status is None:
> + reset = True
> + key = self.emptykey
> + obssize, obskey = obsstore.cachekey()
> + tiprev = len(cl) - 1
> + else:
> + key = self._cachekey
> + tiprev, obssize, obskey = status
> + keytiprev = key[0]
> + keyobslength = key[2]
> + keyobssize = key[3]
> +
> + if not reset and keytiprev == tiprev and keyobssize == obssize:
> + return None # nothing to upgrade
> +
> + ### cache is valid, is there anything to update
> +
> + # any new changesets ?
> + revs = ()
> + if keytiprev < tiprev:
> + revs = list(cl.revs(start=keytiprev + 1, stop=tiprev))
> +
> + # any new markers
> + markers = ()
> + if keyobssize < obssize:
> + # XXX Three are a small race change here. Since the obsstore might
> + # have move forward between the time we computed the cache key and
> + # we access the data. To fix this we need so "up to" argument when
> + # fetching the markers here. Otherwise we might return more markers
> + # than covered by the cache key.
> + #
> + # In practice the cache target usage only update in a transaction
> + # context, with the lock taken. So we should be fine.
> +
> + # XXX we currently naively access all markers on the obsstore for
> + # the sake of simplicity. This will be updated to ways that does
> + # not requires to load the whole obsstore in the future.
> + markers = list(obsstore)
> + if keyobslength:
> + markers = markers[keyobslength:]
> +
> + return reset, revs, markers, (obssize, obskey)
> +
> # mapping of 'set-name' -> <function to compute this set>
> cachefuncs = {}
> def cachefor(name):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list