[PATCH 01 of 14] cache: introduce an abstract class for cache we can upgrade incrementally

Boris Feld boris.feld at octobus.net
Sun Jul 9 17:55:13 UTC 2017


# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1499458441 -7200
#      Fri Jul 07 22:14:01 2017 +0200
# Node ID 6edb62505c697329de034c2fdc47befd5896f31f
# Parent  89796a25d4bb91fb418ad3e70faad2c586902ffb
# EXP-Topic obs-cache
cache: introduce an abstract class for cache we can upgrade incrementally

Right now each cache class implements its own mechanism for validation and
update. We start introducing an abstract class to ultimately allow more
unification of the cache code.

The end goal of this series is to introduce a cache for some obsolescence
property, not to actually implement the cache. However, taking advantage of
adding a new cache to introduce the abstract class seems like a win.

diff -r 89796a25d4bb -r 6edb62505c69 mercurial/cache.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/cache.py	Fri Jul 07 22:14:01 2017 +0200
@@ -0,0 +1,127 @@
+# cache.py - utilities for caching
+#
+# Copyright 2017 Octobus SAS <contact at octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import abc
+import struct
+
+from . import (
+    util,
+)
+
+class incrementalcachebase(object):
+    """base class for incremental cache from append only source
+
+    There are multiple append-only data sources we might want to cache
+    computation from. One common pattern is to track the state of the
+    file and update the cache with the extra data (eg: branchmap-cache tracking
+    changelog). This pattern also needs to detect when the source is stripped.
+
+    The overall pattern is similar whatever the actual source is. This class
+    introduces the basic patterns.
+    """
+
+    __metaclass__ = abc.ABCMeta
+
+    # default key used for an empty cache
+    emptykey = ()
+
+    _cachekeyspec = '' # struct format (no byte order) used for serialization
+    _cachename = None # used for debug message
+
+    @abc.abstractmethod
+    def __init__(self):
+        super(incrementalcachebase, self).__init__()
+        self._cachekey = None
+
+    @util.propertycache
+    def _cachekeystruct(self):
+        # dynamic property to help subclass to change it
+        return struct.Struct('>' + self._cachekeyspec)
+
+    @util.propertycache
+    def _cachekeysize(self):
+        # dynamic property to help subclass to change it
+        return self._cachekeystruct.size
+
+    @abc.abstractmethod
+    def _updatefrom(self, repo, data):
+        """override this method to update your data from incrementally read data
+
+        Content of <data> will depend on the source.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def clear(self, reset=False):
+        """invalidate the cache content
+
+        if 'reset' is passed, we detected a strip and the cache will have to be
+        recomputed.
+
+        Subclasses MUST override this method to actually affect the cache data.
+        """
+        if reset:
+            self._cachekey = self.emptykey
+        else:
+            self._cachekey = None
+
+    @abc.abstractmethod
+    def load(self, repo):
+        """Load data from disk
+
+        Subclasses MUST restore the "cachekey" attribute while doing so.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _fetchupdatedata(self, repo):
+        """Check the source for possible changes and return necessary data
+
+        The return is a three-element tuple: reset, data, cachekey
+
+        * reset: `True` when a strip is detected and cache needs to be reset
+        * data: new data to take in account, actual type depends on the source
+        * cachekey: the cache key covering <data> and previously covered data
+        """
+        raise NotImplementedError
+
+    # Useful "public" function (no need to override them)
+
+    def update(self, repo):
+        """update the cache with new repository data
+
+        The update will be incremental when possible"""
+        repo = repo.unfiltered()
+
+        # If we do not have any data, try loading from disk
+        if self._cachekey is None:
+            self.load(repo)
+
+        reset, data, newkey = self._fetchupdatedata(repo)
+        if newkey == self._cachekey:
+            return
+        if reset or self._cachekey is None:
+            repo.ui.log('cache', 'strip detected, %s cache reset\n',
+                        self._cachename)
+            self.clear(reset=True)
+
+        starttime = util.timer()
+        self._updatefrom(repo, data)
+        duration = util.timer() - starttime
+        repo.ui.log('cache', 'updated %s in %.4f seconds\n',
+                    self._cachename, duration)
+
+        self._cachekey = newkey
+
+    def _serializecachekey(self):
+        """provide a bytes version of the cachekey"""
+        return self._cachekeystruct.pack(*self._cachekey)
+
+    def _deserializecachekey(self, data):
+        """read the cachekey from bytes"""
+        return self._cachekeystruct.unpack(data)


More information about the Mercurial-devel mailing list