[PATCH PoC] filecache: unimplement __set__() and __delete__() DO NOT PUSH

Yuya Nishihara yuya at tcha.org
Sat Oct 20 11:01:12 UTC 2018


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1540025760 -32400
#      Sat Oct 20 17:56:00 2018 +0900
# Node ID d8f825e87202c31a8e65b41c405e804b2e25195b
# Parent  a9e303dcd1e1f22e9930fe745aee21674cf209c0
filecache: unimplement __set__() and __delete__() DO NOT PUSH

Implementing __set__() implies that the descriptor can't be overridden by
obj.__dict__, which means any property access involves slow function call.

  "Data descriptors with __set__() and __get__() defined always override
  a redefinition in an instance dictionary. In contrast, non-data descriptors
  can be overridden by instances."

  https://docs.python.org/2.7/reference/datamodel.html#invoking-descriptors

This patch basically backs out 236bb604dc39, "scmutil: update cached copy
when filecached attribute is assigned (issue3263)." The problem described
in issue3263 (which is #3264 in Bugzilla) should no longer happen since
repo._bookmarkcurrent has been moved to repo._bookmarks.active. We still
have a risk of introducing similar bugs, but I think that's the cost we
have to pay.

  $ hg perfrevset 'branch(tip)' -R mercurial
  (orig) wall 0.134997 comb 0.130000 user 0.130000 sys 0.000000 (best of 69)
  (this) wall 0.099038 comb 0.110000 user 0.100000 sys 0.010000 (best of 93)

TODO: fix test-filecache.py failure

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -318,6 +318,9 @@ class dirstate(object):
 
     def setbranch(self, branch):
         self._branch = encoding.fromlocal(branch)
+        ce = self._filecache.get('_branch')
+        if ce:
+            ce.obj = self._branch
         f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
         try:
             f.write(self._branch + '\n')
@@ -325,7 +328,6 @@ class dirstate(object):
 
             # make sure filecache has the correct stat info for _branch after
             # replacing the underlying file
-            ce = self._filecache['_branch']
             if ce:
                 ce.refresh()
         except: # re-raises
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -91,11 +91,12 @@ class _basefilecache(scmutil.filecache):
     def __get__(self, repo, type=None):
         if repo is None:
             return self
-        return super(_basefilecache, self).__get__(repo.unfiltered(), type)
-    def __set__(self, repo, value):
-        return super(_basefilecache, self).__set__(repo.unfiltered(), value)
-    def __delete__(self, repo):
-        return super(_basefilecache, self).__delete__(repo.unfiltered())
+        # filtered repo has no entry in its __dict__
+        unfi = repo.unfiltered()
+        try:
+            return unfi.__dict__[self.sname]
+        except KeyError:
+            return super(_basefilecache, self).__get__(unfi, type)
 
 class repofilecache(_basefilecache):
     """filecache for files in .hg but outside of .hg/store"""
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -1247,16 +1247,14 @@ class filecache(object):
     results cached. The decorated function is called. The results are stashed
     away in a ``_filecache`` dict on the object whose method is decorated.
 
-    On subsequent access, the cached result is returned.
-
-    On external property set operations, stat() calls are performed and the new
-    value is cached.
+    On subsequent access, the cached result is used as it is set to the
+    instance dictionary.
 
-    On property delete operations, cached data is removed.
+    On external property set/delete operations, the caller must update the
+    corresponding _filecache entry appropriately.
 
-    When using the property API, cached data is always returned, if available:
-    no stat() is performed to check if the file has changed and if the function
-    needs to be called to reflect file changes.
+    When using the property API, the cached data is always used if available.
+    No stat() is performed to check if the file has changed.
 
     Others can muck about with the state of the ``_filecache`` dict. e.g. they
     can populate an entry before the property's getter is called. In this case,
@@ -1289,10 +1287,8 @@ class filecache(object):
         # if accessed on the class, return the descriptor itself.
         if obj is None:
             return self
-        # do we need to check if the file changed?
-        if self.sname in obj.__dict__:
-            assert self.name in obj._filecache, self.name
-            return obj.__dict__[self.sname]
+
+        assert self.sname not in obj.__dict__
 
         entry = obj._filecache.get(self.name)
 
@@ -1312,24 +1308,8 @@ class filecache(object):
         obj.__dict__[self.sname] = entry.obj
         return entry.obj
 
-    def __set__(self, obj, value):
-        if self.name not in obj._filecache:
-            # we add an entry for the missing value because X in __dict__
-            # implies X in _filecache
-            paths = [self.join(obj, path) for path in self.paths]
-            ce = filecacheentry(paths, False)
-            obj._filecache[self.name] = ce
-        else:
-            ce = obj._filecache[self.name]
-
-        ce.obj = value # update cached copy
-        obj.__dict__[self.sname] = value # update copy returned by obj.x
-
-    def __delete__(self, obj):
-        try:
-            del obj.__dict__[self.sname]
-        except KeyError:
-            raise AttributeError(self.sname)
+    # don't implement __set__(), which would make __dict__ lookup as slow as
+    # Python function call.
 
 def extdatasource(repo, source):
     """Gather a map of rev -> value dict from the specified source


More information about the Mercurial-devel mailing list