[PATCH 2 of 3] filecache: unimplement __set__() and __delete__() (API)

Yuya Nishihara yuya at tcha.org
Tue Oct 23 08:43:27 EDT 2018


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1540025760 -32400
#      Sat Oct 20 17:56:00 2018 +0900
# Node ID 759c7efd7c9132ce6dac5bbd9a37e70acb420d24
# Parent  6a3a42dfcdd0ccb628ae3b8f3129b26db32a7ff0
filecache: unimplement __set__() and __delete__() (API)

Implementing __set__() implies that the descriptor can't be overridden by
obj.__dict__, which means any property access involves a slow function call.

  "Data descriptors with __set__() and __get__() defined always override
  a redefinition in an instance dictionary. In contrast, non-data descriptors
  can be overridden by instances."

  https://docs.python.org/2.7/reference/datamodel.html#invoking-descriptors

This patch basically backs out 236bb604dc39, "scmutil: update cached copy
when filecached attribute is assigned (issue3263)." The problem described
in issue3263 (which is #3264 in Bugzilla) should no longer happen since
repo._bookmarkcurrent has been moved to repo._bookmarks.active. We still
have a risk of introducing similar bugs, but I think that's the cost we
have to pay.

  $ hg perfrevset 'branch(tip)' -R mercurial
  (orig) wall 0.139511 comb 0.140000 user 0.140000 sys 0.000000 (best of 66)
  (prev) wall 0.114195 comb 0.110000 user 0.110000 sys 0.000000 (best of 81)
  (this) wall 0.099038 comb 0.110000 user 0.100000 sys 0.010000 (best of 93)

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -317,7 +317,7 @@ class dirstate(object):
         return copies
 
     def setbranch(self, branch):
-        self._branch = encoding.fromlocal(branch)
+        self.__class__._branch.set(self, encoding.fromlocal(branch))
         f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
         try:
             f.write(self._branch + '\n')
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -91,17 +91,16 @@ class _basefilecache(scmutil.filecache):
     def __get__(self, repo, type=None):
         if repo is None:
             return self
-        # inlined the fast path as the cost of function call matters
+        # proxy to unfiltered __dict__ since filtered repo has no entry
         unfi = repo.unfiltered()
         try:
             return unfi.__dict__[self.sname]
         except KeyError:
             pass
         return super(_basefilecache, self).__get__(unfi, type)
-    def __set__(self, repo, value):
-        return super(_basefilecache, self).__set__(repo.unfiltered(), value)
-    def __delete__(self, repo):
-        return super(_basefilecache, self).__delete__(repo.unfiltered())
+
+    def set(self, repo, value):
+        return super(_basefilecache, self).set(repo.unfiltered(), value)
 
 class repofilecache(_basefilecache):
     """filecache for files in .hg but outside of .hg/store"""
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -1249,16 +1249,15 @@ class filecache(object):
     results cached. The decorated function is called. The results are stashed
     away in a ``_filecache`` dict on the object whose method is decorated.
 
-    On subsequent access, the cached result is returned.
-
-    On external property set operations, stat() calls are performed and the new
-    value is cached.
+    On subsequent access, the cached result is used as it is set to the
+    instance dictionary.
 
-    On property delete operations, cached data is removed.
+    On external property set/delete operations, the caller must update the
+    corresponding _filecache entry appropriately. Use __class__.<attr>.set()
+    instead of directly setting <attr>.
 
-    When using the property API, cached data is always returned, if available:
-    no stat() is performed to check if the file has changed and if the function
-    needs to be called to reflect file changes.
+    When using the property API, the cached data is always used if available.
+    No stat() is performed to check if the file has changed.
 
     Others can muck about with the state of the ``_filecache`` dict. e.g. they
     can populate an entry before the property's getter is called. In this case,
@@ -1291,11 +1290,8 @@ class filecache(object):
         # if accessed on the class, return the descriptor itself.
         if obj is None:
             return self
-        # do we need to check if the file changed?
-        try:
-            return obj.__dict__[self.sname]
-        except KeyError:
-            pass
+
+        assert self.sname not in obj.__dict__
 
         entry = obj._filecache.get(self.name)
 
@@ -1315,7 +1311,10 @@ class filecache(object):
         obj.__dict__[self.sname] = entry.obj
         return entry.obj
 
-    def __set__(self, obj, value):
+    # don't implement __set__(), which would make __dict__ lookup as slow as
+    # function call.
+
+    def set(self, obj, value):
         if self.name not in obj._filecache:
             # we add an entry for the missing value because X in __dict__
             # implies X in _filecache
@@ -1328,12 +1327,6 @@ class filecache(object):
         ce.obj = value # update cached copy
         obj.__dict__[self.sname] = value # update copy returned by obj.x
 
-    def __delete__(self, obj):
-        try:
-            del obj.__dict__[self.sname]
-        except KeyError:
-            raise AttributeError(self.sname)
-
 def extdatasource(repo, source):
     """Gather a map of rev -> value dict from the specified source
 
diff --git a/tests/test-filecache.py b/tests/test-filecache.py
--- a/tests/test-filecache.py
+++ b/tests/test-filecache.py
@@ -177,7 +177,7 @@ def test_filecache_synced():
 def setbeforeget(repo):
     os.remove('x')
     os.remove('y')
-    repo.cached = 'string set externally'
+    repo.__class__.cached.set(repo, 'string set externally')
     repo.invalidate()
     print("* neither file exists")
     print(repo.cached)
@@ -188,7 +188,7 @@ def setbeforeget(repo):
     print("* file x created")
     print(repo.cached)
 
-    repo.cached = 'string 2 set externally'
+    repo.__class__.cached.set(repo, 'string 2 set externally')
     repo.invalidate()
     print("* string set externally again")
     print(repo.cached)


More information about the Mercurial-devel mailing list