D1780: smartset: split generatorset classes to avoid cycle

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Dec 27 18:25:10 UTC 2017


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  I uncovered a cycle manifesting in a memory leak by running
  `hgperfrevset '::tip'`. The cycle was due to generatorset.__init__
  assigning a bound method to self.__contains__. Internet sleuthing
  revealed that assigning a bound method to an instance attribute
  always creates a cycle.
  
  This commit creates two new variants of generatorset for the special
  cases of ascending and descending generators. The special
  implementations of __contains__ have been extracted to these classes
  where they are defined as __contains__.
  
  generatorset now implements __new__ and changes the spawned type to
  one of the new classes if needed.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1780

AFFECTED FILES
  mercurial/smartset.py
  tests/test-revset.t
  tests/test-revset2.t

CHANGE DETAILS

diff --git a/tests/test-revset2.t b/tests/test-revset2.t
--- a/tests/test-revset2.t
+++ b/tests/test-revset2.t
@@ -152,7 +152,7 @@
   * set:
   <addset
     <baseset- [1, 3, 5]>,
-    <generatorset+>>
+    <generatorsetdesc+>>
   5
   3
   1
@@ -174,7 +174,7 @@
             (symbol '5'))))))
   * set:
   <addset+
-    <generatorset+>,
+    <generatorsetdesc+>,
     <baseset- [1, 3, 5]>>
   0
   1
@@ -927,7 +927,7 @@
       (symbol 'merge')
       None))
   * set:
-  <generatorset+>
+  <generatorsetasc+>
   6
   7
 
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -1484,7 +1484,7 @@
   $ hg debugrevspec -s 'last(0::)'
   * set:
   <baseset slice=0:1
-    <generatorset->>
+    <generatorsetasc->>
   9
   $ hg identify -r '0::' --num
   9
diff --git a/mercurial/smartset.py b/mercurial/smartset.py
--- a/mercurial/smartset.py
+++ b/mercurial/smartset.py
@@ -772,6 +772,16 @@
     >>> xs.last()  # cached
     4
     """
+    def __new__(cls, gen, iterasc=None):
+        if iterasc is None:
+            typ = cls
+        elif iterasc:
+            typ = generatorsetasc
+        else:
+            typ = generatorsetdesc
+
+        return super(generatorset, cls).__new__(typ)
+
     def __init__(self, gen, iterasc=None):
         """
         gen: a generator producing the values for the generatorset.
@@ -782,13 +792,6 @@
         self._genlist = []
         self._finished = False
         self._ascending = True
-        if iterasc is not None:
-            if iterasc:
-                self.fastasc = self._iterator
-                self.__contains__ = self._asccontains
-            else:
-                self.fastdesc = self._iterator
-                self.__contains__ = self._desccontains
 
     def __nonzero__(self):
         # Do not use 'for r in self' because it will enforce the iteration
@@ -814,36 +817,6 @@
         self._cache[x] = False
         return False
 
-    def _asccontains(self, x):
-        """version of contains optimised for ascending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l > x:
-                break
-
-        self._cache[x] = False
-        return False
-
-    def _desccontains(self, x):
-        """version of contains optimised for descending generator"""
-        if x in self._cache:
-            return self._cache[x]
-
-        # Use new values only, as existing values would be cached.
-        for l in self._consumegen():
-            if l == x:
-                return True
-            if l < x:
-                break
-
-        self._cache[x] = False
-        return False
-
     def __iter__(self):
         if self._ascending:
             it = self.fastasc
@@ -949,6 +922,44 @@
         d = {False: '-', True: '+'}[self._ascending]
         return '<%s%s>' % (type(self).__name__, d)
 
+class generatorsetasc(generatorset):
+    """Special case of generatorset optimized for ascending generators."""
+
+    fastasc = generatorset._iterator
+
+    def __contains__(self, x):
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l > x:
+                break
+
+        self._cache[x] = False
+        return False
+
+class generatorsetdesc(generatorset):
+    """Special case of generatorset optimized for descending generators."""
+
+    fastdesc = generatorset._iterator
+
+    def __contains__(self, x):
+        if x in self._cache:
+            return self._cache[x]
+
+        # Use new values only, as existing values would be cached.
+        for l in self._consumegen():
+            if l == x:
+                return True
+            if l < x:
+                break
+
+        self._cache[x] = False
+        return False
+
 def spanset(repo, start=0, end=None):
     """Create a spanset that represents a range of repository revisions
 



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list