[PATCH 1 of 2] revset: add minus revset class

Durham Goode durham at fb.com
Wed Feb 10 19:23:46 EST 2016


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1455150122 28800
#      Wed Feb 10 16:22:02 2016 -0800
# Node ID edfc4fda0c1ca78186d9a779f0d4d0cfc6e1c03f
# Parent  c791244507faabf86b1fd71746ec5b3d0364ed7d
revset: add minus revset class

Previously, revsets like 'X - Y' were translated to be 'X and not Y'. This can
be expensive, since if Y is a single commit then 'not Y' becomes a huge set and
sometimes the query optimizer doesn't account for it well.

This patch adds a minusset smart revset class that knows how to handle the minus
operator smartly. A future patch will change the optimizer to use this instead
of changing '- Y' to be 'and not Y'.

On a large repo this saves 2.2 seconds on rebase and histedit because "X:: - X"
becomes almost instant. The future patch that actually uses the minus set will
include more detailed performance numbers.

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -3087,6 +3087,16 @@ def _iterordered(ascending, iter1, iter2
         for val in it:
             yield val
 
+def _minusiter(iter1, minusset):
+    inminus = minusset.__contains__
+    try:
+        while True:
+            r = iter1.next()
+            if not inminus(r):
+                yield r
+    except StopIteration:
+        pass
+
 class addset(abstractsmartset):
     """Represent the addition of two sets
 
@@ -3287,6 +3297,176 @@ class addset(abstractsmartset):
         d = {None: '', False: '-', True: '+'}[self._ascending]
         return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2)
 
+class minusset(abstractsmartset):
+    """Represent the subtraction of two sets
+
+    Wrapper structure for lazily subtracting two structures without losing much
+    performance on the __contains__ method
+
+    If the ascending attribute is set, that means the two structures are
+    ordered in either an ascending or descending way. Therefore, we can perform
+    the operation lazily if the results are asked for in that order.
+
+    >>> xs = baseset([1, 4, 2, 3])
+    >>> ys = baseset([1, 3])
+
+    >>> rs = minusset(xs, ys)
+    >>> bool(rs), 1 in rs, 2 in rs, 3 in rs, rs.first(), rs.last()
+    (True, False, True, False, 4, 2)
+    >>> rs = minusset(xs, baseset([]))
+    >>> bool(rs), 1 in rs, 2 in rs, rs.first(), rs.last()
+    (True, True, True, 1, 3)
+    >>> rs = minusset(baseset([]), baseset([]))
+    >>> bool(rs), 1 in rs, rs.first(), rs.last()
+    (False, False, None, None)
+
+    iterate unsorted:
+    >>> rs = minusset(xs, ys)
+    >>> [x for x in rs]  # without _genlist
+    [4, 2]
+    >>> assert not rs._genlist
+    >>> len(rs)
+    2
+    >>> [x for x in rs]  # with _genlist
+    [4, 2]
+    >>> assert rs._genlist
+
+    iterate ascending:
+    >>> rs = minusset(xs, ys, ascending=True)
+    >>> [x for x in rs], [x for x in rs.fastasc()]  # without _asclist
+    ([2, 4], [2, 4])
+    >>> assert not rs._asclist
+    >>> len(rs)
+    2
+    >>> [x for x in rs], [x for x in rs.fastasc()]
+    ([2, 4], [2, 4])
+    >>> assert rs._asclist
+
+    iterate descending:
+    >>> rs = minusset(xs, ys, ascending=False)
+    >>> [x for x in rs], [x for x in rs.fastdesc()]  # without _asclist
+    ([4, 2], [4, 2])
+    >>> assert not rs._asclist
+    >>> len(rs)
+    2
+    >>> [x for x in rs], [x for x in rs.fastdesc()]
+    ([4, 2], [4, 2])
+    >>> assert rs._asclist
+
+    iterate ascending without fastasc:
+    >>> rs = minusset(generatorset(xs), ys, ascending=True)
+    >>> assert rs.fastasc is None
+    >>> [x for x in rs]
+    [2, 4]
+
+    iterate descending without fastdesc:
+    >>> rs = minusset(generatorset(xs), ys, ascending=False)
+    >>> assert rs.fastdesc is None
+    >>> [x for x in rs]
+    [4, 2]
+    """
+    def __init__(self, revs, minusrevs, ascending=None):
+        self._revs = revs
+        self._minusrevs = minusrevs
+        self._iter = None
+        self._ascending = ascending
+        self._genlist = None
+        self._asclist = None
+
+    def __len__(self):
+        return len(self._list)
+
+    def __nonzero__(self):
+        if self._genlist:
+            return True
+        for r in self:
+            return True
+        return False
+
+    @util.propertycache
+    def _list(self):
+        if not self._genlist:
+            self._genlist = baseset(iter(self))
+        return self._genlist
+
+    def __iter__(self):
+        if self._ascending is None:
+            if self._genlist:
+                return iter(self._genlist)
+            return _minusiter(iter(self._revs), self._minusrevs)
+
+        # try to use our own fast iterator if it exists
+        self._trysetasclist()
+        if self._ascending:
+            attr = 'fastasc'
+        else:
+            attr = 'fastdesc'
+        it = getattr(self, attr)
+        if it is not None:
+            return it()
+
+        it = iter(sorted(self._revs, reverse=not self._ascending))
+        return _minusiter(it, self._minusrevs)
+
+    def _trysetasclist(self):
+        """populate the _asclist attribute if possible and necessary"""
+        if self._genlist is not None and self._asclist is None:
+            self._asclist = sorted(self._genlist)
+
+    @property
+    def fastasc(self):
+        self._trysetasclist()
+        if self._asclist is not None:
+            return self._asclist.__iter__
+        iter1 = self._revs.fastasc
+        if iter1 is None:
+            return None
+        return lambda: _minusiter(iter1(), self._minusrevs)
+
+    @property
+    def fastdesc(self):
+        self._trysetasclist()
+        if self._asclist is not None:
+            return self._asclist.__reversed__
+        iter1 = self._revs.fastdesc
+        if iter1 is None:
+            return None
+        return lambda: _minusiter(iter1(), self._minusrevs)
+
+    def __contains__(self, x):
+        return x in self._revs and x not in self._minusrevs
+
+    def sort(self, reverse=False):
+        self._ascending = not reverse
+
+    def isascending(self):
+        return self._ascending is not None and self._ascending
+
+    def isdescending(self):
+        return self._ascending is not None and not self._ascending
+
+    def reverse(self):
+        if self._ascending is None:
+            self._list.reverse()
+        else:
+            self._ascending = not self._ascending
+
+    def first(self):
+        for x in self:
+            return x
+        return None
+
+    def last(self):
+        self.reverse()
+        val = self.first()
+        self.reverse()
+        return val
+
+    def __repr__(self):
+        d = {None: '', False: '-', True: '+'}[self._ascending]
+        return '<%s%s %r, %r>' % (type(self).__name__, d, self._revs,
+                                  self._minusrevs)
+
 class generatorset(abstractsmartset):
     """Wrap a generator for lazy iteration
 


More information about the Mercurial-devel mailing list