[PATCH 1 of 4] revset: added lazyset class to return values as soon as they are computed

Lucas Moscovicz lmoscovicz at fb.com
Thu Feb 6 18:02:39 UTC 2014


# HG changeset patch
# User Lucas Moscovicz <lmoscovicz at fb.com>
# Date 1390951154 28800
#      Tue Jan 28 15:19:14 2014 -0800
# Node ID ebd119f34a845b1a92462291e025879f80f4b2e7
# Parent  19b9ecbf916636b9bbb74cb9e14b5f20aad39eb0
revset: added lazyset class to return values as soon as they are computed

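This class allows us to return values from large revsets as soon as they are
computed instead of having to wait for the entire revset to be calculated.

A lazyset wraps an existing subset together with a condition function and
applies the condition only while iterating or testing membership, instead of
filtering the subset up front. It does not provide set(), len(), indexing or
sort(), so callers that need those convert it to a baseset first.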

diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1489,6 +1489,8 @@
         # returns the revision matching A then the revision matching B. Sort
         # again to fix that.
         revs = matcher(repo, revs)
+        if isinstance(revs, revset.lazyset):
+            revs = revset.baseset(revs)
         revs.sort(reverse=True)
     if limit is not None:
         revs = revs[:limit]
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -194,7 +194,10 @@
 def getset(repo, subset, x):
     if not x:
         raise error.ParseError(_("missing argument"))
-    return baseset(methods[x[0]](repo, subset, *x[1:]))
+    s = methods[x[0]](repo, subset, *x[1:])
+    if isinstance(s, lazyset):
+        return s
+    return baseset(s)
 
 def _getrevsource(repo, r):
     extra = repo[r].extra()
@@ -210,6 +213,8 @@
 
 def stringset(repo, subset, x):
     x = repo[x].rev()
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     if x == -1 and len(subset) == len(repo):
         return baseset([-1])
     if len(subset) == len(repo) or x in subset:
@@ -226,6 +231,11 @@
     m = getset(repo, cl, x)
     n = getset(repo, cl, y)
 
+    if isinstance(m, lazyset):
+        m = baseset(m)
+    if isinstance(n, lazyset):
+        n = baseset(n)
+
     if not m or not n:
         return baseset([])
     m, n = m[0], n[-1]
@@ -234,14 +244,16 @@
         r = range(m, n + 1)
     else:
         r = range(m, n - 1, -1)
-    s = subset.set()
+    if not isinstance(subset, lazyset):
+        s = subset.set()
     return baseset([x for x in r if x in s])
 
 def dagrange(repo, subset, x, y):
     r = baseset(repo)
     xs = _revsbetween(repo, getset(repo, r, x), getset(repo, r, y))
-    s = subset.set()
-    return baseset([r for r in xs if r in s])
+    if not isinstance(subset, lazyset):
+        subset = subset.set()
+    return baseset([r for r in xs if r in subset])
 
 def andset(repo, subset, x, y):
     return getset(repo, getset(repo, subset, x), y)
@@ -249,7 +261,7 @@
 def orset(repo, subset, x, y):
     xl = getset(repo, subset, x)
     yl = getset(repo, subset - xl, y)
-    return baseset(xl + yl)
+    return xl + yl
 
 def notset(repo, subset, x):
     return subset - getset(repo, subset, x)
@@ -305,6 +317,8 @@
     if not args:
         return baseset([])
     s = set(_revancestors(repo, args, followfirst)) | set(args)
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     ss = subset.set()
     return baseset([r for r in ss if r in s])
 
@@ -334,8 +348,7 @@
         for i in range(n):
             r = cl.parentrevs(r)[0]
         ps.add(r)
-    s = subset.set()
-    return baseset([r for r in s if r in ps])
+    return baseset([r for r in subset if r in ps])
 
 def author(repo, subset, x):
     """``author(string)``
@@ -362,7 +375,8 @@
     # i18n: "bisect" is a keyword
     status = getstring(x, _("bisect requires a string")).lower()
     state = set(hbisect.get(repo, status))
-    s = subset.set()
+    if not isinstance(subset, lazyset):
+        s = subset.set()
     return baseset([r for r in s if r in state])
 
 # Backward-compatibility
@@ -406,7 +420,8 @@
 
     bms = set([repo[r].rev()
                for r in repo._bookmarks.values()])
-    s = subset.set()
+    if not isinstance(subset, lazyset):
+        s = subset.set()
     return baseset([r for r in s if r in bms])
 
 def branch(repo, subset, x):
@@ -512,7 +527,10 @@
     """``children(set)``
     Child changesets of changesets in set.
     """
-    s = getset(repo, baseset(repo), x).set()
+    s = getset(repo, baseset(repo), x)
+    if isinstance(s, lazyset):
+        s = baseset(s)
+    s = s.set()
     cs = _children(repo, subset, s)
     return subset & cs
 
@@ -594,6 +612,8 @@
 
 def _descendants(repo, subset, x, followfirst=False):
     args = getset(repo, baseset(repo), x)
+    if isinstance(args, lazyset):
+        args = baseset(args)
     if not args:
         return baseset([])
     s = set(_revdescendants(repo, args, followfirst)) | set(args)
@@ -618,9 +638,12 @@
     is the same as passing all().
     """
     if x is not None:
-        args = getset(repo, baseset(repo), x).set()
+        args = getset(repo, baseset(repo), x)
     else:
-        args = getall(repo, baseset(repo), x).set()
+        args = getall(repo, baseset(repo), x)
+
+    if not isinstance(args, lazyset):
+        args = args.set()
 
     dests = set()
 
@@ -936,6 +959,8 @@
     except (TypeError, ValueError):
         # i18n: "limit" is a keyword
         raise error.ParseError(_("limit expects a number"))
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     ss = subset.set()
     os = getset(repo, baseset(repo), l[0])[:lim]
     return baseset([r for r in os if r in ss])
@@ -954,8 +979,13 @@
     except (TypeError, ValueError):
         # i18n: "last" is a keyword
         raise error.ParseError(_("last expects a number"))
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     ss = subset.set()
-    os = getset(repo, baseset(repo), l[0])[-lim:]
+    os = getset(repo, baseset(repo), l[0])
+    if isinstance(os, lazyset):
+        os = baseset(os)
+    os = os[-lim:]
     return baseset([r for r in os if r in ss])
 
 def maxrev(repo, subset, x):
@@ -963,6 +993,8 @@
     Changeset with highest revision number in set.
     """
     os = getset(repo, baseset(repo), x)
+    if isinstance(os, lazyset):
+        os = baseset(os)
     if os:
         m = max(os)
         if m in subset:
@@ -1000,6 +1032,8 @@
     Changeset with lowest revision number in set.
     """
     os = getset(repo, baseset(repo), x)
+    if isinstance(os, lazyset):
+        os = baseset(os)
     if os:
         m = min(os)
         if m in subset:
@@ -1049,9 +1083,12 @@
     for the first operation is selected.
     """
     if x is not None:
-        args = getset(repo, baseset(repo), x).set()
+        args = getset(repo, baseset(repo), x)
     else:
-        args = getall(repo, baseset(repo), x).set()
+        args = getall(repo, baseset(repo), x)
+
+    if not isinstance(args, lazyset):
+        args = args.set()
 
     def _firstsrc(rev):
         src = _getrevsource(repo, rev)
@@ -1090,6 +1127,8 @@
     repo.ui.popbuffer()
     cl = repo.changelog
     o = set([cl.rev(r) for r in outgoing.missing])
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     s = subset.set()
     return baseset([r for r in s if r in o])
 
@@ -1367,7 +1406,9 @@
     """``roots(set)``
     Changesets in set with no parent changeset in set.
     """
-    s = getset(repo, baseset(repo.changelog), x).set()
+    s = getset(repo, baseset(repo.changelog), x)
+    if not isinstance(s, lazyset):
+        s = s.set()
     subset = baseset([r for r in subset if r in s])
     cs = _children(repo, subset, s)
     return subset - cs
@@ -1534,6 +1575,8 @@
     if not s:
         return baseset([])
     ls = [repo[r].rev() for r in s.split('\0')]
+    if isinstance(subset, lazyset):
+        subset = baseset(subset)
     s = subset.set()
     return baseset([r for r in ls if r in s])
 
@@ -2052,5 +2095,38 @@
             x = x.set()
         return baseset([y for y in s if y in x])
 
+    def __add__(self, x):
+        s = self.set()
+        l = [r for r in x if r not in s]
+        return baseset(list(self) + l)
+
+class lazyset(object):
+    def __init__(self, subset, condition):
+        self._subset = subset
+        self._condition = condition
+        self._baseset = None
+
+    def reverse(self):
+        self._subset.reverse()
+
+    def __contains__(self, x):
+        return x in self._subset and self._condition(x)
+
+    def __and__(self, x):
+        return lazyset(self, lambda r: r in x)
+
+    def __sub__(self, x):
+        return lazyset(self, lambda r: r not in x)
+
+    def __add__(self, x):
+        l = baseset([r for r in self])
+        return l + baseset(x)
+
+    def __iter__(self):
+        cond = self._condition
+        for x in self._subset:
+            if cond(x):
+                yield x
+
 # tell hggettext to extract docstrings from these functions:
 i18nfunctions = symbols.values()


More information about the Mercurial-devel mailing list