[PATCH 1 of 4] revset: make addset able to chain more than two ordered iterators

Yuya Nishihara yuya at tcha.org
Tue May 19 07:18:18 CDT 2015


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1431755656 -32400
#      Sat May 16 14:54:16 2015 +0900
# Node ID e71e42ed67100df4d69386ba8e8ce7f0767638db
# Parent  08d1ef09ed371bfa6982ea67f6c92b495d42c520
revset: make addset able to chain more than two ordered iterators

This will allow us to handle chained OR operations in one addset instance. It
can reduce the stack depth from O(n) to O(log(n)). I've tried naive O(1)-depth
implementation using an extra loop and a list/dict, but it turned out slower
than this version.

0) 1ef96a3b8b89
1) drop redundant filteredset from right-hand side set of "or" operation
2) unnested OR ops, with naive O(1)-depth _iterordered() using a dict
   (previous patch series)
3) unnested OR ops, with partitioned recursive _iterordered() (this series)

revset #0: sort(0:1099 + 1000:2099 + 2000:3099 + 3000:4099 + 4000:5099)
0) wall 0.013344 comb 0.010000 user 0.010000 sys 0.000000 (best of 191)
1) wall 0.003944 comb 0.010000 user 0.010000 sys 0.000000 (best of 690)
2) wall 0.004670 comb 0.000000 user 0.000000 sys 0.000000 (best of 580)
3) wall 0.002555 comb 0.010000 user 0.010000 sys 0.000000 (best of 1063)

self._r1 and _r2 will be removed later.

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2954,7 +2954,7 @@ class filteredset(abstractsmartset):
     def __repr__(self):
         return '<%s %r>' % (type(self).__name__, self._subset)
 
-def _iterordered(ascending, iter1, iter2):
+def _iterorderedpair(ascending, iter1, iter2):
     """produce an ordered iteration from two iterators with the same order
 
     The ascending is used to indicated the iteration direction.
@@ -2990,6 +2990,19 @@ def _iterordered(ascending, iter1, iter2
         for val in it:
             yield val
 
+def _iterordered(ascending, iters):
+    """produce an ordered iteration from iterators with the same order"""
+    assert iters
+    if len(iters) == 1:
+        return iters[0]
+    elif len(iters) == 2:
+        return _iterorderedpair(ascending, *iters)
+    else:
+        p = len(iters) // 2
+        iter1 = _iterordered(ascending, iters[:p])
+        iter2 = _iterordered(ascending, iters[p:])
+        return _iterorderedpair(ascending, iter1, iter2)
+
 class addset(abstractsmartset):
     """Represent the addition of two sets
 
@@ -3061,6 +3074,7 @@ class addset(abstractsmartset):
     def __init__(self, revs1, revs2, ascending=None):
         self._r1 = revs1
         self._r2 = revs2
+        self._subsets = [revs1, revs2]
         self._iter = None
         self._ascending = ascending
         self._genlist = None
@@ -3109,21 +3123,15 @@ class addset(abstractsmartset):
         if it is not None:
             return it()
         # maybe half of the component supports fast
-        # get iterator for _r1
-        iter1 = getattr(self._r1, attr)
-        if iter1 is None:
-            # let's avoid side effect (not sure it matters)
-            iter1 = iter(sorted(self._r1, reverse=not self._ascending))
-        else:
-            iter1 = iter1()
-        # get iterator for _r2
-        iter2 = getattr(self._r2, attr)
-        if iter2 is None:
-            # let's avoid side effect (not sure it matters)
-            iter2 = iter(sorted(self._r2, reverse=not self._ascending))
-        else:
-            iter2 = iter2()
-        return _iterordered(self._ascending, iter1, iter2)
+        iters = []
+        for rs in self._subsets:
+            it = getattr(rs, attr)
+            if it is None:
+                # let's avoid side effect (not sure it matters)
+                iters.append(iter(sorted(rs, reverse=not self._ascending)))
+            else:
+                iters.append(it())
+        return _iterordered(self._ascending, iters)
 
     def _trysetasclist(self):
         """populate the _asclist attribute if possible and necessary"""
@@ -3135,22 +3143,20 @@ class addset(abstractsmartset):
         self._trysetasclist()
         if self._asclist is not None:
             return self._asclist.__iter__
-        iter1 = self._r1.fastasc
-        iter2 = self._r2.fastasc
-        if None in (iter1, iter2):
+        iterfuncs = [rs.fastasc for rs in self._subsets]
+        if None in iterfuncs:
             return None
-        return lambda: _iterordered(True, iter1(), iter2())
+        return lambda: _iterordered(True, [it() for it in iterfuncs])
 
     @property
     def fastdesc(self):
         self._trysetasclist()
         if self._asclist is not None:
             return self._asclist.__reversed__
-        iter1 = self._r1.fastdesc
-        iter2 = self._r2.fastdesc
-        if None in (iter1, iter2):
+        iterfuncs = [rs.fastdesc for rs in self._subsets]
+        if None in iterfuncs:
             return None
-        return lambda: _iterordered(False, iter1(), iter2())
+        return lambda: _iterordered(False, [it() for it in iterfuncs])
 
     def __contains__(self, x):
         return x in self._r1 or x in self._r2


More information about the Mercurial-devel mailing list