[PATCH 4 of 4] revset: reduce nesting of chained 'or' operations (issue4624)

Yuya Nishihara yuya at tcha.org
Tue May 26 10:00:22 CDT 2015


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1430039628 -32400
#      Sun Apr 26 18:13:48 2015 +0900
# Node ID 2b8ae08b9352bfe45aad999059385b5a026e56b6
# Parent  f8cd0e0ceea631cb6d22b324bfeeb7fecee426fb
revset: reduce nesting of chained 'or' operations (issue4624)

This reduces the stack depth of chained 'or' operations:
 - from O(n) to O(1) in the parsing, alias expansion and optimization phases
 - from O(n) to O(log(n)) in the evaluation phase

simplifyinfixops() must be applied immediately after the parsing phase.
Otherwise, alias expansion would crash with a "maximum recursion depth
exceeded" error.

Test cases use 'x:y|y:z' instead of 'x|y' because I'm planning to optimize
'x|y' in a different way.

Benchmarks:

0) 605b1d32c1c0
1) this patch

revset #0: 0 + 1 + 2 + ... + 200
0) wall 0.026347 comb 0.030000 user 0.030000 sys 0.000000 (best of 101)
1) wall 0.023858 comb 0.030000 user 0.030000 sys 0.000000 (best of 112)

revset #1: 0 + 1 + 2 + ... + 1000
0) maximum recursion depth exceeded
1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20)

revset #2: sort(0 + 1 + 2 + ... + 200)
0) wall 0.013404 comb 0.010000 user 0.010000 sys 0.000000 (best of 196)
1) wall 0.006814 comb 0.010000 user 0.010000 sys 0.000000 (best of 375)

revset #3: sort(0 + 1 + 2 + ... + 1000)
0) maximum recursion depth exceeded
1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -356,10 +356,9 @@ def dagrange(repo, subset, x, y):
 def andset(repo, subset, x, y):
     return getset(repo, getset(repo, subset, x), y)
 
-def orset(repo, subset, x, y):
-    xl = getset(repo, subset, x)
-    yl = getset(repo, subset, y)
-    return xl + yl
+def orset(repo, subset, *xs):
+    rs = [getset(repo, subset, x) for x in xs]
+    return combinesets(rs)
 
 def notset(repo, subset, x):
     return subset - getset(repo, subset, x)
@@ -2160,9 +2159,8 @@ def optimize(x, small):
             return w, (op, tb, ta)
         return w, (op, ta, tb)
     elif op == 'or':
-        wa, ta = optimize(x[1], False)
-        wb, tb = optimize(x[2], False)
-        return max(wa, wb), (op, ta, tb)
+        ws, ts = zip(*[optimize(y, False) for y in x[1:]])
+        return max(ws), (op,) + ts
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
@@ -2380,7 +2378,7 @@ def _parsealiasdefn(defn, args):
     tree, pos = p.parse(defn)
     if pos != len(defn):
         raise error.ParseError(_('invalid token'), pos)
-    return tree
+    return parser.simplifyinfixops(tree, ('or',))
 
 class revsetalias(object):
     # whether own `error` information is already shown or not.
@@ -2511,7 +2509,7 @@ def parse(spec, lookup=None):
     tree, pos = p.parse(spec, lookup=lookup)
     if pos != len(spec):
         raise error.ParseError(_("invalid token"), pos)
-    return tree
+    return parser.simplifyinfixops(tree, ('or',))
 
 def posttreebuilthook(tree, repo):
     # hook for extensions to execute code on the optimized tree
diff --git a/tests/test-glog.t b/tests/test-glog.t
--- a/tests/test-glog.t
+++ b/tests/test-glog.t
@@ -1469,13 +1469,12 @@ glog always reorders nodes which explain
   (group
     (group
       (or
-        (or
-          (func
-            ('symbol', 'branch')
-            ('string', 'default'))
-          (func
-            ('symbol', 'branch')
-            ('string', 'branch')))
+        (func
+          ('symbol', 'branch')
+          ('string', 'default'))
+        (func
+          ('symbol', 'branch')
+          ('string', 'branch'))
         (func
           ('symbol', 'branch')
           ('string', 'branch')))))
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -137,16 +137,15 @@ trivial
   6
   $ try '0|1|2'
   (or
-    (or
-      ('symbol', '0')
-      ('symbol', '1'))
+    ('symbol', '0')
+    ('symbol', '1')
     ('symbol', '2'))
   * set:
   <addset
+    <baseset [0]>,
     <addset
-      <baseset [0]>,
-      <baseset [1]>>,
-    <baseset [2]>>
+      <baseset [1]>,
+      <baseset [2]>>>
   0
   1
   2
@@ -919,6 +918,49 @@ test that `or` operation skips duplicate
   4
   5
 
+test that chained `or` operations make balanced addsets
+
+  $ try '0:1|1:2|2:3|3:4|4:5'
+  (or
+    (range
+      ('symbol', '0')
+      ('symbol', '1'))
+    (range
+      ('symbol', '1')
+      ('symbol', '2'))
+    (range
+      ('symbol', '2')
+      ('symbol', '3'))
+    (range
+      ('symbol', '3')
+      ('symbol', '4'))
+    (range
+      ('symbol', '4')
+      ('symbol', '5')))
+  * set:
+  <addset
+    <addset
+      <spanset+ 0:1>,
+      <spanset+ 1:2>>,
+    <addset
+      <spanset+ 2:3>,
+      <addset
+        <spanset+ 3:4>,
+        <spanset+ 4:5>>>>
+  0
+  1
+  2
+  3
+  4
+  5
+
+test that chained `or` operations never eat up stack (issue4624)
+(uses `0:1` instead of `0` to avoid future optimization of trivial revisions)
+
+  $ hg log -T '{rev}\n' -r "`python -c "print '|'.join(['0:1'] * 500)"`"
+  0
+  1
+
 check that conversion to only works
   $ try --optimize '::3 - ::1'
   (minus
@@ -1361,6 +1403,44 @@ test nesting and variable passing
   <baseset [5]>
   5
 
+test chained `or` operations are flattened at parsing phase
+
+  $ echo 'chainedorops($1, $2, $3) = $1|$2|$3' >> .hg/hgrc
+  $ try 'chainedorops(0:1, 1:2, 2:3)'
+  (func
+    ('symbol', 'chainedorops')
+    (list
+      (list
+        (range
+          ('symbol', '0')
+          ('symbol', '1'))
+        (range
+          ('symbol', '1')
+          ('symbol', '2')))
+      (range
+        ('symbol', '2')
+        ('symbol', '3'))))
+  (or
+    (range
+      ('symbol', '0')
+      ('symbol', '1'))
+    (range
+      ('symbol', '1')
+      ('symbol', '2'))
+    (range
+      ('symbol', '2')
+      ('symbol', '3')))
+  * set:
+  <addset
+    <spanset+ 0:1>,
+    <addset
+      <spanset+ 1:2>,
+      <spanset+ 2:3>>>
+  0
+  1
+  2
+  3
+
 test variable isolation, variable placeholders are rewritten as string
 then parsed and matched again as string. Check they do not leak too
 far away.


More information about the Mercurial-devel mailing list