[PATCH 3 of 7 V4] revset: insert _unordered to fix ordering of nested 'or' and '_list*()' (BC)

Yuya Nishihara yuya at tcha.org
Thu Jun 23 11:59:55 EDT 2016


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1455629461 -32400
#      Tue Feb 16 22:31:01 2016 +0900
# Node ID bd5e886ceca4257dd843f98539a18ef7c31ec8e0
# Parent  0a1b1af8c9b8aaa0690de2fc0614b531dc03a5c2
revset: insert _unordered to fix ordering of nested 'or' and '_list*()' (BC)

This fixes the order of 'x & (y | z)' and 'x & _list*(...)' by inserting
'_unordered' function.

This is obviously slower than the current buggy version if an input set is
large:

       #0           #1           #2           #3
    0) 0.002968     0.002980     0.002982     0.073042
    1) 0.004513     0.004485     0.012029     0.075261

    #0: 0:4000 & (0:1099 + 1000:2099 + 2000:3099)
    #1: 4000:0 & (0:1099 + 1000:2099 + 2000:3099)
    #2: 10000:0 & (0:1099 + 1000:2099 + 2000:3099)
    #3: file("path:hg") & (0:1099 + 1000:2099 + 2000:3099)

This issue could be addressed by redirecting nested 'or' to new filter
function, but it appears to be slower than the '_unordered()' insertion.

    def unionset(repo, subset, *xs):
        ss = [getset(repo, fullreposet(repo), x) for x in xs]
        return subset.filter(lambda r: any(r in s for s in ss), cache=False)

Changes from V3:
 - got rid of double subset filtering
 - dropped 'sort()', 'reverse()' and 'head()' from the list as their problems
   are slightly different
 - renamed '_reorder(set)' to '_unordered(set)' to clarify the nested set is
   evaluated as if it were 'unordered'

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2205,6 +2205,14 @@ def tag(repo, subset, x):
 def tagged(repo, subset, x):
     return tag(repo, subset, x)
 
+# for internal use
+ at predicate('_unordered(set)', safe=True)
+def _unordered(repo, subset, x):
+    # Evaluate right-hand-side expression 'set' and reorder the result set
+    # according to the left-hand-side set 'subset'
+    rs = getset(repo, fullreposet(repo), x)
+    return subset & rs
+
 @predicate('unstable()', safe=True)
 def unstable(repo, subset, x):
     """Non-obsolete changesets with obsolete ancestors.
@@ -2346,6 +2354,13 @@ followorder = 'follow'  # must follow th
     followorder: followorder,
 }
 
+def _fixunionorder(x, order):
+    """Wrap 'or' and '_list*()' by '_unordered()' to enforce the ordering
+    requirement"""
+    if order == followorder:
+        return ('func', ('symbol', '_unordered'), x)
+    return x
+
 def _matchonly(revs, bases):
     """
     >>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2444,7 +2459,7 @@ def _optimize(x, small, order):
         # we can't reorder trees by weight because it would change the order.
         # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
         #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
-        return max(ws), (op,) + tuple(ts)
+        return max(ws), _fixunionorder((op,) + tuple(ts), order)
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
@@ -2492,7 +2507,10 @@ def _optimize(x, small, order):
             w = 10 # assume most sorts look at changelog
         else:
             w = 1
-        return w + wa, (op, x[1], ta)
+        t = (op, x[1], ta)
+        if f in "_list _intlist _hexlist":
+            t = _fixunionorder(t, order)
+        return w + wa, t
     return 1, x
 
 def optimize(tree):
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -979,14 +979,17 @@ ordering defined by it.
       ('symbol', '2')
       ('symbol', '0'))
     (func
-      ('symbol', '_list')
-      ('string', '0\x001\x002')))
+      ('symbol', '_unordered')
+      (func
+        ('symbol', '_list')
+        ('string', '0\x001\x002'))))
   * set:
-  <baseset [0, 1, 2]>
+  <filteredset
+    <spanset- 0:2>,
+    <baseset [0, 1, 2]>>
+  2
+  1
   0
-  1
-  2
- BROKEN: should be '2 1 0'
 
  'A + B' should take the ordering of the left expression:
 
@@ -1006,21 +1009,22 @@ ordering defined by it.
     (range
       ('symbol', '2')
       ('symbol', '0'))
-    (or
-      (range
-        ('symbol', '0')
-        ('symbol', '1'))
-      ('symbol', '2')))
+    (func
+      ('symbol', '_unordered')
+      (or
+        (range
+          ('symbol', '0')
+          ('symbol', '1'))
+        ('symbol', '2'))))
   * set:
-  <addset
-    <filteredset
+  <filteredset
+    <spanset- 0:2>,
+    <addset
       <spanset+ 0:1>,
-      <spanset- 0:2>>,
-    <baseset [2]>>
+      <baseset [2]>>>
+  2
+  1
   0
-  1
-  2
- BROKEN: should be '2 1 0'
 
  '_intlist(a b)' should behave like 'a + b':
 
@@ -1035,15 +1039,17 @@ ordering defined by it.
   * optimized:
   (and
     (func
-      ('symbol', '_intlist')
-      ('string', '0\x001\x002'))
+      ('symbol', '_unordered')
+      (func
+        ('symbol', '_intlist')
+        ('string', '0\x001\x002')))
     (range
       ('symbol', '2')
       ('symbol', '0')))
   * set:
   <filteredset
     <spanset- 0:2>,
-    <baseset [0, 1, 2]>>
+    <baseset+ [0, 1, 2]>>
   2
   1
   0
@@ -1089,14 +1095,17 @@ ordering defined by it.
       ('symbol', '2')
       ('symbol', '0'))
     (func
-      ('symbol', '_hexlist')
-      ('string', '*'))) (glob)
+      ('symbol', '_unordered')
+      (func
+        ('symbol', '_hexlist')
+        ('string', '*')))) (glob)
   * set:
-  <baseset [0, 1, 2]>
+  <filteredset
+    <spanset- 0:2>,
+    <baseset [0, 1, 2]>>
+  2
+  1
   0
-  1
-  2
- BROKEN: should be '2 1 0'
 
   $ trylist --optimize --bin '%ln & 2:0' `hg log -T '{node} ' -r0+2+1`
   (and
@@ -1120,6 +1129,67 @@ ordering defined by it.
   2
   1
 
+ '_unordered' should be omitted if order doesn't matter:
+
+  $ try --optimize '2:0 & not (0 + 1)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (not
+      (group
+        (or
+          ('symbol', '0')
+          ('symbol', '1')))))
+  * optimized:
+  (difference
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', '_list')
+      ('string', '0\x001')))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <not
+      <baseset [0, 1]>>>
+  2
+
+  $ try --optimize '2:0 & not (0:2 & (0 + 1))'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (not
+      (group
+        (and
+          (range
+            ('symbol', '0')
+            ('symbol', '2'))
+          (group
+            (or
+              ('symbol', '0')
+              ('symbol', '1')))))))
+  * optimized:
+  (difference
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (and
+      (range
+        ('symbol', '0')
+        ('symbol', '2'))
+      (func
+        ('symbol', '_list')
+        ('string', '0\x001'))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <not
+      <baseset [0, 1]>>>
+  2
+
  'present()' should do nothing other than suppressing an error:
 
   $ try --optimize '2:0 & present(0 + 1 + 2)'
@@ -1316,9 +1386,10 @@ ordering defined by it.
     <spanset- 0:2>>
   1
 
- 'A & B' can be rewritten as 'B & A' by weight, but the ordering rule should
- be determined before the optimization (i.e. 'B' should take the ordering of
- 'A'):
+ 'A & B' can be rewritten as 'B & A' by weight, but that's fine as long as
+ the ordering rule is determined before the rewrite; in this example,
+ 'B' follows the order of the initial set, which is the same order as 'A'
+ since 'A' also follows the order:
 
   $ try --optimize 'contains("glob:*") & (2 + 0 + 1)'
   (and
@@ -1333,19 +1404,23 @@ ordering defined by it.
   * optimized:
   (and
     (func
-      ('symbol', '_list')
-      ('string', '2\x000\x001'))
+      ('symbol', '_unordered')
+      (func
+        ('symbol', '_list')
+        ('string', '2\x000\x001')))
     (func
       ('symbol', 'contains')
       ('string', 'glob:*')))
   * set:
   <filteredset
-    <baseset [2, 0, 1]>,
+    <baseset+ [0, 1, 2]>,
     <contains 'glob:*'>>
-  2
   0
   1
- BROKEN: should be '0 1 2'
+  2
+
+ and in this example, 'A & B' is rewritten as 'B & A', but 'A' overrides
+ the order appropriately:
 
   $ try --optimize 'reverse(contains("glob:*")) & (0 + 2 + 1)'
   (and
@@ -1362,8 +1437,10 @@ ordering defined by it.
   * optimized:
   (and
     (func
-      ('symbol', '_list')
-      ('string', '0\x002\x001'))
+      ('symbol', '_unordered')
+      (func
+        ('symbol', '_list')
+        ('string', '0\x002\x001')))
     (func
       ('symbol', 'reverse')
       (func
@@ -1371,12 +1448,11 @@ ordering defined by it.
         ('string', 'glob:*'))))
   * set:
   <filteredset
-    <baseset [1, 2, 0]>,
+    <baseset- [0, 1, 2]>,
     <contains 'glob:*'>>
+  2
   1
-  2
   0
- BROKEN: should be '2 1 0'
 
 test sort revset
 --------------------------------------------


More information about the Mercurial-devel mailing list