[PATCH 3 of 7 V4] revset: insert _unordered to fix ordering of nested 'or' and '_list*()' (BC)
Yuya Nishihara
yuya at tcha.org
Thu Jun 23 11:59:55 EDT 2016
# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1455629461 -32400
# Tue Feb 16 22:31:01 2016 +0900
# Node ID bd5e886ceca4257dd843f98539a18ef7c31ec8e0
# Parent 0a1b1af8c9b8aaa0690de2fc0614b531dc03a5c2
revset: insert _unordered to fix ordering of nested 'or' and '_list*()' (BC)
This fixes the order of 'x & (y | z)' and 'x & _list*(...)' by inserting
'_unordered' function.
This is obviously slower than the current buggy version if an input set is
large:
#0 #1 #2 #3
0) 0.002968 0.002980 0.002982 0.073042
1) 0.004513 0.004485 0.012029 0.075261
#0: 0:4000 & (0:1099 + 1000:2099 + 2000:3099)
#1: 4000:0 & (0:1099 + 1000:2099 + 2000:3099)
#2: 10000:0 & (0:1099 + 1000:2099 + 2000:3099)
#3: file("path:hg") & (0:1099 + 1000:2099 + 2000:3099)
This issue could be addressed by redirecting nested 'or' to new filter
function, but it appears to be slower than the '_unordered()' insertion.
def unionset(repo, subset, *xs):
ss = [getset(repo, fullreposet(repo), x) for x in xs]
return subset.filter(lambda r: any(r in s for s in ss), cache=False)
Changes from V3:
- got rid of double subset filtering
- dropped 'sort()', 'reverse()' and 'head()' from the list as their problems
are slightly different
- renamed '_reorder(set)' to '_unordered(set)' to clarify the nested set is
evaluated as if it were 'unordered'
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2205,6 +2205,14 @@ def tag(repo, subset, x):
def tagged(repo, subset, x):
return tag(repo, subset, x)
+# for internal use
+ at predicate('_unordered(set)', safe=True)
+def _unordered(repo, subset, x):
+ # Evaluate right-hand-side expression 'set' and reorder the result set
+ # according to the left-hand-side set 'subset'
+ rs = getset(repo, fullreposet(repo), x)
+ return subset & rs
+
@predicate('unstable()', safe=True)
def unstable(repo, subset, x):
"""Non-obsolete changesets with obsolete ancestors.
@@ -2346,6 +2354,13 @@ followorder = 'follow' # must follow th
followorder: followorder,
}
+def _fixunionorder(x, order):
+ """Wrap 'or' and '_list*()' by '_unordered()' to enforce the ordering
+ requirement"""
+ if order == followorder:
+ return ('func', ('symbol', '_unordered'), x)
+ return x
+
def _matchonly(revs, bases):
"""
>>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2444,7 +2459,7 @@ def _optimize(x, small, order):
# we can't reorder trees by weight because it would change the order.
# ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
# ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
- return max(ws), (op,) + tuple(ts)
+ return max(ws), _fixunionorder((op,) + tuple(ts), order)
elif op == 'not':
# Optimize not public() to _notpublic() because we have a fast version
if x[1] == ('func', ('symbol', 'public'), None):
@@ -2492,7 +2507,10 @@ def _optimize(x, small, order):
w = 10 # assume most sorts look at changelog
else:
w = 1
- return w + wa, (op, x[1], ta)
+ t = (op, x[1], ta)
+ if f in "_list _intlist _hexlist":
+ t = _fixunionorder(t, order)
+ return w + wa, t
return 1, x
def optimize(tree):
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -979,14 +979,17 @@ ordering defined by it.
('symbol', '2')
('symbol', '0'))
(func
- ('symbol', '_list')
- ('string', '0\x001\x002')))
+ ('symbol', '_unordered')
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001\x002'))))
* set:
- <baseset [0, 1, 2]>
+ <filteredset
+ <spanset- 0:2>,
+ <baseset [0, 1, 2]>>
+ 2
+ 1
0
- 1
- 2
- BROKEN: should be '2 1 0'
'A + B' should take the ordering of the left expression:
@@ -1006,21 +1009,22 @@ ordering defined by it.
(range
('symbol', '2')
('symbol', '0'))
- (or
- (range
- ('symbol', '0')
- ('symbol', '1'))
- ('symbol', '2')))
+ (func
+ ('symbol', '_unordered')
+ (or
+ (range
+ ('symbol', '0')
+ ('symbol', '1'))
+ ('symbol', '2'))))
* set:
- <addset
- <filteredset
+ <filteredset
+ <spanset- 0:2>,
+ <addset
<spanset+ 0:1>,
- <spanset- 0:2>>,
- <baseset [2]>>
+ <baseset [2]>>>
+ 2
+ 1
0
- 1
- 2
- BROKEN: should be '2 1 0'
'_intlist(a b)' should behave like 'a + b':
@@ -1035,15 +1039,17 @@ ordering defined by it.
* optimized:
(and
(func
- ('symbol', '_intlist')
- ('string', '0\x001\x002'))
+ ('symbol', '_unordered')
+ (func
+ ('symbol', '_intlist')
+ ('string', '0\x001\x002')))
(range
('symbol', '2')
('symbol', '0')))
* set:
<filteredset
<spanset- 0:2>,
- <baseset [0, 1, 2]>>
+ <baseset+ [0, 1, 2]>>
2
1
0
@@ -1089,14 +1095,17 @@ ordering defined by it.
('symbol', '2')
('symbol', '0'))
(func
- ('symbol', '_hexlist')
- ('string', '*'))) (glob)
+ ('symbol', '_unordered')
+ (func
+ ('symbol', '_hexlist')
+ ('string', '*')))) (glob)
* set:
- <baseset [0, 1, 2]>
+ <filteredset
+ <spanset- 0:2>,
+ <baseset [0, 1, 2]>>
+ 2
+ 1
0
- 1
- 2
- BROKEN: should be '2 1 0'
$ trylist --optimize --bin '%ln & 2:0' `hg log -T '{node} ' -r0+2+1`
(and
@@ -1120,6 +1129,67 @@ ordering defined by it.
2
1
+ '_unordered' should be omitted if order doesn't matter:
+
+ $ try --optimize '2:0 & not (0 + 1)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (not
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '1')))))
+ * optimized:
+ (difference
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001')))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <not
+ <baseset [0, 1]>>>
+ 2
+
+ $ try --optimize '2:0 & not (0:2 & (0 + 1))'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (not
+ (group
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '1')))))))
+ * optimized:
+ (difference
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001'))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <not
+ <baseset [0, 1]>>>
+ 2
+
'present()' should do nothing other than suppressing an error:
$ try --optimize '2:0 & present(0 + 1 + 2)'
@@ -1316,9 +1386,10 @@ ordering defined by it.
<spanset- 0:2>>
1
- 'A & B' can be rewritten as 'B & A' by weight, but the ordering rule should
- be determined before the optimization (i.e. 'B' should take the ordering of
- 'A'):
+ 'A & B' can be rewritten as 'B & A' by weight, but that's fine as long as
+ the ordering rule is determined before the rewrite; in this example,
+ 'B' follows the order of the initial set, which is the same order as 'A'
+ since 'A' also follows the order:
$ try --optimize 'contains("glob:*") & (2 + 0 + 1)'
(and
@@ -1333,19 +1404,23 @@ ordering defined by it.
* optimized:
(and
(func
- ('symbol', '_list')
- ('string', '2\x000\x001'))
+ ('symbol', '_unordered')
+ (func
+ ('symbol', '_list')
+ ('string', '2\x000\x001')))
(func
('symbol', 'contains')
('string', 'glob:*')))
* set:
<filteredset
- <baseset [2, 0, 1]>,
+ <baseset+ [0, 1, 2]>,
<contains 'glob:*'>>
- 2
0
1
- BROKEN: should be '0 1 2'
+ 2
+
+ and in this example, 'A & B' is rewritten as 'B & A', but 'A' overrides
+ the order appropriately:
$ try --optimize 'reverse(contains("glob:*")) & (0 + 2 + 1)'
(and
@@ -1362,8 +1437,10 @@ ordering defined by it.
* optimized:
(and
(func
- ('symbol', '_list')
- ('string', '0\x002\x001'))
+ ('symbol', '_unordered')
+ (func
+ ('symbol', '_list')
+ ('string', '0\x002\x001')))
(func
('symbol', 'reverse')
(func
@@ -1371,12 +1448,11 @@ ordering defined by it.
('string', 'glob:*'))))
* set:
<filteredset
- <baseset [1, 2, 0]>,
+ <baseset- [0, 1, 2]>,
<contains 'glob:*'>>
+ 2
1
- 2
0
- BROKEN: should be '2 1 0'
test sort revset
--------------------------------------------
More information about the Mercurial-devel
mailing list