[PATCH 2 of 6 V3] revset: insert _reorder to fix ordering of inner 'or' and functions (BC)
Yuya Nishihara
yuya at tcha.org
Fri Jun 17 10:45:28 EDT 2016
# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1455629461 -32400
# Tue Feb 16 22:31:01 2016 +0900
# Node ID a99114251d721a469991574105d1a0167f4fb162
# Parent defa6a51c4613a4294db3a16a3dea7b5cb6025a0
revset: insert _reorder to fix ordering of inner 'or' and functions (BC)
This fixes the order of 'x & (y | z)' and 'x & _list(...)' by inserting
'_reorder' function. Still 'x & y:z', 'x & reverse(y)' and 'x & sort(y)'
have the ordering issue, which should be fixed by subsequent patches.
Originally I was going to add a property to @predicate to mark a function,
but that seems not ideal because most functions should not have such property,
and we will need a different optimization trick per function.
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -1153,6 +1153,7 @@ def head(repo, subset, x):
# This does not break because of other fullreposet misbehavior.
# XXX We should combine with subset first: 'subset & baseset(...)'. This is
# necessary to ensure we preserve the order in subset.
+ # Remove 'head' from _orderingsymbols if you've fixed the issue.
return baseset(hs) & subset
@predicate('heads(set)', safe=True)
@@ -1685,6 +1686,13 @@ def removes(repo, subset, x):
pat = getstring(x, _("removes requires a pattern"))
return checkstatus(repo, subset, pat, 2)
+ at predicate('_reorder(set)', safe=True)
+def _reorder(repo, subset, x):
+ # Evaluate right-hand-side expression 'set' and reorder the result set
+ # according to the left-hand-side set 'subset'
+ rs = getset(repo, subset, x)
+ return subset & rs
+
@predicate('rev(number)', safe=True)
def rev(repo, subset, x):
"""Revision with the given numeric identifier.
@@ -2302,6 +2310,18 @@ methods = {
"parentpost": p1,
}
+# Symbols which have own ordering rules and disregard the order of input set.
+# Few symbols should be listed. Almost all symbols should follow the input
+# order (i.e. they should do 'subset & x' or 'subset.filter(x)'.)
+_orderingsymbols = set([
+ 'head',
+ 'reverse',
+ 'sort',
+ '_list',
+ '_intlist',
+ '_hexlist',
+])
+
# Constants for ordering requirement, used in _optimize():
#
# If 'define', any future functions and operations can change the ordering of
@@ -2342,6 +2362,11 @@ methods = {
_followorder: _followorder,
}
+def _fixfolloworder(x, order):
+ if order == _followorder:
+ return ('func', ('symbol', '_reorder'), x)
+ return x
+
def _matchonly(revs, bases):
"""
>>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2440,7 +2465,7 @@ def _optimize(x, small, order):
# we can't reorder trees by weight because it would change the order.
# ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
# ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
- return max(ws), (op,) + tuple(ts)
+ return max(ws), _fixfolloworder((op,) + tuple(ts), order)
elif op == 'not':
# Optimize not public() to _notpublic() because we have a fast version
if x[1] == ('func', ('symbol', 'public'), None):
@@ -2488,7 +2513,10 @@ def _optimize(x, small, order):
w = 10 # assume most sorts look at changelog
else:
w = 1
- return w + wa, (op, x[1], ta)
+ t = (op, x[1], ta)
+ if f in _orderingsymbols:
+ t = _fixfolloworder(t, order)
+ return w + wa, t
return 1, x
def optimize(tree):
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -908,6 +908,376 @@ Test order of revisions in compound expr
1
0
+ $ log '2:0 & head()'
+ 2
+ 1
+ 0
+
+ '_reorder' should be inserted where necessary:
+
+ $ try --optimize '2:0 & (0 + 1 + 2)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '1')
+ ('symbol', '2'))))
+ * optimized:
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', '_reorder')
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001\x002'))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <baseset [0, 1, 2]>>
+ 2
+ 1
+ 0
+
+ $ try --optimize '2:0 & (0:1 + 2)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (group
+ (or
+ (range
+ ('symbol', '0')
+ ('symbol', '1'))
+ ('symbol', '2'))))
+ * optimized:
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', '_reorder')
+ (or
+ (range
+ ('symbol', '0')
+ ('symbol', '1'))
+ ('symbol', '2'))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <addset
+ <filteredset
+ <spanset+ 0:1>,
+ <spanset- 0:2>>,
+ <baseset [2]>>>
+ 2
+ 1
+ 0
+
+ '_reorder' should be omitted if order doesn't matter:
+
+ $ try --optimize '2:0 & not (0 + 1)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (not
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '1')))))
+ * optimized:
+ (difference
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001')))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <not
+ <baseset [0, 1]>>>
+ 2
+
+ $ try --optimize '2:0 & not (0:2 & (0 + 1))'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (not
+ (group
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '1')))))))
+ * optimized:
+ (difference
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', '_list')
+ ('string', '0\x001'))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <not
+ <baseset [0, 1]>>>
+ 2
+
+ BROKEN: reverse() and sort() will need different workaround since they
+ modify the flag of input set:
+
+ $ try --optimize '0:2 & reverse(all())'
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', 'reverse')
+ (func
+ ('symbol', 'all')
+ None)))
+ * optimized:
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', '_reorder')
+ (func
+ ('symbol', 'reverse')
+ (func
+ ('symbol', 'all')
+ None))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <filteredset
+ <spanset- 0:2>,
+ <spanset+ 0:9>>>
+ 2
+ 1
+ 0
+
+ $ try --optimize '0:2 & sort(all(), -rev)'
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', 'sort')
+ (list
+ (func
+ ('symbol', 'all')
+ None)
+ (negate
+ ('symbol', 'rev')))))
+ * optimized:
+ (and
+ (range
+ ('symbol', '0')
+ ('symbol', '2'))
+ (func
+ ('symbol', '_reorder')
+ (func
+ ('symbol', 'sort')
+ (list
+ (func
+ ('symbol', 'all')
+ None)
+ ('string', '-rev')))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <filteredset
+ <spanset- 0:2>,
+ <spanset+ 0:9>>>
+ 2
+ 1
+ 0
+
+ for 'A & f(B)', 'B' should not be affected by the order of 'A':
+
+ $ try --optimize '2:0 & first(1 + 0 + 2)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', 'first')
+ (or
+ ('symbol', '1')
+ ('symbol', '0')
+ ('symbol', '2'))))
+ * optimized:
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', 'first')
+ (func
+ ('symbol', '_list')
+ ('string', '1\x000\x002'))))
+ * set:
+ <baseset
+ <limit n=1, offset=0,
+ <spanset- 0:2>,
+ <baseset [1, 0, 2]>>>
+ 1
+
+ $ try --optimize '2:0 & not last(0 + 2 + 1)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (not
+ (func
+ ('symbol', 'last')
+ (or
+ ('symbol', '0')
+ ('symbol', '2')
+ ('symbol', '1')))))
+ * optimized:
+ (difference
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (func
+ ('symbol', 'last')
+ (func
+ ('symbol', '_list')
+ ('string', '0\x002\x001'))))
+ * set:
+ <filteredset
+ <spanset- 0:2>,
+ <not
+ <baseset
+ <last n=1,
+ <fullreposet+ 0:9>,
+ <baseset [1, 2, 0]>>>>>
+ 2
+ 0
+
+ for 'A & (op)(B)', 'B' should not be affected by the order of 'A':
+
+ $ try --optimize '2:0 & (1 + 0 + 2):(0 + 2 + 1)'
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (range
+ (group
+ (or
+ ('symbol', '1')
+ ('symbol', '0')
+ ('symbol', '2')))
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '2')
+ ('symbol', '1')))))
+ * optimized:
+ (and
+ (range
+ ('symbol', '2')
+ ('symbol', '0'))
+ (range
+ (func
+ ('symbol', '_list')
+ ('string', '1\x000\x002'))
+ (func
+ ('symbol', '_list')
+ ('string', '0\x002\x001'))))
+ * set:
+ <filteredset
+ <baseset [1]>,
+ <spanset- 0:2>>
+ 1
+
+ 'A & B' can be rewritten as 'B & A' by weight, but that's fine as long as
+ the ordering policy is determined before the rewrite; in this example,
+ 'B' follows the order of the initial set, which is the same order as 'A'
+ since 'A' also follows the order:
+
+ $ try --optimize 'contains("glob:*") & (2 + 0 + 1)'
+ (and
+ (func
+ ('symbol', 'contains')
+ ('string', 'glob:*'))
+ (group
+ (or
+ ('symbol', '2')
+ ('symbol', '0')
+ ('symbol', '1'))))
+ * optimized:
+ (and
+ (func
+ ('symbol', '_reorder')
+ (func
+ ('symbol', '_list')
+ ('string', '2\x000\x001')))
+ (func
+ ('symbol', 'contains')
+ ('string', 'glob:*')))
+ * set:
+ <filteredset
+ <baseset+ [0, 1, 2]>,
+ <contains 'glob:*'>>
+ 0
+ 1
+ 2
+
+ and in this example, 'A & B' is rewritten as 'B & A', but 'A' overrides
+ the order appropriately:
+
+ $ try --optimize 'reverse(contains("glob:*")) & (0 + 2 + 1)'
+ (and
+ (func
+ ('symbol', 'reverse')
+ (func
+ ('symbol', 'contains')
+ ('string', 'glob:*')))
+ (group
+ (or
+ ('symbol', '0')
+ ('symbol', '2')
+ ('symbol', '1'))))
+ * optimized:
+ (and
+ (func
+ ('symbol', '_reorder')
+ (func
+ ('symbol', '_list')
+ ('string', '0\x002\x001')))
+ (func
+ ('symbol', 'reverse')
+ (func
+ ('symbol', 'contains')
+ ('string', 'glob:*'))))
+ * set:
+ <filteredset
+ <baseset- [0, 1, 2]>,
+ <contains 'glob:*'>>
+ 2
+ 1
+ 0
+
test sort revset
--------------------------------------------
More information about the Mercurial-devel
mailing list