[PATCH 1 of 2 V2] revset: use smartset minus operator
Durham Goode
durham at fb.com
Thu Feb 18 19:38:07 UTC 2016
# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1455824115 28800
# Thu Feb 18 11:35:15 2016 -0800
# Node ID dc8d72c3df7dfcd7b67fa71d679cc7c3ad90f25c
# Parent a036e1ae1fbe88ab99cb861ebfc2e4da7a3912ca
revset: use smartset minus operator
Previously, revsets like 'X - Y' were translated to be 'X and not Y'. This can
be expensive, since if Y is a single commit then 'not Y' becomes a huge set and
sometimes the query optimizer doesn't account for it well.
This patch changes revsets to use the built in smartset minus operator, which is
often smarter than 'X and not Y'.
On a large repo this saves 2.2 seconds on rebase and histedit because "X:: - X"
becomes almost instant.
Relevant performance numbers from revsetbenchmark.py
revset #13: roots((tip~100::) - (tip~100::tip))
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.001080 0.001107 0.001102 0.001118 0.001121 0.001114 0.001141 0.001123 0.001099 0.001123 0.001137
1) 0.000708 65% 0.000738 66% 0.000735 66% 0.000739 66% 0.000784 69% 0.000780 70% 0.000807 70% 0.000756 67% 0.000727 66% 0.000759 67% 0.000808 71%
revset #14: roots((0::) - (0::tip))
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.131304 0.079168 0.133129 0.076560 0.048179 0.133349 0.049153 0.077097 0.129689 0.076212 0.048543
1) 0.065066 49% 0.036941 46% 0.066063 49% 0.034755 45% 0.048558 0.071091 53% 0.047679 0.034984 45% 0.064572 49% 0.035680 46% 0.048508
revset #22: (not public() - obsolete())
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.000139 0.000133 0.000133 0.000138 0.000134 0.000155 0.000157 0.000152 0.000157 0.000156 0.000153
1) 0.000108 77% 0.000129 0.000129 0.000134 0.000132 0.000127 81% 0.000151 0.000147 0.000127 80% 0.000152 0.000149
revset #25: (20000::) - (20000)
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.050560 0.045513 0.022593 0.043588 0.021909 0.045517 0.021822 0.044660 0.049740 0.044227 0.021819
1) 0.018614 36% 0.000171 0% 0.019659 87% 0.000168 0% 0.015543 70% 0.021069 46% 0.015623 71% 0.000180 0% 0.018658 37% 0.000186 0% 0.015750 72%
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -436,6 +436,9 @@ def dagrange(repo, subset, x, y):
def andset(repo, subset, x, y):
return getset(repo, getset(repo, subset, x), y)
+def minusset(repo, subset, x, y):
+ return getset(repo, subset, x) - getset(repo, subset, y)
+
def orset(repo, subset, *xs):
assert xs
if len(xs) == 1:
@@ -2143,6 +2146,7 @@ methods = {
"string": stringset,
"symbol": stringset,
"and": andset,
+ "minus": minusset,
"or": orset,
"not": notset,
"list": listset,
@@ -2163,7 +2167,10 @@ def optimize(x, small):
op = x[0]
if op == 'minus':
- return optimize(('and', x[1], ('not', x[2])), small)
+ wa, ta = optimize(x[1], True)
+ wb, tb = optimize(x[2], True)
+
+ return wa, (op, ta, tb)
elif op == 'only':
return optimize(('func', ('symbol', 'only'),
('list', x[1], x[2])), small)
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -693,12 +693,11 @@ Test opreand of '%' is optimized recursi
* optimized:
(func
('symbol', 'only')
- (and
+ (minus
(range
('symbol', '8')
('symbol', '9'))
- (not
- ('symbol', '8'))))
+ ('symbol', '8')))
* set:
<baseset+ [8, 9]>
8
@@ -1249,13 +1248,16 @@ check that conversion to only works
(dagrangepre
('symbol', '1')))
* optimized:
- (func
- ('symbol', 'only')
- (list
- ('symbol', '3')
+ (minus
+ (func
+ ('symbol', 'ancestors')
+ ('symbol', '3'))
+ (func
+ ('symbol', 'ancestors')
('symbol', '1')))
* set:
- <baseset+ [3]>
+ <filteredset
+ <generatorset+>>
3
$ try --optimize 'ancestors(1) - ancestors(3)'
(minus
@@ -1266,13 +1268,16 @@ check that conversion to only works
('symbol', 'ancestors')
('symbol', '3')))
* optimized:
- (func
- ('symbol', 'only')
- (list
- ('symbol', '1')
+ (minus
+ (func
+ ('symbol', 'ancestors')
+ ('symbol', '1'))
+ (func
+ ('symbol', 'ancestors')
('symbol', '3')))
* set:
- <baseset+ []>
+ <filteredset
+ <generatorset+>>
$ try --optimize 'not ::2 and ::6'
(and
(not
More information about the Mercurial-devel
mailing list