[PATCH 1 of 2 V2] revset: use smartset minus operator

Thu Feb 18 19:38:07 UTC 2016

# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1455824115 28800
#      Thu Feb 18 11:35:15 2016 -0800
# Node ID dc8d72c3df7dfcd7b67fa71d679cc7c3ad90f25c
# Parent  a036e1ae1fbe88ab99cb861ebfc2e4da7a3912ca
revset: use smartset minus operator

Previously, revsets like 'X - Y' were translated to be 'X and not Y'. This can
be expensive, since if Y is a single commit then 'not Y' becomes a huge set and
sometimes the query optimizer doesn't account for it well.

This patch changes revsets to use the built in smartset minus operator, which is
often smarter than 'X and not Y'.

On a large repo this saves 2.2 seconds on rebase and histedit because "X:: - X"
becomes almost instant.

Relevant performance numbers from revsetbenchmark.py

revset #13: roots((tip~100::) - (tip~100::tip))
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.001080      0.001107      0.001102      0.001118      0.001121      0.001114      0.001141      0.001123      0.001099      0.001123      0.001137
1) 0.000708  65% 0.000738  66% 0.000735  66% 0.000739  66% 0.000784  69% 0.000780  70% 0.000807  70% 0.000756  67% 0.000727  66% 0.000759  67% 0.000808  71%

revset #14: roots((0::) - (0::tip))
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.131304      0.079168      0.133129      0.076560      0.048179      0.133349      0.049153      0.077097      0.129689      0.076212      0.048543
1) 0.065066  49% 0.036941  46% 0.066063  49% 0.034755  45% 0.048558      0.071091  53% 0.047679      0.034984  45% 0.064572  49% 0.035680  46% 0.048508

revset #22: (not public() - obsolete())
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.000139      0.000133      0.000133      0.000138      0.000134      0.000155      0.000157      0.000152      0.000157      0.000156      0.000153
1) 0.000108  77% 0.000129      0.000129      0.000134      0.000132      0.000127  81% 0.000151      0.000147      0.000127  80% 0.000152      0.000149

revset #25: (20000::) - (20000)
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.050560      0.045513      0.022593      0.043588      0.021909      0.045517      0.021822      0.044660      0.049740      0.044227      0.021819
1) 0.018614  36% 0.000171   0% 0.019659  87% 0.000168   0% 0.015543  70% 0.021069  46% 0.015623  71% 0.000180   0% 0.018658  37% 0.000186   0% 0.015750  72%

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -436,6 +436,9 @@ def dagrange(repo, subset, x, y):
 def andset(repo, subset, x, y):
     return getset(repo, getset(repo, subset, x), y)
 
+def minusset(repo, subset, x, y):
+    return getset(repo, subset, x) - getset(repo, subset, y)
+
 def orset(repo, subset, *xs):
     assert xs
     if len(xs) == 1:
@@ -2143,6 +2146,7 @@ methods = {
     "string": stringset,
     "symbol": stringset,
     "and": andset,
+    "minus": minusset,
     "or": orset,
     "not": notset,
     "list": listset,
@@ -2163,7 +2167,10 @@ def optimize(x, small):
 
     op = x[0]
     if op == 'minus':
-        return optimize(('and', x[1], ('not', x[2])), small)
+        wa, ta = optimize(x[1], True)
+        wb, tb = optimize(x[2], True)
+
+        return wa, (op, ta, tb)
     elif op == 'only':
         return optimize(('func', ('symbol', 'only'),
                          ('list', x[1], x[2])), small)
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -693,12 +693,11 @@ Test opreand of '%' is optimized recursi
   * optimized:
   (func
     ('symbol', 'only')
-    (and
+    (minus
       (range
         ('symbol', '8')
         ('symbol', '9'))
-      (not
-        ('symbol', '8'))))
+      ('symbol', '8')))
   * set:
   <baseset+ [8, 9]>
   8
@@ -1249,13 +1248,16 @@ check that conversion to only works
     (dagrangepre
       ('symbol', '1')))
   * optimized:
-  (func
-    ('symbol', 'only')
-    (list
-      ('symbol', '3')
+  (minus
+    (func
+      ('symbol', 'ancestors')
+      ('symbol', '3'))
+    (func
+      ('symbol', 'ancestors')
       ('symbol', '1')))
   * set:
-  <baseset+ [3]>
+  <filteredset
+    <generatorset+>>
   3
   $ try --optimize 'ancestors(1) - ancestors(3)'
   (minus
@@ -1266,13 +1268,16 @@ check that conversion to only works
       ('symbol', 'ancestors')
       ('symbol', '3')))
   * optimized:
-  (func
-    ('symbol', 'only')
-    (list
-      ('symbol', '1')
+  (minus
+    (func
+      ('symbol', 'ancestors')
+      ('symbol', '1'))
+    (func
+      ('symbol', 'ancestors')
       ('symbol', '3')))
   * set:
-  <baseset+ []>
+  <filteredset
+    <generatorset+>>
   $ try --optimize 'not ::2 and ::6'
   (and
     (not