[PATCH 2 of 2] revset: make optimizer use the new minusset

Durham Goode durham at fb.com
Wed Feb 10 19:23:47 EST 2016


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1455146394 28800
#      Wed Feb 10 15:19:54 2016 -0800
# Node ID ec633f772ea500d3db5e2fff3240099960f679b3
# Parent  edfc4fda0c1ca78186d9a779f0d4d0cfc6e1c03f
revset: make optimizer use the new minusset

Now that we have a minusset class, we need to change the optimizer to use it.

On a large repo this saves 2.2 seconds on rebase and histedit because "X:: - X"
becomes almost instant.

>From the revset benchmark, the relevant performance changes are below. There's
a slight regression in 'not public() - obsolete()', but when I test it in a
large repo, the regression appears to be constant time.

Result by revset
================

Revision:
0) Revision  31631:a036e1ae1fbe: merge with stable
1) Revision  (tip) 31638:1fc5435f2c06: revset: add minusset class

revset #0: roots((tip~100::) - (tip~100::tip))
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.002040      0.001257      0.002095      0.001253      0.001343      0.002090      0.001331      0.001290      0.002151      0.001252      0.001292
1) 0.001421  69% 0.000972  77% 0.001448  69% 0.000974  77% 0.001384      0.001503  71% 0.001467 110% 0.001072  83% 0.001510  70% 0.001053  84% 0.001471 113%

revset #1: roots((0::) - (0::tip))
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.201518      0.140201      0.199214      0.139487      0.075112      0.192998      0.078176      0.139645      0.200445      0.139246      0.076422
1) 0.099052  49% 0.069741  49% 0.096234  48% 0.066677  47% 0.091777 122% 0.102186  52% 0.092119 117% 0.064825  46% 0.092928  46% 0.066803  47% 0.093369 122%

revset #2: (not public() - obsolete())
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.000508      0.000443      0.000446      0.000447      0.000445      0.000518      0.000480      0.000475      0.000522      0.000464      0.000462
1) 0.000534 105% 0.000576 130% 0.000568 127% 0.000590 131% 0.000567 127% 0.000569 109% 0.000619 128% 0.000643 135% 0.000587 112% 0.000621 133% 0.000637 137%

revset #3: (20000::) - (20000)
   plain         min           max           first         last          reverse       rev..rst      rev..ast      sort          sor..rst      sor..ast
0) 0.078105      0.070464      0.032111      0.070538      0.031498      0.070976      0.033055      0.072516      0.081517      0.073888      0.033544
1) 0.028956  37% 0.000285   0% 0.028707  89% 0.000278   0% 0.026527  84% 0.028691  40% 0.027183  82% 0.000303   0% 0.027680  33% 0.000320   0% 0.027546  82%

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -436,6 +436,9 @@ def dagrange(repo, subset, x, y):
 def andset(repo, subset, x, y):
     return getset(repo, getset(repo, subset, x), y)
 
+def minusset(repo, subset, x, y):
+    return (getset(repo, subset, x) - getset(repo, subset, y))
+
 def orset(repo, subset, *xs):
     assert xs
     if len(xs) == 1:
@@ -2143,6 +2146,7 @@ methods = {
     "string": stringset,
     "symbol": stringset,
     "and": andset,
+    "minus": minusset,
     "or": orset,
     "not": notset,
     "list": listset,
@@ -2163,7 +2167,23 @@ def optimize(x, small):
 
     op = x[0]
     if op == 'minus':
-        return optimize(('and', x[1], ('not', x[2])), small)
+        wa, ta = optimize(x[1], True)
+        wb, tb = optimize(x[2], True)
+
+        # (::x - ::y) has a fast path
+        def isonly(revs, bases):
+            return (
+                revs is not None
+                and revs[0] == 'func'
+                and getstring(revs[1], _('not a symbol')) == 'ancestors'
+                and bases is not None
+                and bases[0] == 'func'
+                and getstring(bases[1], _('not a symbol')) == 'ancestors')
+
+        if isonly(ta, tb):
+            return wa, ('func', ('symbol', 'only'), ('list', ta[2], tb[2]))
+
+        return wa, (op, ta, tb)
     elif op == 'only':
         return optimize(('func', ('symbol', 'only'),
                          ('list', x[1], x[2])), small)
@@ -2846,8 +2866,7 @@ class abstractsmartset(object):
         """Returns a new object with the substraction of the two collections.
 
         This is part of the mandatory API for smartset."""
-        c = other.__contains__
-        return self.filter(lambda r: not c(r), cache=False)
+        return minusset(self, other)
 
     def filter(self, condition, cache=True):
         """Returns this smartset filtered by condition as a new smartset.
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -168,8 +168,9 @@ names that should work without quoting
     ('symbol', 'b')
     ('symbol', 'a'))
   * set:
-  <filteredset
-    <baseset [1]>>
+  <minusset
+    <baseset [1]>,
+    <baseset [0]>>
   1
   $ try _a_b_c_
   ('symbol', '_a_b_c_')
@@ -181,8 +182,9 @@ names that should work without quoting
     ('symbol', '_a_b_c_')
     ('symbol', 'a'))
   * set:
-  <filteredset
-    <baseset [6]>>
+  <minusset
+    <baseset [6]>,
+    <baseset [0]>>
   6
   $ try .a.b.c.
   ('symbol', '.a.b.c.')
@@ -194,8 +196,9 @@ names that should work without quoting
     ('symbol', '.a.b.c.')
     ('symbol', 'a'))
   * set:
-  <filteredset
-    <baseset [7]>>
+  <minusset
+    <baseset [7]>,
+    <baseset [0]>>
   7
 
 names that should be caught by fallback mechanism
@@ -277,8 +280,9 @@ quoting needed
     ('string', '-a-b-c-')
     ('symbol', 'a'))
   * set:
-  <filteredset
-    <baseset [4]>>
+  <minusset
+    <baseset [4]>,
+    <baseset [0]>>
   4
 
   $ log '1 or 2'
@@ -693,12 +697,11 @@ Test opreand of '%' is optimized recursi
   * optimized:
   (func
     ('symbol', 'only')
-    (and
+    (minus
       (range
         ('symbol', '8')
         ('symbol', '9'))
-      (not
-        ('symbol', '8'))))
+      ('symbol', '8')))
   * set:
   <baseset+ [8, 9]>
   8


More information about the Mercurial-devel mailing list