[PATCH 8 of 8] revset: introduce an API that avoids `formatspec` input serialization

Boris Feld boris.feld at octobus.net
Fri Jan 11 06:29:10 EST 2019


# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1546605681 -3600
#      Fri Jan 04 13:41:21 2019 +0100
# Node ID 73926c4ab24d6c01723ed050601b134bdc89562f
# Parent  4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa
# EXP-Topic revs-efficiency
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d
revset: introduce an API that avoids `formatspec` input serialization

Instead of having the data fully serialized, the input can be replaced with a
`__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>`
has to be passed along with the format spec, but the operation can get much more
efficient.

Just using it for the simple "%ld" case provides a significant boost. For example,
here is the impact on a sample discovery run between two pypy repositories
with arbitrary differences (using hg perfdiscovery).

$ hg perfdiscovery
before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15)
after:  ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1362,8 +1362,8 @@ class localrepository(object):
         Returns a revset.abstractsmartset, which is a list-like interface
         that contains integer revisions.
         '''
-        expr = revsetlang.formatspec(expr, *args)
-        m = revset.match(None, expr)
+        expr, inputs = revsetlang.formatspecargs(expr, *args)
+        m = revset.matchany(None, [expr], inputs=inputs)
         return m(self)
 
     def set(self, expr, *args):
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2194,6 +2194,14 @@ def _hexlist(repo, subset, x, order):
     else:
         return _orderedhexlist(repo, subset, x)
 
+ at predicate(revsetlang.internal_input_func, takeorder=True)
+def _internal_input(repo, subset, x, order):
+    # access substituted value during internal revset runs
+    if order == followorder:
+        return subset & x[1]
+    else:
+        return x[1] & subset
+
 methods = {
     "range": rangeset,
     "rangeall": rangeall,
@@ -2230,7 +2238,7 @@ def match(ui, spec, lookup=None):
     """Create a matcher for a single revision spec"""
     return matchany(ui, [spec], lookup=lookup)
 
-def matchany(ui, specs, lookup=None, localalias=None):
+def matchany(ui, specs, lookup=None, localalias=None, inputs=()):
     """Create a matcher that will include any revisions matching one of the
     given specs
 
@@ -2239,6 +2247,9 @@ def matchany(ui, specs, lookup=None, loc
 
     If localalias is not None, it is a dict {name: definitionstring}. It takes
     precedence over [revsetalias] config section.
+
+    inputs contains the values for __internal_input__ references. This is used
+    by internal revset runs.
     """
     if not specs:
         def mfunc(repo, subset=None):
@@ -2261,6 +2272,8 @@ def matchany(ui, specs, lookup=None, loc
         aliases.extend(localalias.items())
     if aliases:
         tree = revsetlang.expandaliases(tree, aliases, warn=warn)
+    if inputs:
+        tree = revsetlang.expandinputs(inputs, tree)
     tree = revsetlang.foldconcat(tree)
     tree = revsetlang.analyze(tree)
     tree = revsetlang.optimize(tree)
diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
--- a/mercurial/revsetlang.py
+++ b/mercurial/revsetlang.py
@@ -69,6 +69,9 @@ symbols = {}
 # default set of valid characters for non-initial letters of symbols
 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
 
+_internal_input = '__internal_input_placeholder__'
+internal_input_func = '__internal_input__'
+
 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
     Parse a revset statement into a stream of tokens
@@ -333,7 +336,7 @@ def _analyze(x):
     elif op == 'negate':
         s = getstring(x[1], _("can't negate that"))
         return _analyze(('string', '-' + s))
-    elif op in ('string', 'symbol'):
+    elif op in ('string', 'symbol', 'smartset'):
         return x
     elif op == 'rangeall':
         return (op, None)
@@ -373,7 +376,7 @@ def _optimize(x):
         return 0, x
 
     op = x[0]
-    if op in ('string', 'symbol'):
+    if op in ('string', 'symbol', 'smartset'):
         return 0.5, x # single revisions are small
     elif op == 'and':
         wa, ta = _optimize(x[1])
@@ -532,6 +535,26 @@ def expandaliases(tree, aliases, warn=No
                 alias.warned = True
     return tree
 
+class _inputrules(parser.basealiasrules):
+    """replace internal input reference by their actual value"""
+
+    @classmethod
+    def _getalias(cls, inputs, tree):
+        if not isinstance(tree, tuple):
+            return None
+        if tree[0] != 'func':
+            return None
+        if getsymbol(tree[1]) != _internal_input:
+            return None
+        idx = int(getsymbol(tree[2]))
+        newtree = ('func',
+                   ('symbol', internal_input_func),
+                   ('smartset', inputs[idx])
+        )
+        return parser.alias(idx, None, None, newtree), None
+
+expandinputs = _inputrules.expand
+
 def foldconcat(tree):
     """Fold elements to be concatenated by `##`
     """
@@ -686,12 +709,23 @@ def formatspec(expr, *args):
         if t == 'baseset':
             if isinstance(arg, set):
                 arg = sorted(arg)
-            try:
-                ret.append(_formatintlist(list(arg)))
-            except (TypeError, ValueError):
-                raise error.ParseError(_('invalid argument for revspec'))
+            ret.append(_formatintlist(list(arg)))
     return b''.join(ret)
 
+def formatspecargs(expr, *args):
+    """same as formatspec, but preserve some expensive arguments"""
+    parsed = _parseargs(expr, args)
+    ret = []
+    inputs = []
+    for t, arg in parsed:
+        if t is None:
+            ret.append(arg)
+        if t == 'baseset':
+            key = '%s(%d)' % (_internal_input, len(inputs))
+            inputs.append(smartset.baseset(arg))
+            ret.append(key)
+    return (b''.join(ret), inputs)
+
 def _parseargs(expr, args):
     """parse the expression and replace all inexpensive args
 


More information about the Mercurial-devel mailing list