[PATCH 8 of 8] revset: introduce an API that avoids `formatspec` input serialization
Boris Feld
boris.feld at octobus.net
Fri Jan 11 06:29:10 EST 2019
# HG changeset patch
# User Boris Feld <boris.feld at octobus.net>
# Date 1546605681 -3600
# Fri Jan 04 13:41:21 2019 +0100
# Node ID 73926c4ab24d6c01723ed050601b134bdc89562f
# Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa
# EXP-Topic revs-efficiency
# Available At https://bitbucket.org/octobus/mercurial-devel/
# hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d
revset: introduce an API that avoids `formatspec` input serialization
Instead of having the data fully serialized, the input can be replaced with a
`__internal_input__(<idx>)` entry in the revspec. The actual value at `<idx>`
has to be passed along with the format spec but the operation can get much more
efficient.
Just using it for the simple "%ld" case provides a significant boost. For example,
here is the impact on a sample discovery run between two pypy repositories
with arbitrary differences (using hg perfdiscovery).
$ hg perfdiscovery
before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15)
after: ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1362,8 +1362,8 @@ class localrepository(object):
Returns a revset.abstractsmartset, which is a list-like interface
that contains integer revisions.
'''
- expr = revsetlang.formatspec(expr, *args)
- m = revset.match(None, expr)
+ expr, inputs = revsetlang.formatspecargs(expr, *args)
+ m = revset.matchany(None, [expr], inputs=inputs)
return m(self)
def set(self, expr, *args):
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2194,6 +2194,14 @@ def _hexlist(repo, subset, x, order):
else:
return _orderedhexlist(repo, subset, x)
+@predicate(revsetlang.internal_input_func, takeorder=True)
+def _internal_input(repo, subset, x, order):
+ # access substituted value during internal revset runs
+ if order == followorder:
+ return subset & x[1]
+ else:
+ return x[1] & subset
+
methods = {
"range": rangeset,
"rangeall": rangeall,
@@ -2230,7 +2238,7 @@ def match(ui, spec, lookup=None):
"""Create a matcher for a single revision spec"""
return matchany(ui, [spec], lookup=lookup)
-def matchany(ui, specs, lookup=None, localalias=None):
+def matchany(ui, specs, lookup=None, localalias=None, inputs=()):
"""Create a matcher that will include any revisions matching one of the
given specs
@@ -2239,6 +2247,9 @@ def matchany(ui, specs, lookup=None, loc
If localalias is not None, it is a dict {name: definitionstring}. It takes
precedence over [revsetalias] config section.
+
+ inputs contains values for __internal_input__ references. This is used by
+ internal revset runs.
"""
if not specs:
def mfunc(repo, subset=None):
@@ -2261,6 +2272,8 @@ def matchany(ui, specs, lookup=None, loc
aliases.extend(localalias.items())
if aliases:
tree = revsetlang.expandaliases(tree, aliases, warn=warn)
+ if inputs:
+ tree = revsetlang.expandinputs(inputs, tree)
tree = revsetlang.foldconcat(tree)
tree = revsetlang.analyze(tree)
tree = revsetlang.optimize(tree)
diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
--- a/mercurial/revsetlang.py
+++ b/mercurial/revsetlang.py
@@ -69,6 +69,9 @@ symbols = {}
# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
+_internal_input = '__internal_input_placeholder__'
+internal_input_func = '__internal_input__'
+
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
'''
Parse a revset statement into a stream of tokens
@@ -333,7 +336,7 @@ def _analyze(x):
elif op == 'negate':
s = getstring(x[1], _("can't negate that"))
return _analyze(('string', '-' + s))
- elif op in ('string', 'symbol'):
+ elif op in ('string', 'symbol', 'smartset'):
return x
elif op == 'rangeall':
return (op, None)
@@ -373,7 +376,7 @@ def _optimize(x):
return 0, x
op = x[0]
- if op in ('string', 'symbol'):
+ if op in ('string', 'symbol', 'smartset'):
return 0.5, x # single revisions are small
elif op == 'and':
wa, ta = _optimize(x[1])
@@ -532,6 +535,26 @@ def expandaliases(tree, aliases, warn=No
alias.warned = True
return tree
+class _inputrules(parser.basealiasrules):
+ """replace internal input reference by their actual value"""
+
+ @classmethod
+ def _getalias(cls, inputs, tree):
+ if not isinstance(tree, tuple):
+ return None
+ if tree[0] != 'func':
+ return None
+ if getsymbol(tree[1]) != _internal_input:
+ return None
+ idx = int(getsymbol(tree[2]))
+ newtree = ('func',
+ ('symbol', internal_input_func),
+ ('smartset', inputs[idx])
+ )
+ return parser.alias(idx, None, None, newtree), None
+
+expandinputs = _inputrules.expand
+
def foldconcat(tree):
"""Fold elements to be concatenated by `##`
"""
@@ -686,12 +709,23 @@ def formatspec(expr, *args):
if t == 'baseset':
if isinstance(arg, set):
arg = sorted(arg)
- try:
- ret.append(_formatintlist(list(arg)))
- except (TypeError, ValueError):
- raise error.ParseError(_('invalid argument for revspec'))
+ ret.append(_formatintlist(list(arg)))
return b''.join(ret)
+def formatspecargs(expr, *args):
+ """same as formatspec, but preserve some expensive arguments"""
+ parsed = _parseargs(expr, args)
+ ret = []
+ inputs = []
+ for t, arg in parsed:
+ if t is None:
+ ret.append(arg)
+ if t == 'baseset':
+ key = '%s(%d)' % (_internal_input, len(inputs))
+ inputs.append(smartset.baseset(arg))
+ ret.append(key)
+ return (b''.join(ret), inputs)
+
def _parseargs(expr, args):
"""parse the expression and replace all inexpensive args
More information about the Mercurial-devel
mailing list