[PATCH 2 of 5] revsetlang: build optimized tree by helper function
Augie Fackler
raf at durin42.com
Thu Aug 31 15:23:57 EDT 2017
On Thu, Aug 31, 2017 at 11:42:11PM +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya at tcha.org>
> # Date 1455712705 -32400
> # Wed Feb 17 21:38:25 2016 +0900
> # Node ID 937f5edf91058c63fafd981a59feaaaa98d21068
> # Parent 1d58bfbb617075b997d4b0a41924fcaee648b32f
> revsetlang: build optimized tree by helper function
>
> This should make optimize() more readable, but it doubles the parsing cost.
>
> (original)
> $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
> 'L.optimize(L.analyze(L.parse("::tip")))'
> 10000 loops, best of 3: 18.1 usec per loop
>
> (this patch)
> $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
> 'L._treecache.clear(); L.optimize(L.analyze(L.parse("::tip")))'
> 10000 loops, best of 3: 48.4 usec per loop
>
> 30usec isn't dominant compared to the revset evaluation, but that is a cost.
> That's why a parsed tree is cached, which can benefit in hgweb or chg server.
>
> diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
> --- a/mercurial/revsetlang.py
> +++ b/mercurial/revsetlang.py
> @@ -239,6 +239,25 @@ def getargsdict(x, funcname, keys):
> return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
> keyvaluenode='keyvalue', keynode='symbol')
>
> +# cache of {spec: raw parsed tree} built internally
> +_treecache = {}
Should this be an LRU cache that's very large, instead of an unbounded
cache? Otherwise long-lived hgweb servers might end up using quite a
bit of RAM?
> +
> +def _cachedtree(spec):
> + # thread safe because parse() is reentrant and dict.__setitem__() is atomic
> + tree = _treecache.get(spec)
> + if tree is None:
> + _treecache[spec] = tree = parse(spec)
> + return tree
> +
> +def _build(tmplspec, *repls):
> + """Create raw parsed tree from a template revset statement
> +
> + >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
> + ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
> + """
> + template = _cachedtree(tmplspec)
> + return parser.buildtree(template, ('symbol', '_'), *repls)
> +
> def _isnamedfunc(x, funcname):
> """Check if given tree matches named function"""
> return x and x[0] == 'func' and getsymbol(x[1]) == funcname
> @@ -302,16 +321,15 @@ def _analyze(x):
>
> op = x[0]
> if op == 'minus':
> - return _analyze(('and', x[1], ('not', x[2])))
> + return _analyze(_build('_ and not _', *x[1:]))
> elif op == 'only':
> - t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
> - return _analyze(t)
> + return _analyze(_build('only(_, _)', *x[1:]))
> elif op == 'onlypost':
> - return _analyze(('func', ('symbol', 'only'), x[1]))
> + return _analyze(_build('only(_)', x[1]))
> elif op == 'dagrangepre':
> - return _analyze(('func', ('symbol', 'ancestors'), x[1]))
> + return _analyze(_build('ancestors(_)', x[1]))
> elif op == 'dagrangepost':
> - return _analyze(('func', ('symbol', 'descendants'), x[1]))
> + return _analyze(_build('descendants(_)', x[1]))
> elif op == 'negate':
> s = getstring(x[1], _("can't negate that"))
> return _analyze(('string', '-' + s))
> @@ -367,9 +385,9 @@ def _optimize(x, small):
> w = min(wa, wb)
>
> # (::x and not ::y)/(not ::y and ::x) have a fast path
> - tm = _matchonly(ta, tb) or _matchonly(tb, ta)
> - if tm:
> - return w, ('func', ('symbol', 'only'), tm)
> + m = _matchonly(ta, tb) or _matchonly(tb, ta)
> + if m:
> + return w, _build('only(_, _)', *m[1:])
>
> if tb is not None and tb[0] == 'not':
> return wa, ('difference', ta, tb[1])
> @@ -387,7 +405,7 @@ def _optimize(x, small):
> w, t = ss[0]
> else:
> s = '\0'.join(t[1] for w, t in ss)
> - y = ('func', ('symbol', '_list'), ('string', s))
> + y = _build('_list(_)', ('string', s))
> w, t = _optimize(y, False)
> ws.append(w)
> ts.append(t)
> @@ -407,8 +425,7 @@ def _optimize(x, small):
> elif op == 'not':
> # Optimize not public() to _notpublic() because we have a fast version
> if x[1][:3] == ('func', ('symbol', 'public'), None):
> - newsym = ('func', ('symbol', '_notpublic'), None)
> - o = _optimize(newsym, not small)
> + o = _optimize(_build('_notpublic()'), not small)
> return o[0], o[1]
> else:
> o = _optimize(x[1], not small)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list