[PATCH 4 of 7] fileset: add stub for weight-based optimization
Yuya Nishihara
yuya at tcha.org
Fri Aug 3 11:01:41 EDT 2018
# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1532155946 -32400
# Sat Jul 21 15:52:26 2018 +0900
# Node ID 6c9246c08523616914af131700126cfc408114f7
# Parent 1598c4146f74757fef4e2db244b03188d197df52
fileset: add stub for weight-based optimization
The main purpose of this change is to group basic patterns, which can be
mapped to a plain matcher. I'm not so interested in a weight of each function.
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -902,6 +902,7 @@ def debugfileset(ui, repo, expr, **opts)
stages = [
('parsed', pycompat.identity),
('analyzed', filesetlang.analyze),
+ ('optimized', filesetlang.optimize),
]
stagenames = set(n for n, f in stages)
diff --git a/mercurial/fileset.py b/mercurial/fileset.py
--- a/mercurial/fileset.py
+++ b/mercurial/fileset.py
@@ -524,6 +524,7 @@ def match(ctx, expr, badfn=None):
"""Create a matcher for a single fileset expression"""
tree = filesetlang.parse(expr)
tree = filesetlang.analyze(tree)
+ tree = filesetlang.optimize(tree)
mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
return getmatch(mctx, tree)
diff --git a/mercurial/filesetlang.py b/mercurial/filesetlang.py
--- a/mercurial/filesetlang.py
+++ b/mercurial/filesetlang.py
@@ -164,12 +164,50 @@ def _analyze(x):
def analyze(x):
"""Transform raw parsed tree to evaluatable tree which can be fed to
- getmatch()
+ optimize() or getmatch()
All pseudo operations should be mapped to real operations or functions
defined in methods or symbols table respectively.
"""
return _analyze(x)
+def _optimize(x):
+ if x is None:
+ return 0, x
+
+ op = x[0]
+ if op in {'string', 'symbol'}:
+ return 0.5, x
+ if op == 'kindpat':
+ w, t = _optimize(x[2])
+ return w, (op, x[1], t)
+ if op == 'not':
+ w, t = _optimize(x[1])
+ return w, (op, t)
+ if op in {'and', 'minus'}:
+ wa, ta = _optimize(x[1])
+ wb, tb = _optimize(x[2])
+ return max(wa, wb), (op, ta, tb)
+ if op == 'or':
+ ws, ts = zip(*(_optimize(y) for y in x[1:]))
+ return max(ws), (op,) + ts
+ if op == 'list':
+ ws, ts = zip(*(_optimize(y) for y in x[1:]))
+ return sum(ws), (op,) + ts
+ if op == 'func':
+ f = getsymbol(x[1])
+ w = getattr(symbols.get(f), '_weight', 1)
+ wa, ta = _optimize(x[2])
+ return w + wa, (op, x[1], ta)
+ raise error.ProgrammingError('invalid operator %r' % op)
+
+def optimize(x):
+ """Reorder/rewrite evaluatable tree for optimization
+
+ All pseudo operations should be transformed beforehand.
+ """
+ _w, t = _optimize(x)
+ return t
+
def prettyformat(tree):
return parser.prettyformat(tree, ('string', 'symbol'))
diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py
--- a/mercurial/minifileset.py
+++ b/mercurial/minifileset.py
@@ -86,4 +86,5 @@ def compile(text):
"""
tree = filesetlang.parse(text)
tree = filesetlang.analyze(tree)
+ tree = filesetlang.optimize(tree)
return _compile(tree)
diff --git a/mercurial/registrar.py b/mercurial/registrar.py
--- a/mercurial/registrar.py
+++ b/mercurial/registrar.py
@@ -247,6 +247,9 @@ class filesetpredicate(_funcregistrarbas
implies 'matchctx.status()' at runtime or not (False, by
default).
+ Optional argument 'weight' indicates the estimated run-time cost, useful
+ for static optimization, default is 1. Higher weight means more expensive.
+
'filesetpredicate' instance in example above can be used to
decorate multiple functions.
@@ -259,8 +262,9 @@ class filesetpredicate(_funcregistrarbas
_getname = _funcregistrarbase._parsefuncdecl
_docformat = "``%s``\n %s"
- def _extrasetup(self, name, func, callstatus=False):
+ def _extrasetup(self, name, func, callstatus=False, weight=1):
func._callstatus = callstatus
+ func._weight = weight
class _templateregistrarbase(_funcregistrarbase):
"""Base of decorator to register functions as template specific one
diff --git a/tests/test-fileset.t b/tests/test-fileset.t
--- a/tests/test-fileset.t
+++ b/tests/test-fileset.t
@@ -180,6 +180,17 @@ Show parsed tree at stages:
(func
(symbol 'clean')
None)))
+ * optimized:
+ (or
+ (symbol 'a1')
+ (symbol 'a2')
+ (and
+ (func
+ (symbol 'grep')
+ (string 'b'))
+ (func
+ (symbol 'clean')
+ None)))
* matcher:
<unionmatcher matchers=[
<patternmatcher patterns='(?:a1$)'>,
More information about the Mercurial-devel
mailing list