[PATCH 15 of 21 V2] speedy: add support for arbitrary file patterns in path query
Tomasz Kleczek
tkleczek at fb.com
Thu Dec 13 20:52:27 CST 2012
# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355422376 28800
# Node ID 2fd469f383645ddd6bdf96b412f5b5f5bc5d6606
# Parent fd47c0de8d33a0a9106d8bbdc71bd66e3db8764e
speedy: add support for arbitrary file patterns in path query
This change speeds up commands such as:
hg log "glob:*"
hg log "relglob:*.py"
hg log d2 --include "**.py"
Only the fileset patterns are still not supported server-side.
diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -91,18 +91,26 @@
`rev` is in fncache then fncache[rev] is a list with filenames
this rev modifies that are matching according to `match`.
- If match containts only 'path' or 'relpath' patterns the query is sent
+ If match doesn't contain any fileset expressions, the query is sent
to the server, otherwise it is performed locally as server doesn't
- support arbitrary patterns just yet.
+ support fileset expressions.
"""
- anynonpaths = bool(filter(lambda (k, v): k != 'path', match._pats))
- if match._includepats or match._excludepats or anynonpaths:
- # For now, server supports only literal paths and not arbitrary
- # patterns, fall back to the local query
+ allpats = match._pats + match._includepats + match._excludepats
+ anyfilesetexp = bool(filter(lambda (k, v): k == 'set', allpats))
+ if anyfilesetexp:
+ # Server doesn't support filesets, fall back to the local
+ # fileset query
return cmdutil.filterrevs(self._repo, list(self._repo), match)
else:
- paths = [v for k, v in match._pats]
- wanted = self._proxy.request('path', (paths,))
+ # Would like to send the match object over the network, but
+ # it is not possible as it contains functions and working
+ # dir context object. Send all parameters used to init the
+ # object in a dict instead and create the object server-side.
+ # match._ctx field is used only if one of the patterns to match is
+ # a 'set:' pattern. Therefore we do not lose any information here.
+ matchdict = dict(patterns=match._pats, include=match._includepats,
+ exclude=match._excludepats)
+ wanted = self._proxy.request('path', (matchdict,))
return set(nodestorevs(self._repo, wanted)), {}
def _patchedauthor(metapeer, repo, subset, pats):
diff --git a/hgext/speedy/index.py b/hgext/speedy/index.py
--- a/hgext/speedy/index.py
+++ b/hgext/speedy/index.py
@@ -53,3 +53,13 @@
for path in paths:
filechgs.setdefault(path, []).append(ctx.node())
return filechgs
+
+def makefiles(ctxs):
+ """Return the `files` index.
+
+ `files` is keyed by file name, with each value being an empty string
+ """
+ files = set()
+ for ctx in ctxs:
+ files.update(ctx.files())
+ return dict([(fn, '') for fn in files])
diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
--- a/hgext/speedy/server.py
+++ b/hgext/speedy/server.py
@@ -14,6 +14,7 @@
from mercurial import cmdutil
from mercurial.i18n import _
from mercurial import util
+from mercurial import match as matchmod
import index
import protocol
import tcptransport
@@ -59,10 +60,44 @@
return [node for node, date in self.chgdate.iteritems()
if matcher(date)]
- def path(self, paths):
- """Return a list of changesets that modify any of the paths.
+ def path(self, matchargs):
+ """Return a list of changesets that modify the specified paths.
- Only the changes present in `subset` are returned.
+ matchargs: a dict that may contain the following parameters:
+ 'patterns' - a pattern list
+ 'include' - a pattern list describing additional paths
+ to include
+ 'exclude' - a pattern list describing additional paths
+ to exclude
+
+ A pattern list is an iterable with pairs of (kind, pattern),
+ kind may be any pattern kind recognized by match.match
+ constructor except for 'relpath' and 'set'.
+
+ If all patterns are literal paths, we can compute answer very
+ fast using `filechgs` index (see _literalpath method). Otherwise,
+ we have to fall back to an exhaustive search (see _patternpath
+ method).
+ """
+ pats = matchargs.get('patterns', [])
+ include = matchargs.get('include', [])
+ exclude = matchargs.get('exclude', [])
+ kinds = [k for k, v in pats]
+ if not include and not exclude and kinds == ['path'] * len(kinds):
+ paths = [ v for k, v in pats ]
+ wanted = self._literalpath(paths)
+ else:
+ def patsconvert(pats):
+ return[':'.join(p) for p in pats]
+ patterns = patsconvert(pats)
+ match = matchmod.match(self.repo.root, self.repo.root,
+ patterns, include=patsconvert(include),
+ exclude=patsconvert(exclude))
+ wanted = self._patternpath(match)
+ return wanted
+
+ def _literalpath(self, paths):
+ """Return a list of changesets touching any of the paths.
Uses `filechgs` index which provides the mapping from paths
(files and directories) to a list of changes touching this path.
@@ -74,10 +109,23 @@
nodes.update(newnodes)
return list(nodes)
+ def _patternpath(self, match):
+ """Return a list of changesets matching given files.
+
+ match: a callable that defines which files are relevant.
+ File is relevant if match(filename) == True.
+
+ Slow compared to _literalpath since it iterates through all filenames
+ in the repository history.
+ """
+ matchingfiles = filter(match, self.files.keys())
+ return self._literalpath(matchingfiles)
+
indicecfg = {
'userchgs': index.makeuserchgs,
'chgdate': index.makechgdate,
'filechgs': index.makefilechgs,
+ 'files': index.makefiles,
}
def makeserver(repo):
diff --git a/tests/test-speedy.t b/tests/test-speedy.t
--- a/tests/test-speedy.t
+++ b/tests/test-speedy.t
@@ -272,10 +272,10 @@
$ hg log --rev "date(10/20/2012) & user(testuser2)"
chg1
- $ cat >> $TESTTMP/localrepo/.hg/hgrc <<EOF_END
- > [speedy]
- > client = False
- > EOF_END
+ $ hg log "glob:d2/*" --exclude "**.py"
+ chgx
+ chgpushed
+ chg4
$ cd $TESTTMP/serverrepo
More information about the Mercurial-devel
mailing list