[PATCH 2 of 7] match: improve documentation - docstrings and more descriptive variable naming
Mads Kiilerich
mads at kiilerich.com
Wed Jan 15 19:18:10 CST 2014
# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1380816081 -7200
# Thu Oct 03 18:01:21 2013 +0200
# Node ID 43617c49e7c0b0512e18f3fe9c5712a25e09cdbd
# Parent 7a74b5ba8e5e663786774fe84ec4ecca0207a9c5
match: improve documentation - docstrings and more descriptive variable naming
No real changes.
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -9,8 +9,10 @@ import re
import util, fileset, pathutil
from i18n import _
-def _rematcher(pat):
- m = util.compilere(pat)
+def _rematcher(regexp):
+ '''compile the regexp with the best available regexp engine and return a
+ matcher function'''
+ m = util.compilere(regexp)
try:
# slightly faster, provided by facebook's re2 bindings
return m.test_match
@@ -18,7 +20,9 @@ def _rematcher(pat):
return m.match
def _expandsets(pats, ctx):
- '''convert set: patterns into a list of files in the given context'''
+ '''Process the fileset patterns (kind='set') in pats.
+ Returns tuple with the patterns expanded in the given context plus the
+ remaining patterns.'''
fset = set()
other = []
@@ -41,10 +45,10 @@ class match(object):
root - the canonical root of the tree you're matching against
cwd - the current working directory, if relevant
patterns - patterns to find
- include - patterns to include
- exclude - patterns to exclude
- default - if a pattern in names has no explicit type, assume this one
- exact - patterns are actually literals
+ include - patterns to include (unless excluded)
+ exclude - patterns to exclude (even if included)
+ default - if a pattern in patterns has no explicit type, assume this one
+ exact - patterns are actually filenames (include/exclude still apply)
a pattern is one of:
'glob:<glob>' - a glob relative to cwd
@@ -114,10 +118,12 @@ class match(object):
def __iter__(self):
for f in self._files:
yield f
+
+ # Callbacks related to how the matcher is used by dirstate.walk.
+ # Subscribers to these events monkeypatch the matcher object.
def bad(self, f, msg):
- '''callback for each explicit file that can't be
- found/accessed, with an error message
- '''
+ '''Callback from dirstate.walk for each explicit file that can't be
+ found/accessed, with an error message.'''
pass
# If this is set, it will be called when an explicitly listed directory is
# visited.
@@ -127,15 +133,25 @@ class match(object):
traversedir = None
def missing(self, f):
pass
- def exact(self, f):
- return f in self._fmap
def rel(self, f):
+ '''Convert repo path back to path that is relative to cwd of matcher.'''
return util.pathto(self._root, self._cwd, f)
def files(self):
+ '''Explicitly listed files or patterns or roots:
+ if no patterns or .always(): empty list,
+ if exact: list exact files,
+ if not .anypats(): list all files and dirs,
+ else: optimal roots'''
return self._files
+ def exact(self, f):
+ '''Returns True if f is in .files().'''
+ return f in self._fmap
def anypats(self):
+ '''Matcher uses patterns or include/exclude.'''
return self._anypats
def always(self):
+ '''Matcher will match everything and .files() will be empty
+ - optimization might be possible and necessary.'''
return self._always
class exact(match):
@@ -191,21 +207,36 @@ class narrowmatcher(match):
def bad(self, f, msg):
self._matcher.bad(self._path + "/" + f, msg)
-def patkind(pat):
- return _patsplit(pat, None)[0]
+def patkind(pattern, default=None):
+ '''If pattern is 'kind:pattern' with a known kind, return kind.'''
+ return _patsplit(pattern, default)[0]
-def _patsplit(pat, default):
+def _patsplit(pattern, default):
"""Split a string into an optional pattern kind prefix and the
actual pattern."""
- if ':' in pat:
- kind, val = pat.split(':', 1)
+ if ':' in pattern:
+ kind, pat = pattern.split(':', 1)
if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
'listfile', 'listfile0', 'set'):
- return kind, val
- return default, pat
+ return kind, pat
+ return default, pattern
def _globre(pat):
- "convert a glob pattern into a regexp"
+ r'''Convert an extended glob pattern to a regexp pattern.
+
+ >>> print _globre(r'?')
+ .
+ >>> print _globre(r'*')
+ [^/]*
+ >>> print _globre(r'**')
+ .*
+ >>> print _globre(r'[a*?!^][^b][!c]')
+ [a*?!^][\^b][^c]
+ >>> print _globre(r'{a,b}')
+ (?:a|b)
+ >>> print _globre(r'.\*\?')
+ \.\*\?
+ '''
i, n = 0, len(pat)
res = ''
group = 0
@@ -260,38 +291,42 @@ def _globre(pat):
res += escape(c)
return res
-def _regex(kind, name, tail):
- '''convert a pattern into a regular expression'''
- if not name:
+def _regex(kind, pat, globtailre):
+ '''Convert a (normalized) pattern of any kind into a regular expression.'''
+ if not pat:
return ''
if kind == 're':
- return name
- elif kind == 'path':
- return '^' + re.escape(name) + '(?:/|$)'
- elif kind == 'relglob':
- return '(?:|.*/)' + _globre(name) + tail
- elif kind == 'relpath':
- return re.escape(name) + '(?:/|$)'
- elif kind == 'relre':
- if name.startswith('^'):
- return name
- return '.*' + name
- return _globre(name) + tail
+ return pat
+ if kind == 'path':
+ return '^' + re.escape(pat) + '(?:/|$)'
+ if kind == 'relglob':
+ return '(?:|.*/)' + _globre(pat) + globtailre
+ if kind == 'relpath':
+ return re.escape(pat) + '(?:/|$)'
+ if kind == 'relre':
+ if pat.startswith('^'):
+ return pat
+ return '.*' + pat
+ # 'glob' - which also is the default
+ return _globre(pat) + globtailre
-def _buildmatch(ctx, pats, tail):
+def _buildmatch(ctx, pats, globtailre):
+ '''Return regexp string and a matcher function for pats.'''
fset, pats = _expandsets(pats, ctx)
if not pats:
return "", fset.__contains__
- pat, mf = _buildregexmatch(pats, tail)
+ pat, mf = _buildregexmatch(pats, globtailre)
if fset:
return pat, lambda f: f in fset or mf(f)
return pat, mf
-def _buildregexmatch(pats, tail):
- """build a matching function from a set of patterns"""
+def _buildregexmatch(pats, globtailre):
+ """Build a match function from a list of kinds and patterns,
+ return regexp string and a matcher function."""
try:
- pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
+ pat = '(?:%s)' % '|'.join([_regex(k, p, globtailre)
+ for (k, p) in pats])
if len(pat) > 20000:
raise OverflowError
return pat, _rematcher(pat)
@@ -302,46 +337,49 @@ def _buildregexmatch(pats, tail):
l = len(pats)
if l < 2:
raise
- pata, a = _buildregexmatch(pats[:l//2], tail)
- patb, b = _buildregexmatch(pats[l//2:], tail)
+ pata, a = _buildregexmatch(pats[:l//2], globtailre)
+ patb, b = _buildregexmatch(pats[l//2:], globtailre)
return pat, lambda s: a(s) or b(s)
except re.error:
for k, p in pats:
try:
- _rematcher('(?:%s)' % _regex(k, p, tail))
+ _rematcher('(?:%s)' % _regex(k, p, globtailre))
except re.error:
raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
raise util.Abort(_("invalid pattern"))
-def _normalize(names, default, root, cwd, auditor):
+def _normalize(patterns, default, root, cwd, auditor):
+ '''Convert 'kind:pat' from the patterns list to tuples with kind and
+ normalized and rooted patterns and with listfiles expanded.'''
pats = []
- for kind, name in [_patsplit(p, default) for p in names]:
+ for kind, pat in [_patsplit(p, default) for p in patterns]:
if kind in ('glob', 'relpath'):
- name = pathutil.canonpath(root, cwd, name, auditor)
+ pat = pathutil.canonpath(root, cwd, pat, auditor)
elif kind in ('relglob', 'path'):
- name = util.normpath(name)
+ pat = util.normpath(pat)
elif kind in ('listfile', 'listfile0'):
try:
- files = util.readfile(name)
+ files = util.readfile(pat)
if kind == 'listfile0':
files = files.split('\0')
else:
files = files.splitlines()
files = [f for f in files if f]
except EnvironmentError:
- raise util.Abort(_("unable to read file list (%s)") % name)
+ raise util.Abort(_("unable to read file list (%s)") % pat)
pats += _normalize(files, default, root, cwd, auditor)
continue
-
- pats.append((kind, name))
+ # else: re or relre - which cannot be normalized
+ pats.append((kind, pat))
return pats
-def _roots(patterns):
+def _roots(pats):
+ '''Find the best root directories of the patterns in pats.'''
r = []
- for kind, name in patterns:
+ for kind, pat in pats:
if kind == 'glob': # find the non-glob prefix
root = []
- for p in name.split('/'):
+ for p in pat.split('/'):
if '[' in p or '{' in p or '*' in p or '?' in p:
break
root.append(p)
@@ -349,14 +387,16 @@ def _roots(patterns):
return ['.']
r.append('/'.join(root))
elif kind in ('relpath', 'path'):
- if not name:
+ if not pat:
return ['.']
- r.append(name)
+ r.append(pat)
else: # relglob, re, relre
return ['.']
return r
-def _anypats(patterns):
- for kind, name in patterns:
+def _anypats(pats):
+ '''Return True if any of the pats are non-trivial.'''
+ for kind, pat in pats:
if kind in ('glob', 're', 'relglob', 'relre', 'set'):
return True
+ # else: None, path or relpath
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -530,17 +530,19 @@ def revrange(repo, revs):
return l
-def expandpats(pats):
+def expandpats(patterns):
+ '''Expand bare globs when running on windows.
+ On posix we assume it has already been done by sh.'''
if not util.expandglobs:
- return list(pats)
+ return list(patterns)
ret = []
- for p in pats:
- kind, name = matchmod._patsplit(p, None)
+ for p in patterns:
+ kind, pat = matchmod._patsplit(p, None)
if kind is None:
try:
- globbed = glob.glob(name)
+ globbed = glob.glob(pat)
except re.error:
- globbed = [name]
+ globbed = [pat]
if globbed:
ret.extend(globbed)
continue
@@ -548,6 +550,8 @@ def expandpats(pats):
return ret
def matchandpats(ctx, pats=[], opts={}, globbed=False, default='relpath'):
+ '''Return a matcher and the patterns that were used.
+ The matcher will warn about bad matches.'''
if pats == ("",):
pats = []
if not globbed and default == 'relpath':
@@ -561,12 +565,15 @@ def matchandpats(ctx, pats=[], opts={},
return m, pats
def match(ctx, pats=[], opts={}, globbed=False, default='relpath'):
+ '''Return a matcher that will warn about bad matches.'''
return matchandpats(ctx, pats, opts, globbed, default)[0]
def matchall(repo):
+ '''Return a matcher that will efficiently match everything.'''
return matchmod.always(repo.root, repo.getcwd())
def matchfiles(repo, files):
+ '''Return a matcher that will efficiently match exactly these files.'''
return matchmod.exact(repo.root, repo.getcwd(), files)
def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
More information about the Mercurial-devel
mailing list