[PATCH 2 of 7] match: improve documentation - docstrings and more descriptive variable naming

Wed Jan 15 19:18:10 CST 2014

# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1380816081 -7200
#      Thu Oct 03 18:01:21 2013 +0200
# Node ID 43617c49e7c0b0512e18f3fe9c5712a25e09cdbd
# Parent  7a74b5ba8e5e663786774fe84ec4ecca0207a9c5
match: improve documentation - docstrings and more descriptive variable naming

No real changes.

diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -9,8 +9,10 @@ import re
 import util, fileset, pathutil
 from i18n import _
 
-def _rematcher(pat):
-    m = util.compilere(pat)
+def _rematcher(regexp):
+    '''compile the regexp with the best available regexp engine and return a
+    matcher function'''
+    m = util.compilere(regexp)
     try:
         # slightly faster, provided by facebook's re2 bindings
         return m.test_match
@@ -18,7 +20,9 @@ def _rematcher(pat):
         return m.match
 
 def _expandsets(pats, ctx):
-    '''convert set: patterns into a list of files in the given context'''
+    '''Process the fileset patterns (kind='set') in pats.
+    Returns tuple with the patterns expanded in the given context plus the
+    remaining patterns.'''
     fset = set()
     other = []
 
@@ -41,10 +45,10 @@ class match(object):
         root - the canonical root of the tree you're matching against
         cwd - the current working directory, if relevant
         patterns - patterns to find
-        include - patterns to include
-        exclude - patterns to exclude
-        default - if a pattern in names has no explicit type, assume this one
-        exact - patterns are actually literals
+        include - patterns to include (unless excluded)
+        exclude - patterns to exclude (even if included)
+        default - if a pattern in patterns has no explicit type, assume this one
+        exact - patterns are actually filenames (include/exclude still apply)
 
         a pattern is one of:
         'glob:<glob>' - a glob relative to cwd
@@ -114,10 +118,12 @@ class match(object):
     def __iter__(self):
         for f in self._files:
             yield f
+
+    # Callbacks related to how the matcher is used by dirstate.walk.
+    # Subscribers to these events monkeypatch the matcher object.
     def bad(self, f, msg):
-        '''callback for each explicit file that can't be
-        found/accessed, with an error message
-        '''
+        '''Callback from dirstate.walk for each explicit file that can't be
+        found/accessed, with an error message.'''
         pass
     # If this is set, it will be called when an explicitly listed directory is
     # visited.
@@ -127,15 +133,25 @@ class match(object):
     traversedir = None
     def missing(self, f):
         pass
-    def exact(self, f):
-        return f in self._fmap
     def rel(self, f):
+        '''Convert repo path back to path that is relative to cwd of matcher.'''
         return util.pathto(self._root, self._cwd, f)
     def files(self):
+        '''Explicitly listed files or patterns or roots:
+        if no patterns or .always(): empty list,
+        if exact: list exact files,
+        if not .anypats(): list all files and dirs,
+        else: optimal roots'''
         return self._files
+    def exact(self, f):
+        '''Returns True if f is in .files().'''
+        return f in self._fmap
     def anypats(self):
+        '''Matcher uses patterns or include/exclude.'''
         return self._anypats
     def always(self):
+        '''Matcher will match everything and .files() will be empty
+        - optimization might be possible and necessary.'''
         return self._always
 
 class exact(match):
@@ -191,21 +207,36 @@ class narrowmatcher(match):
     def bad(self, f, msg):
         self._matcher.bad(self._path + "/" + f, msg)
 
-def patkind(pat):
-    return _patsplit(pat, None)[0]
+def patkind(pattern, default=None):
+    '''Return kind if pattern is 'kind:pat' with a known kind, else default.'''
+    return _patsplit(pattern, default)[0]
 
-def _patsplit(pat, default):
+def _patsplit(pattern, default):
     """Split a string into an optional pattern kind prefix and the
     actual pattern."""
-    if ':' in pat:
-        kind, val = pat.split(':', 1)
+    if ':' in pattern:
+        kind, pat = pattern.split(':', 1)
         if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                     'listfile', 'listfile0', 'set'):
-            return kind, val
-    return default, pat
+            return kind, pat
+    return default, pattern
 
 def _globre(pat):
-    "convert a glob pattern into a regexp"
+    r'''Convert an extended glob pattern to a regexp pattern.
+
+    >>> print _globre(r'?')
+    .
+    >>> print _globre(r'*')
+    [^/]*
+    >>> print _globre(r'**')
+    .*
+    >>> print _globre(r'[a*?!^][^b][!c]')
+    [a*?!^][\^b][^c]
+    >>> print _globre(r'{a,b}')
+    (?:a|b)
+    >>> print _globre(r'.\*\?')
+    \.\*\?
+    '''
     i, n = 0, len(pat)
     res = ''
     group = 0
@@ -260,38 +291,42 @@ def _globre(pat):
             res += escape(c)
     return res
 
-def _regex(kind, name, tail):
-    '''convert a pattern into a regular expression'''
-    if not name:
+def _regex(kind, pat, globtailre):
+    '''Convert a (normalized) pattern of any kind into a regular expression.'''
+    if not pat:
         return ''
     if kind == 're':
-        return name
-    elif kind == 'path':
-        return '^' + re.escape(name) + '(?:/|$)'
-    elif kind == 'relglob':
-        return '(?:|.*/)' + _globre(name) + tail
-    elif kind == 'relpath':
-        return re.escape(name) + '(?:/|$)'
-    elif kind == 'relre':
-        if name.startswith('^'):
-            return name
-        return '.*' + name
-    return _globre(name) + tail
+        return pat
+    if kind == 'path':
+        return '^' + re.escape(pat) + '(?:/|$)'
+    if kind == 'relglob':
+        return '(?:|.*/)' + _globre(pat) + globtailre
+    if kind == 'relpath':
+        return re.escape(pat) + '(?:/|$)'
+    if kind == 'relre':
+        if pat.startswith('^'):
+            return pat
+        return '.*' + pat
+    # 'glob' - which also is the default
+    return _globre(pat) + globtailre
 
-def _buildmatch(ctx, pats, tail):
+def _buildmatch(ctx, pats, globtailre):
+    '''Return regexp string and a matcher function for pats.'''
     fset, pats = _expandsets(pats, ctx)
     if not pats:
         return "", fset.__contains__
 
-    pat, mf = _buildregexmatch(pats, tail)
+    pat, mf = _buildregexmatch(pats, globtailre)
     if fset:
         return pat, lambda f: f in fset or mf(f)
     return pat, mf
 
-def _buildregexmatch(pats, tail):
-    """build a matching function from a set of patterns"""
+def _buildregexmatch(pats, globtailre):
+    """Build a match function from a list of kinds and patterns,
+    return regexp string and a matcher function."""
     try:
-        pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
+        pat = '(?:%s)' % '|'.join([_regex(k, p, globtailre)
+                                   for (k, p) in pats])
         if len(pat) > 20000:
             raise OverflowError
         return pat, _rematcher(pat)
@@ -302,46 +337,49 @@ def _buildregexmatch(pats, tail):
         l = len(pats)
         if l < 2:
             raise
-        pata, a = _buildregexmatch(pats[:l//2], tail)
-        patb, b = _buildregexmatch(pats[l//2:], tail)
+        pata, a = _buildregexmatch(pats[:l//2], globtailre)
+        patb, b = _buildregexmatch(pats[l//2:], globtailre)
         return pat, lambda s: a(s) or b(s)
     except re.error:
         for k, p in pats:
             try:
-                _rematcher('(?:%s)' % _regex(k, p, tail))
+                _rematcher('(?:%s)' % _regex(k, p, globtailre))
             except re.error:
                 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
         raise util.Abort(_("invalid pattern"))
 
-def _normalize(names, default, root, cwd, auditor):
+def _normalize(patterns, default, root, cwd, auditor):
+    '''Convert 'kind:pat' from the patterns list to tuples with kind and
+    normalized and rooted patterns and with listfiles expanded.'''
     pats = []
-    for kind, name in [_patsplit(p, default) for p in names]:
+    for kind, pat in [_patsplit(p, default) for p in patterns]:
         if kind in ('glob', 'relpath'):
-            name = pathutil.canonpath(root, cwd, name, auditor)
+            pat = pathutil.canonpath(root, cwd, pat, auditor)
         elif kind in ('relglob', 'path'):
-            name = util.normpath(name)
+            pat = util.normpath(pat)
         elif kind in ('listfile', 'listfile0'):
             try:
-                files = util.readfile(name)
+                files = util.readfile(pat)
                 if kind == 'listfile0':
                     files = files.split('\0')
                 else:
                     files = files.splitlines()
                 files = [f for f in files if f]
             except EnvironmentError:
-                raise util.Abort(_("unable to read file list (%s)") % name)
+                raise util.Abort(_("unable to read file list (%s)") % pat)
             pats += _normalize(files, default, root, cwd, auditor)
             continue
-
-        pats.append((kind, name))
+        # else: re or relre - which cannot be normalized
+        pats.append((kind, pat))
     return pats
 
-def _roots(patterns):
+def _roots(pats):
+    '''Find the best root directories of the patterns in pats.'''
     r = []
-    for kind, name in patterns:
+    for kind, pat in pats:
         if kind == 'glob': # find the non-glob prefix
             root = []
-            for p in name.split('/'):
+            for p in pat.split('/'):
                 if '[' in p or '{' in p or '*' in p or '?' in p:
                     break
                 root.append(p)
@@ -349,14 +387,16 @@ def _roots(patterns):
                 return ['.']
             r.append('/'.join(root))
         elif kind in ('relpath', 'path'):
-            if not name:
+            if not pat:
                 return ['.']
-            r.append(name)
+            r.append(pat)
         else: # relglob, re, relre
             return ['.']
     return r
 
-def _anypats(patterns):
-    for kind, name in patterns:
+def _anypats(pats):
+    '''Return True if any of the pats are non-trivial.'''
+    for kind, pat in pats:
         if kind in ('glob', 're', 'relglob', 'relre', 'set'):
             return True
+        # else: None, path or relpath
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -530,17 +530,19 @@ def revrange(repo, revs):
 
     return l
 
-def expandpats(pats):
+def expandpats(patterns):
+    '''Expand bare globs when running on windows.
+    On posix we assume it has already been done by sh.'''
     if not util.expandglobs:
-        return list(pats)
+        return list(patterns)
     ret = []
-    for p in pats:
-        kind, name = matchmod._patsplit(p, None)
+    for p in patterns:
+        kind, pat = matchmod._patsplit(p, None)
         if kind is None:
             try:
-                globbed = glob.glob(name)
+                globbed = glob.glob(pat)
             except re.error:
-                globbed = [name]
+                globbed = [pat]
             if globbed:
                 ret.extend(globbed)
                 continue
@@ -548,6 +550,8 @@ def expandpats(pats):
     return ret
 
 def matchandpats(ctx, pats=[], opts={}, globbed=False, default='relpath'):
+    '''Return a matcher and the patterns that were used.
+    The matcher will warn about bad matches.'''
     if pats == ("",):
         pats = []
     if not globbed and default == 'relpath':
@@ -561,12 +565,15 @@ def matchandpats(ctx, pats=[], opts={}, 
     return m, pats
 
 def match(ctx, pats=[], opts={}, globbed=False, default='relpath'):
+    '''Return a matcher that will warn about bad matches.'''
     return matchandpats(ctx, pats, opts, globbed, default)[0]
 
 def matchall(repo):
+    '''Return a matcher that will efficiently match everything.'''
     return matchmod.always(repo.root, repo.getcwd())
 
 def matchfiles(repo, files):
+    '''Return a matcher that will efficiently match exactly these files.'''
     return matchmod.exact(repo.root, repo.getcwd(), files)
 
 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):