[PATCH 2 of 3] Switch to new syntax for .hgignore files

Bryan O'Sullivan bos at serpentine.com
Sat Sep 17 02:38:32 CDT 2005


# HG changeset patch
# User Bryan O'Sullivan <bos at serpentine.com>
# Node ID fc3b41570082cea350ad4017e2b715a880749522
# Parent  5e9816decbb74f869518df583de257dd60595066
Switch to new syntax for .hgignore files.
Here is the new syntax, in summary.
Trailing white space is dropped.
The escape character is "\".
Comments start with #.
Empty lines are skipped.
Lines can be of the following formats:
syntax: regexp # defaults following lines to non-rooted regexps
syntax: glob   # defaults following lines to non-rooted globs
re:pattern     # non-rooted regular expression
glob:pattern   # non-rooted glob
pattern        # pattern of the current default type
The default pattern type is regexp, which is fully backward
compatible with the old .hgignore syntax.
In the dirstate class, the ignore method has been reworked to build its
match function with util.matcher, using the patterns returned by a new
dirstate.hgignore method.

diff -r 5e9816decbb7 -r fc3b41570082 .hgignore
--- a/.hgignore	Sat Sep 17 07:23:58 2005
+++ b/.hgignore	Sat Sep 17 07:27:27 2005
@@ -1,16 +1,21 @@
-\.elc$
-\.orig$
-\.rej$
-~$
-\.so$
-\.pyc$
-\.swp$
-\.prof$
-^tests/.*\.err$
-^build/
-^dist/
-^doc/.*\.[0-9](\.(x|ht)ml)?$
-^MANIFEST$
+syntax: glob
+
+*.elc
+*.orig
+*.rej
+*~
+*.so
+*.pyc
+*.swp
+*.prof
+tests/*.err
+build
+dist
+doc/*.[0-9]
+doc/*.[0-9].{x,ht}ml
+MANIFEST
+patches
+mercurial/__version__.py
+
+syntax: regexp
 ^\.pc/
-^patches/
-^mercurial/__version__.py$
diff -r 5e9816decbb7 -r fc3b41570082 mercurial/dirstate.py
--- a/mercurial/dirstate.py	Sat Sep 17 07:23:58 2005
+++ b/mercurial/dirstate.py	Sat Sep 17 07:27:27 2005
@@ -32,33 +32,61 @@
         if cwd == self.root: return ''
         return cwd[len(self.root) + 1:]
 
-    def ignore(self, f):
+    def hgignore(self):
+        '''return the contents of .hgignore as a list of patterns.
+
+        trailing white space is dropped.
+        the escape character is backslash.
+        comments start with #.
+        empty lines are skipped.
+
+        lines can be of the following formats:
+
+        syntax: regexp # defaults following lines to non-rooted regexps
+        syntax: glob   # defaults following lines to non-rooted globs
+        re:pattern     # non-rooted regular expression
+        glob:pattern   # non-rooted glob
+        pattern        # pattern of the current default type'''
+        syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:'}
+        def parselines(fp):
+            for line in fp:
+                escape = False
+                for i in xrange(len(line)):
+                    if escape: escape = False
+                    elif line[i] == '\\': escape = True
+                    elif line[i] == '#': break
+                line = line[:i].rstrip()
+                if line: yield line
+        pats = []
+        try:
+            fp = open(self.wjoin('.hgignore'))
+            syntax = 'relre:'
+            for line in parselines(fp):
+                if line.startswith('syntax:'):
+                    s = line[7:].strip()
+                    try:
+                        syntax = syntaxes[s]
+                    except KeyError:
+                        self.ui.warn("ignoring invalid syntax '%s'\n" % s)
+                    continue
+                pat = syntax + line
+                for s in syntaxes.values():
+                    if line.startswith(s):
+                        pat = line
+                        break
+                pats.append(pat)
+        except IOError: pass
+        return pats
+
+    def ignore(self, fn):
+        '''default match function used by dirstate and localrepository.
+        this honours the .hgignore file, and nothing more.'''
         if self.blockignore:
             return False
         if not self.ignorefunc:
-            bigpat = []
-            try:
-                l = file(self.wjoin(".hgignore"))
-                for pat in l:
-                    p = pat.rstrip()
-                    if p:
-                        try:
-                            re.compile(p)
-                        except:
-                            self.ui.warn("ignoring invalid ignore"
-                                         + " regular expression '%s'\n" % p)
-                        else:
-                            bigpat.append(p)
-            except IOError: pass
-
-            if bigpat:
-                s = "(?:%s)" % (")|(?:".join(bigpat))
-                r = re.compile(s)
-                self.ignorefunc = r.search
-            else:
-                self.ignorefunc = util.never
-
-        return self.ignorefunc(f)
+            files, self.ignorefunc, anypats = util.matcher(self.root,
+                                                           inc=self.hgignore())
+        return self.ignorefunc(fn)
 
     def __del__(self):
         if self.dirty:
@@ -353,9 +381,8 @@
                     checkappend(added, fn)
                 elif type == 'r':
                     checkappend(unknown, fn)
-            else:
-                if not self.ignore(fn) and match(fn):
-                    unknown.append(fn)
+            elif not self.ignore(fn) and match(fn):
+                unknown.append(fn)
             # return false because we've already handled all cases above.
             # there's no need for the walking code to process the file
             # any further.
diff -r 5e9816decbb7 -r fc3b41570082 mercurial/util.py
--- a/mercurial/util.py	Sat Sep 17 07:23:58 2005
+++ b/mercurial/util.py	Sat Sep 17 07:27:27 2005
@@ -122,7 +122,7 @@
     else:
         raise Abort('%s not under root' % myname)
 
-def matcher(canonroot, cwd, names, inc, exc, head=''):
+def matcher(canonroot, cwd='', names=['.'], inc=[], exc=[], head=''):
     """build a function to match a set of file patterns
 
     arguments:
@@ -134,11 +134,13 @@
     head - a regex to prepend to patterns to control whether a match is rooted
 
     a pattern is one of:
-    're:<regex>'
-    'glob:<shellglob>'
-    'path:<explicit path>'
+    'glob:<rooted glob>'
+    're:<rooted regexp>'
+    'path:<rooted path>'
+    'relglob:<relative glob>'
     'relpath:<relative path>'
-    '<relative path>'
+    'relre:<relative regexp>'
+    '<rooted path or regexp>'
 
     returns:
     a 3-tuple containing
@@ -151,8 +153,8 @@
     """
 
     def patkind(name):
-        for prefix in 're:', 'glob:', 'path:', 'relpath:':
-            if name.startswith(prefix): return name.split(':', 1)
+        for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
+            if name.startswith(prefix + ':'): return name.split(':', 1)
         for c in name:
             if c in _globchars: return 'glob', name
         return 'relpath', name
@@ -163,8 +165,14 @@
             return name
         elif kind == 'path':
             return '^' + re.escape(name) + '(?:/|$)'
+        elif kind == 'relglob':
+            return head + globre(name, '(?:|.*/)', tail)
         elif kind == 'relpath':
             return head + re.escape(name) + tail
+        elif kind == 'relre':
+            if name.startswith('^'):
+                return name
+            return '.*' + name
         return head + globre(name, '', tail)
 
     def matchfn(pats, tail):


More information about the Mercurial mailing list