D6764: match: simplify the regexps created for glob patterns

valentin.gatienbaron (Valentin Gatien-Baron) phabricator at mercurial-scm.org
Mon Aug 26 00:17:22 EDT 2019


valentin.gatienbaron created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This change is mainly for legibility of the resulting regexes, although it
  may help with performance as well.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6764

AFFECTED FILES
  mercurial/match.py
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/utils.rs
  tests/test-hgignore.t
  tests/test-walk.t

CHANGE DETAILS

diff --git a/tests/test-walk.t b/tests/test-walk.t
--- a/tests/test-walk.t
+++ b/tests/test-walk.t
@@ -100,7 +100,7 @@
   f  mammals/skunk  skunk
   $ hg debugwalk -v -I 'relglob:*k'
   * matcher:
-  <includematcher includes='(?:|.*/)[^/]*k(?:/|$)'>
+  <includematcher includes='.*k(?:/|$)'>
   f  beans/black    ../beans/black
   f  fenugreek      ../fenugreek
   f  mammals/skunk  skunk
@@ -108,7 +108,7 @@
   * matcher:
   <intersectionmatcher
     m1=<patternmatcher patterns='mammals(?:/|$)'>,
-    m2=<includematcher includes='(?:|.*/)[^/]*k(?:/|$)'>>
+    m2=<includematcher includes='.*k(?:/|$)'>>
   f  mammals/skunk  skunk
   $ hg debugwalk -v -I 're:.*k$'
   * matcher:
diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t
--- a/tests/test-hgignore.t
+++ b/tests/test-hgignore.t
@@ -177,7 +177,7 @@
   ? a.c
   ? syntax
   $ hg debugignore
-  <includematcher includes='(?:|.*/)[^/]*\\.o(?:/|$)'>
+  <includematcher includes='.*\\.o(?:/|$)'>
 
   $ cd ..
   $ echo > .hg/testhgignorerel
@@ -224,7 +224,7 @@
   A b.o
 
   $ hg debugignore
-  <includematcher includes='(?:|.*/)[^/]*(?:/|$)'>
+  <includematcher includes='.*(?:/|$)'>
 
   $ hg debugignore b.o
   b.o is ignored
diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -41,6 +41,7 @@
     fn trim_end(&self) -> &Self;
     fn trim_start(&self) -> &Self;
     fn trim(&self) -> &Self;
+    fn chop_prefix(&self, needle:&[u8]) -> Option<&[u8]>;
 }
 
 fn is_not_whitespace(c: &u8) -> bool {
@@ -81,4 +82,12 @@
     fn trim(&self) -> &[u8] {
         self.trim_start().trim_end()
     }
+
+    fn chop_prefix(&self, needle:&[u8]) -> Option<&[u8]> {
+        if self.starts_with(needle) {
+            Some(&self[needle.len()..])
+        } else {
+            None
+        }
+    }
 }
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -184,14 +184,22 @@
             res.extend(b"[^/]+$");
             res
         }
+        PatternSyntax::RelGlob => {
+            let mut res: Vec<u8> = vec![];
+            let glob_re = glob_to_re(pattern);
+            if let Some(rest) = glob_re.chop_prefix(b"[^/]*") {
+                res.extend(b".*");
+                res.extend(rest);
+            } else {
+                res.extend(b"(?:|.*/)");
+                res.extend(glob_re);
+            }
+            res.extend(globsuffix.iter());
+            res
+        }
         PatternSyntax::Glob
-        | PatternSyntax::RelGlob
         | PatternSyntax::RootGlob => {
             let mut res: Vec<u8> = vec![];
-            if syntax == PatternSyntax::RelGlob {
-                res.extend(b"(?:|.*/)");
-            }
-
             res.extend(glob_to_re(pattern));
             res.extend(globsuffix.iter());
             res
@@ -268,8 +276,8 @@
             continue;
         }
 
-        if line.starts_with(b"syntax:") {
-            let syntax = line[b"syntax:".len()..].trim();
+        if let Some(syntax) = line.chop_prefix(b"syntax:") {
+            let syntax = syntax.trim();
 
             if let Some(rel_syntax) = SYNTAXES.get(syntax) {
                 current_syntax = rel_syntax;
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -1223,7 +1223,12 @@
         # Anything after the pattern must be a non-directory.
         return escaped + '[^/]+$'
     if kind == 'relglob':
-        return '(?:|.*/)' + _globre(pat) + globsuffix
+        globre = _globre(pat)
+        if globre.startswith('[^/]*'):
+            # When pat has the form *XYZ (common), make the returned regex more
+            # legible by returning the regex for **XYZ instead of **/*XYZ.
+            return '.*' + globre[len('[^/]*'):] + globsuffix
+        return '(?:|.*/)' + globre + globsuffix
     if kind == 'relre':
         if pat.startswith('^'):
             return pat



To: valentin.gatienbaron, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list