D6764: match: simplify the regexps created for glob patterns
valentin.gatienbaron (Valentin Gatien-Baron)
phabricator at mercurial-scm.org
Mon Aug 26 04:17:22 UTC 2019
valentin.gatienbaron created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
This change is for legibility of the resulting regexes, although it may
help with performance as well.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D6764
AFFECTED FILES
mercurial/match.py
rust/hg-core/src/filepatterns.rs
rust/hg-core/src/utils.rs
tests/test-hgignore.t
tests/test-walk.t
CHANGE DETAILS
diff --git a/tests/test-walk.t b/tests/test-walk.t
--- a/tests/test-walk.t
+++ b/tests/test-walk.t
@@ -100,7 +100,7 @@
f mammals/skunk skunk
$ hg debugwalk -v -I 'relglob:*k'
* matcher:
- <includematcher includes='(?:|.*/)[^/]*k(?:/|$)'>
+ <includematcher includes='.*k(?:/|$)'>
f beans/black ../beans/black
f fenugreek ../fenugreek
f mammals/skunk skunk
@@ -108,7 +108,7 @@
* matcher:
<intersectionmatcher
m1=<patternmatcher patterns='mammals(?:/|$)'>,
- m2=<includematcher includes='(?:|.*/)[^/]*k(?:/|$)'>>
+ m2=<includematcher includes='.*k(?:/|$)'>>
f mammals/skunk skunk
$ hg debugwalk -v -I 're:.*k$'
* matcher:
diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t
--- a/tests/test-hgignore.t
+++ b/tests/test-hgignore.t
@@ -177,7 +177,7 @@
? a.c
? syntax
$ hg debugignore
- <includematcher includes='(?:|.*/)[^/]*\\.o(?:/|$)'>
+ <includematcher includes='.*\\.o(?:/|$)'>
$ cd ..
$ echo > .hg/testhgignorerel
@@ -224,7 +224,7 @@
A b.o
$ hg debugignore
- <includematcher includes='(?:|.*/)[^/]*(?:/|$)'>
+ <includematcher includes='.*(?:/|$)'>
$ hg debugignore b.o
b.o is ignored
diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -41,6 +41,7 @@
fn trim_end(&self) -> &Self;
fn trim_start(&self) -> &Self;
fn trim(&self) -> &Self;
+ fn chop_prefix(&self, needle:&[u8]) -> Option<&[u8]>;
}
fn is_not_whitespace(c: &u8) -> bool {
@@ -81,4 +82,12 @@
fn trim(&self) -> &[u8] {
self.trim_start().trim_end()
}
+
+ fn chop_prefix(&self, needle:&[u8]) -> Option<&[u8]> {
+ if self.starts_with(needle) {
+ Some(&self[needle.len()..])
+ } else {
+ None
+ }
+ }
}
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -184,14 +184,22 @@
res.extend(b"[^/]+$");
res
}
+ PatternSyntax::RelGlob => {
+ let mut res: Vec<u8> = vec![];
+ let glob_re = glob_to_re(pattern);
+ if let Some(rest) = glob_re.chop_prefix(b"[^/]*") {
+ res.extend(b".*");
+ res.extend(rest);
+ } else {
+ res.extend(b"(?:|.*/)");
+ res.extend(glob_re);
+ }
+ res.extend(globsuffix.iter());
+ res
+ }
PatternSyntax::Glob
- | PatternSyntax::RelGlob
| PatternSyntax::RootGlob => {
let mut res: Vec<u8> = vec![];
- if syntax == PatternSyntax::RelGlob {
- res.extend(b"(?:|.*/)");
- }
-
res.extend(glob_to_re(pattern));
res.extend(globsuffix.iter());
res
@@ -268,8 +276,8 @@
continue;
}
- if line.starts_with(b"syntax:") {
- let syntax = line[b"syntax:".len()..].trim();
+ if let Some(syntax) = line.chop_prefix(b"syntax:") {
+ let syntax = syntax.trim();
if let Some(rel_syntax) = SYNTAXES.get(syntax) {
current_syntax = rel_syntax;
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -1223,7 +1223,12 @@
# Anything after the pattern must be a non-directory.
return escaped + '[^/]+$'
if kind == 'relglob':
- return '(?:|.*/)' + _globre(pat) + globsuffix
+ globre = _globre(pat)
+ if globre.startswith('[^/]*'):
+ # When pat has the form *XYZ (common), make the returned regex more
+ # legible by returning the regex for **XYZ instead of **/*XYZ.
+ return '.*' + globre[len('[^/]*'):] + globsuffix
+ return '(?:|.*/)' + globre + globsuffix
if kind == 'relre':
if pat.startswith('^'):
return pat
To: valentin.gatienbaron, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list