[PATCH 1 of 2 v2] matcher: use re2 bindings if available
Bryan O'Sullivan
bos at serpentine.com
Fri Jun 1 17:35:16 CDT 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1338589580 25200
# Node ID 45de1c920fcaea462542a2eaee90c3ac0a83162e
# Parent 17d31c0fe181484e513832afa1c7a1095685dc25
matcher: use re2 bindings if available
There are two sets of Python re2 bindings available on the internet;
this code works with both.
Using re2 can greatly improve "hg status" performance when a .hgignore
file becomes even modestly complex.
Example: "hg status" on a clean tree with 134K files, where "hg
debugignore" reports a regexp 4256 bytes in size.
no .hgignore: 1.76 sec
Python re: 2.79 sec
re2: 1.82 sec
The overhead of regexp matching drops from 1.03 seconds with stock
re to 0.06 seconds with re2.
(For comparison, a git repo with the same contents and .gitignore
file runs "git status -s" in 1.71 seconds, i.e. only slightly faster
than hg with re2.)
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -9,6 +9,14 @@ import re
import scmutil, util, fileset
from i18n import _
+def _rematcher(pat):
+ m = util.compilere(pat)
+ try:
+ # test_match is provided only by Facebook's re2 bindings, where it is slightly faster than match
+ return m.test_match
+ except AttributeError:
+ return m.match # fallback for compiled patterns that have no test_match attribute
+
def _expandsets(pats, ctx):
'''convert set: patterns into a list of files in the given context'''
fset = set()
@@ -280,7 +288,7 @@ def _buildregexmatch(pats, tail):
pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
if len(pat) > 20000:
raise OverflowError
- return pat, re.compile(pat).match
+ return pat, _rematcher(pat)
except OverflowError:
# We're using a Python with a tiny regex engine and we
# made it explode, so we'll divide the pattern list in two
@@ -294,7 +302,7 @@ def _buildregexmatch(pats, tail):
except re.error:
for k, p in pats:
try:
- re.compile('(?:%s)' % _regex(k, p, tail))
+ _rematcher('(?:%s)' % _regex(k, p, tail))
except re.error:
raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
raise util.Abort(_("invalid pattern"))
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -617,6 +617,30 @@ def checkcase(path):
except OSError:
return True
+try:
+ import re2
+ _re2 = None
+except ImportError:
+ _re2 = False
+
+def compilere(pat):
+ '''Compile a regular expression, using re2 if possible
+
+ Falls back to re.compile when re2 is unavailable or raises re2.error; for best performance, use only re2-compatible regexp features.'''
+ global _re2
+ if _re2 is None:
+ try:
+ re2.compile # NOTE(review): bare attribute access; presumably forces a lazily resolved re2 import to load - TODO confirm
+ _re2 = True
+ except ImportError:
+ _re2 = False
+ if _re2:
+ try:
+ return re2.compile(pat)
+ except re2.error:
+ pass # re2 rejected the pattern; fall through to the stock re module
+ return re.compile(pat)
+
_fspathcache = {}
def fspath(name, root):
'''Get name in the case stored in the filesystem
More information about the Mercurial-devel
mailing list