D5815: global: use raw strings for regular expressions with escapes

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Mon Feb 4 17:20:30 UTC 2019


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Escape sequences like \w, \s, and \d are technically invalid
  in str/bytes. This became a deprecation warning in Python 3.6
  (https://bugs.python.org/issue27364). Python 3.8 bumps it to
  a SyntaxWarning (https://bugs.python.org/issue32912), which is
  non-silent by default.
  
  This commit changes a number of regular expressions to use
  br'' so regular expression special sequences don't need \\
  literals. This fixes roughly half of the SyntaxWarning we
  see in the code base with Python 3.8.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5815

AFFECTED FILES
  hgext/blackbox.py
  hgext/commitextras.py
  hgext/convert/cvsps.py
  hgext/mq.py
  hgext/phabricator.py
  hgext/releasenotes.py
  mercurial/color.py
  mercurial/patch.py

CHANGE DETAILS

diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -638,8 +638,8 @@
         return self.changed | self.removed
 
 # @@ -start,len +start,len @@ or @@ -start +start @@ if len is 1
-unidesc = re.compile('@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
-contextdesc = re.compile('(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
+unidesc = re.compile(br'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')
+contextdesc = re.compile(br'(?:---|\*\*\*) (\d+)(?:,(\d+))? (?:---|\*\*\*)')
 eolmodes = ['strict', 'crlf', 'lf', 'auto']
 
 class patchfile(object):
@@ -2762,7 +2762,7 @@
     return maxfile, maxtotal, addtotal, removetotal, binary
 
 def diffstatdata(lines):
-    diffre = re.compile('^diff .*-r [a-z0-9]+\s(.*)$')
+    diffre = re.compile(br'^diff .*-r [a-z0-9]+\s(.*)$')
 
     results = []
     filename, adds, removes, isbinary = None, 0, 0, False
diff --git a/mercurial/color.py b/mercurial/color.py
--- a/mercurial/color.py
+++ b/mercurial/color.py
@@ -484,7 +484,7 @@
             w32effects = None
         else:
             origattr = csbi.wAttributes
-            ansire = re.compile(b'\033\[([^m]*)m([^\033]*)(.*)',
+            ansire = re.compile(br'\033\[([^m]*)m([^\033]*)(.*)',
                                 re.MULTILINE | re.DOTALL)
 
     def win32print(ui, writefunc, text, **opts):
diff --git a/hgext/releasenotes.py b/hgext/releasenotes.py
--- a/hgext/releasenotes.py
+++ b/hgext/releasenotes.py
@@ -55,7 +55,7 @@
     ('api', _('API Changes')),
 ]
 
-RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$')
+RE_DIRECTIVE = re.compile(br'^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$')
 RE_ISSUE = br'\bissue ?[0-9]{4,6}(?![0-9])\b'
 
 BULLET_SECTION = _('Other Changes')
diff --git a/hgext/phabricator.py b/hgext/phabricator.py
--- a/hgext/phabricator.py
+++ b/hgext/phabricator.py
@@ -255,9 +255,9 @@
     repo.ui.setconfig(b'phabricator', b'repophid', repophid)
     return repophid
 
-_differentialrevisiontagre = re.compile(b'\AD([1-9][0-9]*)\Z')
+_differentialrevisiontagre = re.compile(br'\AD([1-9][0-9]*)\Z')
 _differentialrevisiondescre = re.compile(
-    b'^Differential Revision:\s*(?P<url>(?:.*)D(?P<id>[1-9][0-9]*))$', re.M)
+    br'^Differential Revision:\s*(?P<url>(?:.*)D(?P<id>[1-9][0-9]*))$', re.M)
 
 def getoldnodedrevmap(repo, nodelist):
     """find previous nodes that has been sent to Phabricator
diff --git a/hgext/mq.py b/hgext/mq.py
--- a/hgext/mq.py
+++ b/hgext/mq.py
@@ -1181,7 +1181,7 @@
     def makepatchname(self, title, fallbackname):
         """Return a suitable filename for title, adding a suffix to make
         it unique in the existing list"""
-        namebase = re.sub('[\s\W_]+', '_', title.lower()).strip('_')
+        namebase = re.sub(br'[\s\W_]+', b'_', title.lower()).strip(b'_')
         namebase = namebase[:75] # avoid too long name (issue5117)
         if namebase:
             try:
diff --git a/hgext/convert/cvsps.py b/hgext/convert/cvsps.py
--- a/hgext/convert/cvsps.py
+++ b/hgext/convert/cvsps.py
@@ -122,7 +122,7 @@
     re_31 = re.compile(b'----------------------------$')
     re_32 = re.compile(b'======================================='
                        b'======================================$')
-    re_50 = re.compile(b'revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
+    re_50 = re.compile(br'revision ([\d.]+)(\s+locked by:\s+.+;)?$')
     re_60 = re.compile(br'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
                        br'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
                        br'(\s+commitid:\s+([^;]+);)?'
diff --git a/hgext/commitextras.py b/hgext/commitextras.py
--- a/hgext/commitextras.py
+++ b/hgext/commitextras.py
@@ -58,7 +58,7 @@
                 if not k:
                     msg = _("unable to parse '%s', keys can't be empty")
                     raise error.Abort(msg % raw)
-                if re.search('[^\w-]', k):
+                if re.search(br'[^\w-]', k):
                     msg = _("keys can only contain ascii letters, digits,"
                             " '_' and '-'")
                     raise error.Abort(msg)
diff --git a/hgext/blackbox.py b/hgext/blackbox.py
--- a/hgext/blackbox.py
+++ b/hgext/blackbox.py
@@ -190,7 +190,7 @@
             break
 
         # count the commands by matching lines like: 2013/01/23 19:13:36 root>
-        if re.match('^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} .*> .*', line):
+        if re.match(br'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} .*> .*', line):
             count += 1
         output.append(line)
 



To: indygreg, #hg-reviewers
Cc: mercurial-devel, spectral


More information about the Mercurial-devel mailing list