D6391: py3: make contrib/testparseutil.py work on str (unicode)

Thu May 16 18:38:35 UTC 2019

pulkit created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  contrib/check-code works on unicode strings and calls functions from
  testparseutil.py, which before this patch worked on bytes.
  
  This patch removes that inconsistency and makes testparseutil.py work on
  unicode strings.
  
  This makes test-check-code.t and test-contrib-check-code.t work on Python 3
  again.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6391

AFFECTED FILES
  contrib/testparseutil.py

CHANGE DETAILS

diff --git a/contrib/testparseutil.py b/contrib/testparseutil.py
--- a/contrib/testparseutil.py
+++ b/contrib/testparseutil.py
@@ -54,7 +54,7 @@
         return s.decode(u'latin-1')
 
     def opentext(f):
-        return open(f, 'rb')
+        return open(f, 'r')
 else:
     stdin = sys.stdin
     stdout = sys.stdout
@@ -164,14 +164,14 @@
     ...         self.matchfunc = matchfunc
     ...     def startsat(self, line):
     ...         return self.matchfunc(line)
-    >>> ambig1 = ambigmatcher(b'ambiguous #1',
-    ...                       lambda l: l.startswith(b'  $ cat '))
-    >>> ambig2 = ambigmatcher(b'ambiguous #2',
-    ...                       lambda l: l.endswith(b'<< EOF\\n'))
-    >>> lines = [b'  $ cat > foo.py << EOF\\n']
+    >>> ambig1 = ambigmatcher('ambiguous #1',
+    ...                       lambda l: l.startswith('  $ cat '))
+    >>> ambig2 = ambigmatcher('ambiguous #2',
+    ...                       lambda l: l.endswith('<< EOF\\n'))
+    >>> lines = ['  $ cat > foo.py << EOF\\n']
     >>> errors = []
     >>> matchers = [ambig1, ambig2]
-    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
+    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
     []
     >>> b2s(errors)
     ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
@@ -181,21 +181,21 @@
     ctx = filename = code = startline = None # for pyflakes
 
     for lineno, line in enumerate(lines, 1):
-        if not line.endswith(b'\n'):
-            line += b'\n' # to normalize EOF line
+        if not line.endswith('\n'):
+            line += '\n' # to normalize EOF line
         if matcher: # now, inside embedded code
             if matcher.endsat(ctx, line):
                 codeatend = matcher.codeatend(ctx, line)
                 if codeatend is not None:
                     code.append(codeatend)
                 if not matcher.ignores(ctx):
-                    yield (filename, startline, lineno, b''.join(code))
+                    yield (filename, startline, lineno, ''.join(code))
                 matcher = None
                 # DO NOT "continue", because line might start next fragment
             elif not matcher.isinside(ctx, line):
                 # this is an error of basefile
                 # (if matchers are implemented correctly)
-                errors.append(b'%s:%d: unexpected line for "%s"'
+                errors.append('%s:%d: unexpected line for "%s"'
                               % (basefile, lineno, matcher.desc))
                 # stop extracting embedded code by current 'matcher',
                 # because appearance of unexpected line might mean
@@ -218,9 +218,9 @@
         if matched:
             if len(matched) > 1:
                 # this is an error of matchers, maybe
-                errors.append(b'%s:%d: ambiguous line for %s' %
+                errors.append('%s:%d: ambiguous line for %s' %
                               (basefile, lineno,
-                               b', '.join([b'"%s"' % m.desc
+                               ', '.join(['"%s"' % m.desc
                                            for m, c in matched])))
                 # omit extracting embedded code, because choosing
                 # arbitrary matcher from matched ones might fail to
@@ -239,68 +239,68 @@
     if matcher:
         # examine whether EOF ends embedded code, because embedded
         # code isn't yet ended explicitly
-        if matcher.endsat(ctx, b'\n'):
-            codeatend = matcher.codeatend(ctx, b'\n')
+        if matcher.endsat(ctx, '\n'):
+            codeatend = matcher.codeatend(ctx, '\n')
             if codeatend is not None:
                 code.append(codeatend)
             if not matcher.ignores(ctx):
-                yield (filename, startline, lineno + 1, b''.join(code))
+                yield (filename, startline, lineno + 1, ''.join(code))
         else:
             # this is an error of basefile
             # (if matchers are implemented correctly)
-            errors.append(b'%s:%d: unexpected end of file for "%s"'
+            errors.append('%s:%d: unexpected end of file for "%s"'
                           % (basefile, lineno, matcher.desc))
 
 # heredoc limit mark to ignore embedded code at check-code.py or so
-heredocignorelimit = b'NO_CHECK_EOF'
+heredocignorelimit = 'NO_CHECK_EOF'
 
 # the pattern to match against cases below, and to return a limit mark
 # string as 'lname' group
 #
 # - << LIMITMARK
 # - << "LIMITMARK"
 # - << 'LIMITMARK'
-heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
+heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
 
 class fileheredocmatcher(embeddedmatcher):
     """Detect "cat > FILE << LIMIT" style embedded code
 
     >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\\.py')
-    >>> b2s(matcher.startsat(b'  $ cat > file.py << EOF\\n'))
+    >>> b2s(matcher.startsat('  $ cat > file.py << EOF\\n'))
     ('file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b'  $ cat   >>file.py   <<EOF\\n'))
+    >>> b2s(matcher.startsat('  $ cat   >>file.py   <<EOF\\n'))
     ('file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b'  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
+    >>> b2s(matcher.startsat('  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
     ('any file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b"  $ cat > file.py << 'ANYLIMIT'\\n"))
+    >>> b2s(matcher.startsat("  $ cat > file.py << 'ANYLIMIT'\\n"))
     ('file.py', '  > ANYLIMIT\\n')
-    >>> b2s(matcher.startsat(b'  $ cat<<ANYLIMIT>"file.py"\\n'))
+    >>> b2s(matcher.startsat('  $ cat<<ANYLIMIT>"file.py"\\n'))
     ('file.py', '  > ANYLIMIT\\n')
-    >>> start = b'  $ cat > file.py << EOF\\n'
+    >>> start = '  $ cat > file.py << EOF\\n'
     >>> ctx = matcher.startsat(start)
     >>> matcher.codeatstart(ctx, start)
     >>> b2s(matcher.filename(ctx))
     'file.py'
     >>> matcher.ignores(ctx)
     False
-    >>> inside = b'  > foo = 1\\n'
+    >>> inside = '  > foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> end = b'  > EOF\\n'
+    >>> end = '  > EOF\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
     False
-    >>> ctx = matcher.startsat(b'  $ cat > file.py << NO_CHECK_EOF\\n')
+    >>> ctx = matcher.startsat('  $ cat > file.py << NO_CHECK_EOF\\n')
     >>> matcher.ignores(ctx)
     True
     """
-    _prefix = b'  > '
+    _prefix = '  > '
 
     def __init__(self, desc, namepat):
         super(fileheredocmatcher, self).__init__(desc)
@@ -312,31 +312,31 @@
         # - > NAMEPAT
         # - > "NAMEPAT"
         # - > 'NAMEPAT'
-        namepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
+        namepat = (r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
                    % namepat)
         self._fileres = [
             # "cat > NAME << LIMIT" case
-            re.compile(br'  \$ \s*cat' + namepat + heredoclimitpat),
+            re.compile(r'  \$ \s*cat' + namepat + heredoclimitpat),
             # "cat << LIMIT > NAME" case
-            re.compile(br'  \$ \s*cat' + heredoclimitpat + namepat),
+            re.compile(r'  \$ \s*cat' + heredoclimitpat + namepat),
         ]
 
     def startsat(self, line):
         # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
         for filere in self._fileres:
             matched = filere.match(line)
             if matched:
                 return (matched.group('name'),
-                        b'  > %s\n' % matched.group('limit'))
+                        '  > %s\n' % matched.group('limit'))
 
     def endsat(self, ctx, line):
         return ctx[1] == line
 
     def isinside(self, ctx, line):
         return line.startswith(self._prefix)
 
     def ignores(self, ctx):
-        return b'  > %s\n' % heredocignorelimit == ctx[1]
+        return '  > %s\n' % heredocignorelimit == ctx[1]
 
     def filename(self, ctx):
         return ctx[0]
@@ -357,56 +357,56 @@
     """Detect ">>> code" style embedded python code
 
     >>> matcher = pydoctestmatcher()
-    >>> startline = b'  >>> foo = 1\\n'
+    >>> startline = '  >>> foo = 1\\n'
     >>> matcher.startsat(startline)
     True
-    >>> matcher.startsat(b'  ... foo = 1\\n')
+    >>> matcher.startsat('  ... foo = 1\\n')
     False
     >>> ctx = matcher.startsat(startline)
     >>> matcher.filename(ctx)
     >>> matcher.ignores(ctx)
     False
     >>> b2s(matcher.codeatstart(ctx, startline))
     'foo = 1\\n'
-    >>> inside = b'  >>> foo = 1\\n'
+    >>> inside = '  >>> foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> inside = b'  ... foo = 1\\n'
+    >>> inside = '  ... foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> inside = b'  expected output\\n'
+    >>> inside = '  expected output\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     '\\n'
-    >>> inside = b'  \\n'
+    >>> inside = '  \\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     '\\n'
-    >>> end = b'  $ foo bar\\n'
+    >>> end = '  $ foo bar\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> end = b'\\n'
+    >>> end = '\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
     """
-    _prefix = b'  >>> '
-    _prefixre = re.compile(br'  (>>>|\.\.\.) ')
+    _prefix = '  >>> '
+    _prefixre = re.compile(r'  (>>>|\.\.\.) ')
 
     # If a line matches against not _prefixre but _outputre, that line
     # is "an expected output line" (= not a part of code fragment).
@@ -416,10 +416,10 @@
     # run-tests.py. But "directive line inside inline python code"
     # should be rejected by Mercurial reviewers. Therefore, this
     # regexp does not matche against such directive lines.
-    _outputre = re.compile(br'  $|  [^$]')
+    _outputre = re.compile(r'  $|  [^$]')
 
     def __init__(self):
-        super(pydoctestmatcher, self).__init__(b"doctest style python code")
+        super(pydoctestmatcher, self).__init__("doctest style python code")
 
     def startsat(self, line):
         # ctx is "True"
@@ -446,66 +446,66 @@
     def codeinside(self, ctx, line):
         if self._prefixre.match(line):
             return line[len(self._prefix):] # strip prefix '  >>> '/'  ... '
-        return b'\n' # an expected output line is treated as an empty line
+        return '\n' # an expected output line is treated as an empty line
 
 class pyheredocmatcher(embeddedmatcher):
     """Detect "python << LIMIT" style embedded python code
 
     >>> matcher = pyheredocmatcher()
-    >>> b2s(matcher.startsat(b'  $ python << EOF\\n'))
+    >>> b2s(matcher.startsat('  $ python << EOF\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b'  $ $PYTHON   <<EOF\\n'))
+    >>> b2s(matcher.startsat('  $ $PYTHON   <<EOF\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b'  $ "$PYTHON"<<  "EOF"\\n'))
+    >>> b2s(matcher.startsat('  $ "$PYTHON"<<  "EOF"\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b"  $ $PYTHON << 'ANYLIMIT'\\n"))
+    >>> b2s(matcher.startsat("  $ $PYTHON << 'ANYLIMIT'\\n"))
     '  > ANYLIMIT\\n'
-    >>> matcher.startsat(b'  $ "$PYTHON" < EOF\\n')
-    >>> start = b'  $ python << EOF\\n'
+    >>> matcher.startsat('  $ "$PYTHON" < EOF\\n')
+    >>> start = '  $ python << EOF\\n'
     >>> ctx = matcher.startsat(start)
     >>> matcher.codeatstart(ctx, start)
     >>> matcher.filename(ctx)
     >>> matcher.ignores(ctx)
     False
-    >>> inside = b'  > foo = 1\\n'
+    >>> inside = '  > foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> end = b'  > EOF\\n'
+    >>> end = '  > EOF\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
     False
-    >>> ctx = matcher.startsat(b'  $ python << NO_CHECK_EOF\\n')
+    >>> ctx = matcher.startsat('  $ python << NO_CHECK_EOF\\n')
     >>> matcher.ignores(ctx)
     True
     """
-    _prefix = b'  > '
+    _prefix = '  > '
 
-    _startre = re.compile(br'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
+    _startre = re.compile(r'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
                           heredoclimitpat)
 
     def __init__(self):
-        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")
+        super(pyheredocmatcher, self).__init__("heredoc python invocation")
 
     def startsat(self, line):
         # ctx is END-LINE-OF-EMBEDDED-CODE
         matched = self._startre.match(line)
         if matched:
-            return b'  > %s\n' % matched.group('limit')
+            return '  > %s\n' % matched.group('limit')
 
     def endsat(self, ctx, line):
         return ctx == line
 
     def isinside(self, ctx, line):
         return line.startswith(self._prefix)
 
     def ignores(self, ctx):
-        return b'  > %s\n' % heredocignorelimit == ctx
+        return '  > %s\n' % heredocignorelimit == ctx
 
     def filename(self, ctx):
         return None # no filename
@@ -524,7 +524,7 @@
     pyheredocmatcher(),
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
+    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
 ]
 
 def pyembedded(basefile, lines, errors):
@@ -536,7 +536,7 @@
 _shmatchers = [
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
+    fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
 ]
 
 def shembedded(basefile, lines, errors):
@@ -548,8 +548,8 @@
 _hgrcmatchers = [
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc hgrc file',
-                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
+    fileheredocmatcher('heredoc hgrc file',
+                       r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
 ]
 
 def hgrcembedded(basefile, lines, errors):
@@ -565,26 +565,26 @@
         errors = []
         for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
             if not name:
-                name = b'<anonymous>'
-            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
+                name = '<anonymous>'
+            writeout("%s:%d: %s starts\n" % (basefile, starts, name))
             if opts.verbose and code:
-                writeout(b"  |%s\n" %
-                         b"\n  |".join(l for l in code.splitlines()))
-            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
+                writeout("  |%s\n" %
+                         "\n  |".join(l for l in code.splitlines()))
+            writeout("%s:%d: %s ends\n" % (basefile, ends, name))
         for e in errors:
-            writeerr(b"%s\n" % e)
+            writeerr("%s\n" % e)
         return len(errors)
 
     def applyembedded(args, embeddedfunc, opts):
         ret = 0
         if args:
             for f in args:
                 with opentext(f) as fp:
-                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
+                    if showembedded(f, fp, embeddedfunc, opts):
                         ret = 1
         else:
             lines = [l for l in stdin.readlines()]
-            if showembedded(b'<stdin>', lines, embeddedfunc, opts):
+            if showembedded('<stdin>', lines, embeddedfunc, opts):
                 ret = 1
         return ret
 



To: pulkit, #hg-reviewers
Cc: mercurial-devel