D6391: py3: make contrib/testparseutil.py to work on str(unicodes)

Fri May 17 09:52:09 EDT 2019

This revision was automatically updated to reflect the committed changes.
Closed by commit rHG5364ba1f796f: py3: make contrib/testparseutil.py to work on str(unicodes) (authored by pulkit, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6391?vs=15154&id=15168

REVISION DETAIL
  https://phab.mercurial-scm.org/D6391

AFFECTED FILES
  contrib/testparseutil.py

CHANGE DETAILS

diff --git a/contrib/testparseutil.py b/contrib/testparseutil.py
--- a/contrib/testparseutil.py
+++ b/contrib/testparseutil.py
@@ -54,7 +54,7 @@
         return s.decode(u'latin-1')
 
     def opentext(f):
-        return open(f, 'rb')
+        return open(f, 'r')
 else:
     stdin = sys.stdin
     stdout = sys.stdout
@@ -164,14 +164,14 @@
     ...         self.matchfunc = matchfunc
     ...     def startsat(self, line):
     ...         return self.matchfunc(line)
-    >>> ambig1 = ambigmatcher(b'ambiguous #1',
-    ...                       lambda l: l.startswith(b'  $ cat '))
-    >>> ambig2 = ambigmatcher(b'ambiguous #2',
-    ...                       lambda l: l.endswith(b'<< EOF\\n'))
-    >>> lines = [b'  $ cat > foo.py << EOF\\n']
+    >>> ambig1 = ambigmatcher('ambiguous #1',
+    ...                       lambda l: l.startswith('  $ cat '))
+    >>> ambig2 = ambigmatcher('ambiguous #2',
+    ...                       lambda l: l.endswith('<< EOF\\n'))
+    >>> lines = ['  $ cat > foo.py << EOF\\n']
     >>> errors = []
     >>> matchers = [ambig1, ambig2]
-    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
+    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
     []
     >>> b2s(errors)
     ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
@@ -181,21 +181,21 @@
     ctx = filename = code = startline = None # for pyflakes
 
     for lineno, line in enumerate(lines, 1):
-        if not line.endswith(b'\n'):
-            line += b'\n' # to normalize EOF line
+        if not line.endswith('\n'):
+            line += '\n' # to normalize EOF line
         if matcher: # now, inside embedded code
             if matcher.endsat(ctx, line):
                 codeatend = matcher.codeatend(ctx, line)
                 if codeatend is not None:
                     code.append(codeatend)
                 if not matcher.ignores(ctx):
-                    yield (filename, startline, lineno, b''.join(code))
+                    yield (filename, startline, lineno, ''.join(code))
                 matcher = None
                 # DO NOT "continue", because line might start next fragment
             elif not matcher.isinside(ctx, line):
                 # this is an error of basefile
                 # (if matchers are implemented correctly)
-                errors.append(b'%s:%d: unexpected line for "%s"'
+                errors.append('%s:%d: unexpected line for "%s"'
                               % (basefile, lineno, matcher.desc))
                 # stop extracting embedded code by current 'matcher',
                 # because appearance of unexpected line might mean
@@ -218,9 +218,9 @@
         if matched:
             if len(matched) > 1:
                 # this is an error of matchers, maybe
-                errors.append(b'%s:%d: ambiguous line for %s' %
+                errors.append('%s:%d: ambiguous line for %s' %
                               (basefile, lineno,
-                               b', '.join([b'"%s"' % m.desc
+                               ', '.join(['"%s"' % m.desc
                                            for m, c in matched])))
                 # omit extracting embedded code, because choosing
                 # arbitrary matcher from matched ones might fail to
@@ -239,68 +239,68 @@
     if matcher:
         # examine whether EOF ends embedded code, because embedded
         # code isn't yet ended explicitly
-        if matcher.endsat(ctx, b'\n'):
-            codeatend = matcher.codeatend(ctx, b'\n')
+        if matcher.endsat(ctx, '\n'):
+            codeatend = matcher.codeatend(ctx, '\n')
             if codeatend is not None:
                 code.append(codeatend)
             if not matcher.ignores(ctx):
-                yield (filename, startline, lineno + 1, b''.join(code))
+                yield (filename, startline, lineno + 1, ''.join(code))
         else:
             # this is an error of basefile
             # (if matchers are implemented correctly)
-            errors.append(b'%s:%d: unexpected end of file for "%s"'
+            errors.append('%s:%d: unexpected end of file for "%s"'
                           % (basefile, lineno, matcher.desc))
 
 # heredoc limit mark to ignore embedded code at check-code.py or so
-heredocignorelimit = b'NO_CHECK_EOF'
+heredocignorelimit = 'NO_CHECK_EOF'
 
 # the pattern to match against cases below, and to return a limit mark
 # string as 'lname' group
 #
 # - << LIMITMARK
 # - << "LIMITMARK"
 # - << 'LIMITMARK'
-heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
+heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
 
 class fileheredocmatcher(embeddedmatcher):
     """Detect "cat > FILE << LIMIT" style embedded code
 
     >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\\.py')
-    >>> b2s(matcher.startsat(b'  $ cat > file.py << EOF\\n'))
+    >>> b2s(matcher.startsat('  $ cat > file.py << EOF\\n'))
     ('file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b'  $ cat   >>file.py   <<EOF\\n'))
+    >>> b2s(matcher.startsat('  $ cat   >>file.py   <<EOF\\n'))
     ('file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b'  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
+    >>> b2s(matcher.startsat('  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
     ('any file.py', '  > EOF\\n')
-    >>> b2s(matcher.startsat(b"  $ cat > file.py << 'ANYLIMIT'\\n"))
+    >>> b2s(matcher.startsat("  $ cat > file.py << 'ANYLIMIT'\\n"))
     ('file.py', '  > ANYLIMIT\\n')
-    >>> b2s(matcher.startsat(b'  $ cat<<ANYLIMIT>"file.py"\\n'))
+    >>> b2s(matcher.startsat('  $ cat<<ANYLIMIT>"file.py"\\n'))
     ('file.py', '  > ANYLIMIT\\n')
-    >>> start = b'  $ cat > file.py << EOF\\n'
+    >>> start = '  $ cat > file.py << EOF\\n'
     >>> ctx = matcher.startsat(start)
     >>> matcher.codeatstart(ctx, start)
     >>> b2s(matcher.filename(ctx))
     'file.py'
     >>> matcher.ignores(ctx)
     False
-    >>> inside = b'  > foo = 1\\n'
+    >>> inside = '  > foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> end = b'  > EOF\\n'
+    >>> end = '  > EOF\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
     False
-    >>> ctx = matcher.startsat(b'  $ cat > file.py << NO_CHECK_EOF\\n')
+    >>> ctx = matcher.startsat('  $ cat > file.py << NO_CHECK_EOF\\n')
     >>> matcher.ignores(ctx)
     True
     """
-    _prefix = b'  > '
+    _prefix = '  > '
 
     def __init__(self, desc, namepat):
         super(fileheredocmatcher, self).__init__(desc)
@@ -312,31 +312,31 @@
         # - > NAMEPAT
         # - > "NAMEPAT"
         # - > 'NAMEPAT'
-        namepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
+        namepat = (r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
                    % namepat)
         self._fileres = [
             # "cat > NAME << LIMIT" case
-            re.compile(br'  \$ \s*cat' + namepat + heredoclimitpat),
+            re.compile(r'  \$ \s*cat' + namepat + heredoclimitpat),
             # "cat << LIMIT > NAME" case
-            re.compile(br'  \$ \s*cat' + heredoclimitpat + namepat),
+            re.compile(r'  \$ \s*cat' + heredoclimitpat + namepat),
         ]
 
     def startsat(self, line):
         # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
         for filere in self._fileres:
             matched = filere.match(line)
             if matched:
                 return (matched.group('name'),
-                        b'  > %s\n' % matched.group('limit'))
+                        '  > %s\n' % matched.group('limit'))
 
     def endsat(self, ctx, line):
         return ctx[1] == line
 
     def isinside(self, ctx, line):
         return line.startswith(self._prefix)
 
     def ignores(self, ctx):
-        return b'  > %s\n' % heredocignorelimit == ctx[1]
+        return '  > %s\n' % heredocignorelimit == ctx[1]
 
     def filename(self, ctx):
         return ctx[0]
@@ -357,56 +357,56 @@
     """Detect ">>> code" style embedded python code
 
     >>> matcher = pydoctestmatcher()
-    >>> startline = b'  >>> foo = 1\\n'
+    >>> startline = '  >>> foo = 1\\n'
     >>> matcher.startsat(startline)
     True
-    >>> matcher.startsat(b'  ... foo = 1\\n')
+    >>> matcher.startsat('  ... foo = 1\\n')
     False
     >>> ctx = matcher.startsat(startline)
     >>> matcher.filename(ctx)
     >>> matcher.ignores(ctx)
     False
     >>> b2s(matcher.codeatstart(ctx, startline))
     'foo = 1\\n'
-    >>> inside = b'  >>> foo = 1\\n'
+    >>> inside = '  >>> foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> inside = b'  ... foo = 1\\n'
+    >>> inside = '  ... foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> inside = b'  expected output\\n'
+    >>> inside = '  expected output\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     '\\n'
-    >>> inside = b'  \\n'
+    >>> inside = '  \\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     '\\n'
-    >>> end = b'  $ foo bar\\n'
+    >>> end = '  $ foo bar\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> end = b'\\n'
+    >>> end = '\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
     """
-    _prefix = b'  >>> '
-    _prefixre = re.compile(br'  (>>>|\.\.\.) ')
+    _prefix = '  >>> '
+    _prefixre = re.compile(r'  (>>>|\.\.\.) ')
 
     # If a line matches against not _prefixre but _outputre, that line
     # is "an expected output line" (= not a part of code fragment).
@@ -416,10 +416,10 @@
     # run-tests.py. But "directive line inside inline python code"
     # should be rejected by Mercurial reviewers. Therefore, this
     # regexp does not matche against such directive lines.
-    _outputre = re.compile(br'  $|  [^$]')
+    _outputre = re.compile(r'  $|  [^$]')
 
     def __init__(self):
-        super(pydoctestmatcher, self).__init__(b"doctest style python code")
+        super(pydoctestmatcher, self).__init__("doctest style python code")
 
     def startsat(self, line):
         # ctx is "True"
@@ -446,66 +446,66 @@
     def codeinside(self, ctx, line):
         if self._prefixre.match(line):
             return line[len(self._prefix):] # strip prefix '  >>> '/'  ... '
-        return b'\n' # an expected output line is treated as an empty line
+        return '\n' # an expected output line is treated as an empty line
 
 class pyheredocmatcher(embeddedmatcher):
     """Detect "python << LIMIT" style embedded python code
 
     >>> matcher = pyheredocmatcher()
-    >>> b2s(matcher.startsat(b'  $ python << EOF\\n'))
+    >>> b2s(matcher.startsat('  $ python << EOF\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b'  $ $PYTHON   <<EOF\\n'))
+    >>> b2s(matcher.startsat('  $ $PYTHON   <<EOF\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b'  $ "$PYTHON"<<  "EOF"\\n'))
+    >>> b2s(matcher.startsat('  $ "$PYTHON"<<  "EOF"\\n'))
     '  > EOF\\n'
-    >>> b2s(matcher.startsat(b"  $ $PYTHON << 'ANYLIMIT'\\n"))
+    >>> b2s(matcher.startsat("  $ $PYTHON << 'ANYLIMIT'\\n"))
     '  > ANYLIMIT\\n'
-    >>> matcher.startsat(b'  $ "$PYTHON" < EOF\\n')
-    >>> start = b'  $ python << EOF\\n'
+    >>> matcher.startsat('  $ "$PYTHON" < EOF\\n')
+    >>> start = '  $ python << EOF\\n'
     >>> ctx = matcher.startsat(start)
     >>> matcher.codeatstart(ctx, start)
     >>> matcher.filename(ctx)
     >>> matcher.ignores(ctx)
     False
-    >>> inside = b'  > foo = 1\\n'
+    >>> inside = '  > foo = 1\\n'
     >>> matcher.endsat(ctx, inside)
     False
     >>> matcher.isinside(ctx, inside)
     True
     >>> b2s(matcher.codeinside(ctx, inside))
     'foo = 1\\n'
-    >>> end = b'  > EOF\\n'
+    >>> end = '  > EOF\\n'
     >>> matcher.endsat(ctx, end)
     True
     >>> matcher.codeatend(ctx, end)
-    >>> matcher.endsat(ctx, b'  > EOFEOF\\n')
+    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
     False
-    >>> ctx = matcher.startsat(b'  $ python << NO_CHECK_EOF\\n')
+    >>> ctx = matcher.startsat('  $ python << NO_CHECK_EOF\\n')
     >>> matcher.ignores(ctx)
     True
     """
-    _prefix = b'  > '
+    _prefix = '  > '
 
-    _startre = re.compile(br'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
+    _startre = re.compile(r'  \$ (\$PYTHON|"\$PYTHON"|python).*' +
                           heredoclimitpat)
 
     def __init__(self):
-        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")
+        super(pyheredocmatcher, self).__init__("heredoc python invocation")
 
     def startsat(self, line):
         # ctx is END-LINE-OF-EMBEDDED-CODE
         matched = self._startre.match(line)
         if matched:
-            return b'  > %s\n' % matched.group('limit')
+            return '  > %s\n' % matched.group('limit')
 
     def endsat(self, ctx, line):
         return ctx == line
 
     def isinside(self, ctx, line):
         return line.startswith(self._prefix)
 
     def ignores(self, ctx):
-        return b'  > %s\n' % heredocignorelimit == ctx
+        return '  > %s\n' % heredocignorelimit == ctx
 
     def filename(self, ctx):
         return None # no filename
@@ -524,7 +524,7 @@
     pyheredocmatcher(),
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
+    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
 ]
 
 def pyembedded(basefile, lines, errors):
@@ -536,7 +536,7 @@
 _shmatchers = [
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
+    fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
 ]
 
 def shembedded(basefile, lines, errors):
@@ -548,8 +548,8 @@
 _hgrcmatchers = [
     # use '[^<]+' instead of '\S+', in order to match against
     # paths including whitespaces
-    fileheredocmatcher(b'heredoc hgrc file',
-                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
+    fileheredocmatcher('heredoc hgrc file',
+                       r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
 ]
 
 def hgrcembedded(basefile, lines, errors):
@@ -565,26 +565,26 @@
         errors = []
         for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
             if not name:
-                name = b'<anonymous>'
-            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
+                name = '<anonymous>'
+            writeout("%s:%d: %s starts\n" % (basefile, starts, name))
             if opts.verbose and code:
-                writeout(b"  |%s\n" %
-                         b"\n  |".join(l for l in code.splitlines()))
-            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
+                writeout("  |%s\n" %
+                         "\n  |".join(l for l in code.splitlines()))
+            writeout("%s:%d: %s ends\n" % (basefile, ends, name))
         for e in errors:
-            writeerr(b"%s\n" % e)
+            writeerr("%s\n" % e)
         return len(errors)
 
     def applyembedded(args, embeddedfunc, opts):
         ret = 0
         if args:
             for f in args:
                 with opentext(f) as fp:
-                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
+                    if showembedded(f, fp, embeddedfunc, opts):
                         ret = 1
         else:
             lines = [l for l in stdin.readlines()]
-            if showembedded(b'<stdin>', lines, embeddedfunc, opts):
+            if showembedded('<stdin>', lines, embeddedfunc, opts):
                 ret = 1
         return ret
 



To: pulkit, #hg-reviewers
Cc: mercurial-devel