[PATCH 5 of 5] import-checker: use testparseutil.embedded() to centralize detection logic

Mon Oct 8 13:34:40 EDT 2018

# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1534994754 -32400
#      Thu Aug 23 12:25:54 2018 +0900
# Node ID 850b28c6b835f140ac9516de00c48c797b5e66e8
# Parent  814c18796bdc642417f8ef47565ca2dbea1a8855
# Available At https://bitbucket.org/foozy/mercurial-wip
#              hg pull https://bitbucket.org/foozy/mercurial-wip -r 850b28c6b835
# EXP-Topic contrib-improve-import-checker
import-checker: use testparseutil.embedded() to centralize detection logic

This patch fixes issues of embedded() in import-checker.py below, too.

  - overlook (or mis-detect) the end of inline script in doctest style

  - overlook inline script in doctest style at the end of file
    (and ignore invalid un-closed heredoc at the end of file, too)

  - overlook code fragment in styles below
    - "python <<EOF" (heredoc should be "cat > file <<EOF" style)
    - "cat > foobar.py << ANYLIMIT" (limit mark should be "EOF")
    - "cat << EOF > foobar.py" (filename should be placed before limit mark)
    - "cat >> foobar.py << EOF" (appending is ignored)

diff --git a/contrib/import-checker.py b/contrib/import-checker.py
--- a/contrib/import-checker.py
+++ b/contrib/import-checker.py
@@ -5,7 +5,6 @@ from __future__ import absolute_import, 
 import ast
 import collections
 import os
-import re
 import sys
 
 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
@@ -18,6 +17,8 @@ if True: # disable lexical sorting check
         basehttpserver = None
     import zlib
 
+import testparseutil
+
 # Whitelist of modules that symbols can be directly imported from.
 allowsymbolimports = (
     '__future__',
@@ -659,61 +660,21 @@ def embedded(f, modname, src):
     ...   b'  > EOF',
     ... ]
     >>> test(b"example.t", lines)
-    example[2] doctest.py 2
-    "from __future__ import print_function\\n' multiline\\nstring'\\n"
-    example[7] foo.py 7
+    example[2] doctest.py 1
+    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
+    example[8] foo.py 7
     'from __future__ import print_function\\n'
     """
-    inlinepython = 0
-    shpython = 0
-    script = []
-    prefix = 6
-    t = ''
-    n = 0
-    for l in src:
-        n += 1
-        if not l.endswith(b'\n'):
-            l += b'\n'
-        if l.startswith(b'  >>> '): # python inlines
-            if shpython:
-                print("%s:%d: Parse Error" % (f, n))
-            if not inlinepython:
-                # We've just entered a Python block.
-                inlinepython = n
-                t = b'doctest.py'
-            script.append(l[prefix:])
-            continue
-        if l.startswith(b'  ... '): # python inlines
-            script.append(l[prefix:])
-            continue
-        cat = re.search(br"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
-        if cat:
-            if inlinepython:
-                yield b''.join(script), (b"%s[%d]" %
-                       (modname, inlinepython)), t, inlinepython
-                script = []
-                inlinepython = 0
-            shpython = n
-            t = cat.group(1)
-            continue
-        if shpython and l.startswith(b'  > '): # sh continuation
-            if l == b'  > EOF\n':
-                yield b''.join(script), (b"%s[%d]" %
-                       (modname, shpython)), t, shpython
-                script = []
-                shpython = 0
-            else:
-                script.append(l[4:])
-            continue
-        # If we have an empty line or a command for sh, we end the
-        # inline script.
-        if inlinepython and (l == b'  \n'
-                             or l.startswith(b'  $ ')):
-            yield b''.join(script), (b"%s[%d]" %
-                   (modname, inlinepython)), t, inlinepython
-            script = []
-            inlinepython = 0
-            continue
+    errors = []
+    for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
+        if not name:
+            # use 'doctest.py', in order to make already existing
+            # doctest above pass instantly
+            name = 'doctest.py'
+        # "starts" is "line number" (1-origin), but embedded() is
+        # expected to return "line offset" (0-origin). Therefore, this
+        # yields "starts - 1".
+        yield code, "%s[%d]" % (modname, starts), name, starts - 1
 
 def sources(f, modname):
     """Yields possibly multiple sources from a filepath