[PATCH 4 of 7 v2] import-checker: parse python code from .t files

Fri Apr 15 10:35:56 EDT 2016

On Wed, 13 Apr 2016 12:21:47 -0500, timeless wrote:
> # HG changeset patch
> # User timeless <timeless at mozdev.org>
> # Date 1460497436 0
> #      Tue Apr 12 21:43:56 2016 +0000
> # Node ID 0fd09ef3b42024889053a97b4610a5f6b009bf34
> # Parent  5af048d312cd081f88706aa7b19af85083c197ec
> import-checker: parse python code from .t files
> 
> diff --git a/contrib/import-checker.py b/contrib/import-checker.py
> --- a/contrib/import-checker.py
> +++ b/contrib/import-checker.py
> @@ -5,6 +5,7 @@
>  import ast
>  import collections
>  import os
> +import re
>  import sys
>  
>  # Import a minimal set of stdlib modules needed for list_stdlib_modules()
> @@ -568,10 +569,97 @@
>  def _cycle_sortkey(c):
>      return len(c), c
>  
> +def embedded(f, modname, src):
> +    """ Extract embedded python code
> +
> +    >>> def test(fn, lines):
> +    ...     for s, m, f, l in embedded(fn, "example", lines):
> +    ...         print("%s %s %s" % (m, f, l))
> +    ...         print(repr(s))
> +    >>> lines = [
> +    ...   'comment',
> +    ...   '  >>> from __future__ import print_function',
> +    ...   "  >>> ' multiline",
> +    ...   "  ... string'",
> +    ...   '  ',
> +    ...   'comment',
> +    ...   '  $ cat > foo.py <<EOF',
> +    ...   '  > from __future__ import print_function',
> +    ...   '  > EOF',
> +    ... ]
> +    >>> test("example.t", lines)
> +    example[2] doctest.py 2
> +    "from __future__ import print_function\\n' multiline\\nstring'\\n"
> +    example[7] foo.py 7
> +    'from __future__ import print_function\\n'
> +    """
> +    inlinepython = 0
> +    shpython = 0
> +    script = []
> +    prefix = 6
> +    t = ''
> +    n = 0
> +    for l in src:
> +        n += 1
> +        if not l.endswith(b'\n'):
> +            l += b'\n'

Mixing bytes and str would be a problem on Python 3, but it seems okay for now.

> +        if l.startswith(b'  >>> '): # python inlines
> +            if shpython:
> +                print("Parse Error %s:%d\n" % (f, n))

Adjusted to "%s:%d: Parse Error" and dropped the extra "\n".

> +            if not inlinepython:
> +                # We've just entered a Python block.
> +                inlinepython = n
> +                t = 'doctest.py'
> +            script.append(l[prefix:])
> +            continue
> +        if l.startswith(b'  ... '): # python inlines
> +            script.append(l[prefix:])
> +            continue
> +        cat = re.search("\$ \s*cat\s*>\s*(\S+.py)\s*<<\s*EOF", l)

Should be r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF". Fixed in flight.
          ^                     ^

> +    py = False
>      if f.endswith('.py'):
>          with open(f) as src:
> -            yield src.read(), modname
> +            yield src.read(), modname, f, 0
> +            py = True
> +    if py or f.endswith('.t'):
> +        with open(f) as src:
> +            for script, modname, t, line in embedded(f, modname, src):
> +                yield script, modname, t, line

I don't understand this "py" flag, but it won't be a big deal.