[PATCH 2 of 2] py3: fix str vs bytes in enough places to run `hg version` on Windows

Fri Sep 14 08:09:14 EDT 2018

On Fri, 14 Sep 2018 00:52:34 -0400, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison at yahoo.com>
> # Date 1536890820 14400
> #      Thu Sep 13 22:07:00 2018 -0400
> # Node ID a1c3c33e911a449c1e67d6c70a1320f34233f253
> # Parent  82be987da1489e3ff7411540b473690e6039fd67
> py3: fix str vs bytes in enough places to run `hg version` on Windows
> 
> I don't have Visual Studio 2015 at home, but this now works with a handful of
> extensions (blackbox, extdiff, patchbomb, phabricator and rebase, but not
> evolve):
> 
>     $ HGMODULEPOLICY=py py -3 ../hg version
> 
> Enabling the evolve extension causes the usual "failed to import ..." line, but
> then print this before the usual version output:
> 
>     ('commit', '[b'debugancestor', b'debugapplystreamclonebundle', ...,
>      b'verify', b'version']')
> 
> ... where the elided part seems to be every command and alias known.
> 
> I'm not at all clear on when to use pycompat.sysstr() vs
> encoding.unifromlocal(), but I think it's encoding.* for things the user inputs
> or would be user facing?  I would have thought that this meant using encoding.*
> for these windows.py changes, but then test-help.t fails.

If we don't care about non-ASCII characters, use sysstr/sysbytes. In other cases,
use encoding.strfrom/tolocal, fsencode/fsdecode, or .encode/.decode('latin-1')
depending on how non-ASCII characters should be processed.

> diff --git a/mercurial/color.py b/mercurial/color.py
> --- a/mercurial/color.py
> +++ b/mercurial/color.py
> @@ -408,21 +408,21 @@ if pycompat.iswindows:
>      _INVALID_HANDLE_VALUE = -1
>  
>      class _COORD(ctypes.Structure):
> -        _fields_ = [('X', ctypes.c_short),
> -                    ('Y', ctypes.c_short)]
> +        _fields_ = [(r'X', ctypes.c_short),
> +                    (r'Y', ctypes.c_short)]
>  
>      class _SMALL_RECT(ctypes.Structure):
> -        _fields_ = [('Left', ctypes.c_short),
> -                    ('Top', ctypes.c_short),
> -                    ('Right', ctypes.c_short),
> -                    ('Bottom', ctypes.c_short)]
> +        _fields_ = [(r'Left', ctypes.c_short),
> +                    (r'Top', ctypes.c_short),
> +                    (r'Right', ctypes.c_short),
> +                    (r'Bottom', ctypes.c_short)]
>  
>      class _CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
> -        _fields_ = [('dwSize', _COORD),
> -                    ('dwCursorPosition', _COORD),
> -                    ('wAttributes', _WORD),
> -                    ('srWindow', _SMALL_RECT),
> -                    ('dwMaximumWindowSize', _COORD)]
> +        _fields_ = [(r'dwSize', _COORD),
> +                    (r'dwCursorPosition', _COORD),
> +                    (r'wAttributes', _WORD),
> +                    (r'srWindow', _SMALL_RECT),
> +                    (r'dwMaximumWindowSize', _COORD)]

These look good.

> @@ -484,7 +484,7 @@ if pycompat.iswindows:
>              w32effects = None
>          else:
>              origattr = csbi.wAttributes
> -            ansire = re.compile('\033\[([^m]*)m([^\033]*)(.*)',
> +            ansire = re.compile(r'\033\[([^m]*)m([^\033]*)(.*)',
>                                  re.MULTILINE | re.DOTALL)
>  
>      def win32print(ui, writefunc, *msgs, **opts):
> @@ -520,16 +520,16 @@ if pycompat.iswindows:
>              text = '\033[m' + text
>  
>          # Look for ANSI-like codes embedded in text
> -        m = re.match(ansire, text)
> +        m = re.match(ansire, pycompat.sysstr(text))

Why do you want to convert text to unicode here? It's converted back to
bytes later.

> -                for sattr in m.group(1).split(';'):
> +                for sattr in m.group(1).split(r';'):
>                      if sattr:
>                          attr = mapcolor(int(sattr), attr)
>                  ui.flush()
>                  _kernel32.SetConsoleTextAttribute(stdout, attr)
> -                writefunc(m.group(2), **opts)
> +                writefunc(encoding.unitolocal(m.group(2)), **opts)

> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py
> +++ b/mercurial/pure/osutil.py
> @@ -193,7 +193,8 @@ else:
>  
>      def _raiseioerror(name):
>          err = ctypes.WinError()
> -        raise IOError(err.errno, '%s: %s' % (name, err.strerror))
> +        raise IOError(err.errno, r'%s: %s' % (pycompat.sysstr(name),

Use encoding.strfromlocal() here, since the name would come from the environment.

> diff --git a/mercurial/windows.py b/mercurial/windows.py
> --- a/mercurial/windows.py
> +++ b/mercurial/windows.py
> @@ -398,10 +398,11 @@ def shellquote(s):
>          # drops it.  It will leave the next character, even if it is another
>          # "\".
>          _needsshellquote = re.compile(r'[^a-zA-Z0-9._:/-]').search

Perhaps _needsshellquote can be byteified instead.

> -    if s and not _needsshellquote(s) and not _quotere.search(s):
> +    u = pycompat.sysstr(s)
> +    if s and not _needsshellquote(u) and not _quotere.search(u):
>          # "s" shouldn't have to be quoted
>          return s
> -    return '"%s"' % _quotere.sub(r'\1\1\\\2', s)
> +    return pycompat.bytestr(r'"%s"' % _quotere.sub(r'\1\1\\\2', u))

sysstr -> bytestr isn't a round-trip conversion.