[PATCH 2 of 2] py3: fix str vs bytes in enough places to run `hg version` on Windows
Yuya Nishihara
yuya at tcha.org
Fri Sep 14 08:09:14 EDT 2018
On Fri, 14 Sep 2018 00:52:34 -0400, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison at yahoo.com>
> # Date 1536890820 14400
> # Thu Sep 13 22:07:00 2018 -0400
> # Node ID a1c3c33e911a449c1e67d6c70a1320f34233f253
> # Parent 82be987da1489e3ff7411540b473690e6039fd67
> py3: fix str vs bytes in enough places to run `hg version` on Windows
>
> I don't have Visual Studio 2015 at home, but this now works with a handful of
> extensions (blackbox, extdiff, patchbomb, phabricator and rebase, but not
> evolve):
>
> $ HGMODULEPOLICY=py py -3 ../hg version
>
> Enabling the evolve extension causes the usual "failed to import ..." line, but
> then print this before the usual version output:
>
> ('commit', '[b'debugancestor', b'debugapplystreamclonebundle', ...,
> b'verify', b'version']')
>
> ... where the elided part seems to be every command and alias known.
>
> I'm not at all clear on when to use pycompat.sysstr() vs
> encoding.unifromlocal(), but I think it's encoding.* for things the user inputs
> or would be user facing? I would have thought that this meant using encoding.*
> for these windows.py changes, but then test-help.t fails.
If we don't care non-ASCII characters, use sysstr/sysbytes. In other cases,
use encoding.strfrom/tolocal, fsencode/fsdecode, or .encode/.decode('latin-1')
depending on how non-ASCII characters should be processed.
> diff --git a/mercurial/color.py b/mercurial/color.py
> --- a/mercurial/color.py
> +++ b/mercurial/color.py
> @@ -408,21 +408,21 @@ if pycompat.iswindows:
> _INVALID_HANDLE_VALUE = -1
>
> class _COORD(ctypes.Structure):
> - _fields_ = [('X', ctypes.c_short),
> - ('Y', ctypes.c_short)]
> + _fields_ = [(r'X', ctypes.c_short),
> + (r'Y', ctypes.c_short)]
>
> class _SMALL_RECT(ctypes.Structure):
> - _fields_ = [('Left', ctypes.c_short),
> - ('Top', ctypes.c_short),
> - ('Right', ctypes.c_short),
> - ('Bottom', ctypes.c_short)]
> + _fields_ = [(r'Left', ctypes.c_short),
> + (r'Top', ctypes.c_short),
> + (r'Right', ctypes.c_short),
> + (r'Bottom', ctypes.c_short)]
>
> class _CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
> - _fields_ = [('dwSize', _COORD),
> - ('dwCursorPosition', _COORD),
> - ('wAttributes', _WORD),
> - ('srWindow', _SMALL_RECT),
> - ('dwMaximumWindowSize', _COORD)]
> + _fields_ = [(r'dwSize', _COORD),
> + (r'dwCursorPosition', _COORD),
> + (r'wAttributes', _WORD),
> + (r'srWindow', _SMALL_RECT),
> + (r'dwMaximumWindowSize', _COORD)]
These look good.
> @@ -484,7 +484,7 @@ if pycompat.iswindows:
> w32effects = None
> else:
> origattr = csbi.wAttributes
> - ansire = re.compile('\033\[([^m]*)m([^\033]*)(.*)',
> + ansire = re.compile(r'\033\[([^m]*)m([^\033]*)(.*)',
> re.MULTILINE | re.DOTALL)
>
> def win32print(ui, writefunc, *msgs, **opts):
> @@ -520,16 +520,16 @@ if pycompat.iswindows:
> text = '\033[m' + text
>
> # Look for ANSI-like codes embedded in text
> - m = re.match(ansire, text)
> + m = re.match(ansire, pycompat.sysstr(text))
Why do you want to convert text to unicode here? It's converted back to
bytes later.
> - for sattr in m.group(1).split(';'):
> + for sattr in m.group(1).split(r';'):
> if sattr:
> attr = mapcolor(int(sattr), attr)
> ui.flush()
> _kernel32.SetConsoleTextAttribute(stdout, attr)
> - writefunc(m.group(2), **opts)
> + writefunc(encoding.unitolocal(m.group(2)), **opts)
> diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
> --- a/mercurial/pure/osutil.py
> +++ b/mercurial/pure/osutil.py
> @@ -193,7 +193,8 @@ else:
>
> def _raiseioerror(name):
> err = ctypes.WinError()
> - raise IOError(err.errno, '%s: %s' % (name, err.strerror))
> + raise IOError(err.errno, r'%s: %s' % (pycompat.sysstr(name),
encoding.strfromlocal() since the name would come from environment.
> diff --git a/mercurial/windows.py b/mercurial/windows.py
> --- a/mercurial/windows.py
> +++ b/mercurial/windows.py
> @@ -398,10 +398,11 @@ def shellquote(s):
> # drops it. It will leave the next character, even if it is another
> # "\".
> _needsshellquote = re.compile(r'[^a-zA-Z0-9._:/-]').search
Perhaps, _needsshellquote can be byteified instead.
> - if s and not _needsshellquote(s) and not _quotere.search(s):
> + u = pycompat.sysstr(s)
> + if s and not _needsshellquote(u) and not _quotere.search(u):
> # "s" shouldn't have to be quoted
> return s
> - return '"%s"' % _quotere.sub(r'\1\1\\\2', s)
> + return pycompat.bytestr(r'"%s"' % _quotere.sub(r'\1\1\\\2', u))
sysstr -> bytestr isn't round-trip conversion.
More information about the Mercurial-devel
mailing list