[PATCH v2] encoding: mercurial ignores setlocale and uses ascii instead of utf8
ehpc
ehpc at ehpc.io
Sat Oct 29 08:53:20 UTC 2016
# HG changeset patch
# User ehpc <ehpc at ehpc.io>
# Date 1477731183 -10800
# Sat Oct 29 11:53:03 2016 +0300
# Node ID 667842e5f8406e794f5b22930f4b5715a1dcdfe4
# Parent 260af19891f2bed679a662be07d1379bb8207592
encoding: mercurial ignores setlocale and uses ascii instead of utf8
locale.getpreferredencoding() internally calls locale.setlocale(locale.LC_CTYPE, ''),
so even if a user sets the locale explicitly via
locale.setlocale(locale.LC_ALL, 'ru_RU.utf8')
locale.setlocale(locale.LC_CTYPE, 'ru_RU.utf8')
Mercurial still detects ascii. There is also a problem with the tolocal method,
even when the encoding is detected correctly: if a string of type 'str' is fed to
the method and the encoding is UTF-8, it is not converted to a proper 'unicode'
string; a 'str' is returned instead.
diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -93,7 +93,7 @@
try:
encoding = environ.get("HGENCODING")
if not encoding:
- encoding = locale.getpreferredencoding() or 'ascii'
+ encoding = locale.getpreferredencoding(False) or 'ascii'
encoding = _encodingfixers.get(encoding, lambda: encoding)()
except locale.Error:
encoding = 'ascii'
@@ -146,11 +146,14 @@
try:
try:
+ if encoding == 'UTF-8':
+ # fast path
+ if isinstance(s, unicode):
+ return s
+ else:
+ return s.decode('UTF-8')
# make sure string is actually stored in UTF-8
u = s.decode('UTF-8')
- if encoding == 'UTF-8':
- # fast path
- return s
r = u.encode(_sysstr(encoding), u"replace")
if u == r.decode(_sysstr(encoding)):
# r is a safe, non-lossy encoding of s
More information about the Mercurial-devel
mailing list