[PATCH STABLE] encoding: mercurial ignores setlocale and uses ascii instead of utf8

ehpc ehpc at ehpc.io
Sat Oct 29 08:50:42 UTC 2016


# HG changeset patch
# User ehpc <ehpc at ehpc.io>
# Date 1477731007 -10800
#      Sat Oct 29 11:50:07 2016 +0300
# Branch stable
# Node ID 48e5dc130032990292d92e394b1759496b0a7143
# Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
encoding: mercurial ignores setlocale and uses ascii instead of utf8

locale.getpreferredencoding() internally calls
locale.setlocale(locale.LC_CTYPE, ''), so even if a user sets the locale
explicitly via

locale.setlocale(locale.LC_ALL, 'ru_RU.utf8')
locale.setlocale(locale.LC_CTYPE, 'ru_RU.utf8')

Mercurial still detects ascii. There is also a problem with the tolocal method
even when the encoding is detected correctly: if a string of type 'str' is fed
to the method and the encoding is UTF-8, it is not converted to a proper
'unicode' string; a 'str' is returned instead.

diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -93,7 +93,7 @@
 try:
     encoding = environ.get("HGENCODING")
     if not encoding:
-        encoding = locale.getpreferredencoding() or 'ascii'
+        encoding = locale.getpreferredencoding(False) or 'ascii'
         encoding = _encodingfixers.get(encoding, lambda: encoding)()
 except locale.Error:
     encoding = 'ascii'
@@ -146,11 +146,14 @@
 
     try:
         try:
+            if encoding == 'UTF-8':
+                # fast path
+                if isinstance(s, unicode):
+                    return s
+                else:
+                    return s.decode('UTF-8')
             # make sure string is actually stored in UTF-8
             u = s.decode('UTF-8')
-            if encoding == 'UTF-8':
-                # fast path
-                return s
             r = u.encode(_sysstr(encoding), u"replace")
             if u == r.decode(_sysstr(encoding)):
                 # r is a safe, non-lossy encoding of s


More information about the Mercurial-devel mailing list