[PATCH 2 of 3 RFC] global: u'' prefix some literals to appease module importing

Gregory Szorc gregory.szorc at gmail.com
Mon May 16 00:02:52 EDT 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1463371145 25200
#      Sun May 15 20:59:05 2016 -0700
# Node ID effc44c0ec7440fabb134a076e974ca478d89e11
# Parent  7c5d1f8db9618f511f40bc4089145310671ca57b
global: u'' prefix some literals to appease module importing

This demonstrates how global mass rewriting of string literals
to b'' in Python 3 can be too aggressive and result in Python 3
complaining. All these changes (and more) are needed to preserve
literals in Python 3 as the str (not bytes) type.

With this change, running `hg` fails on Python 3 due to encountering
a call to "iteritems", which of course does not exist there.

This change also breaks Python 2, so it isn't fit for checkin.

diff --git a/mercurial/demandimport.py b/mercurial/demandimport.py
--- a/mercurial/demandimport.py
+++ b/mercurial/demandimport.py
@@ -262,17 +262,17 @@ ignore = [
     'distutils.msvc9compiler'
     ]
 
 def isenabled():
     return builtins.__import__ == _demandimport
 
 def enable():
     "enable global demand-loading of modules"
-    if os.environ.get('HGDEMANDIMPORT') != 'disable':
+    if os.environ.get(u'HGDEMANDIMPORT') != u'disable':
         builtins.__import__ = _demandimport
 
 def disable():
     "disable global demand-loading of modules"
     builtins.__import__ = _origimport
 
 @contextmanager
 def deactivated():
diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -18,17 +18,17 @@ from . import (
 )
 
 if sys.version_info[0] >= 3:
     unichr = chr
 
 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
 # "Unicode Subtleties"), so we need to ignore them in some places for
 # sanity.
-_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
+_ignore = [unichr(int(x, 16)).encode(u"utf-8") for x in
            "200c 200d 200e 200f 202a 202b 202c 202d 202e "
            "206a 206b 206c 206d 206e 206f feff".split()]
 # verify the next function will work
 if sys.version_info[0] >= 3:
     assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')])
 else:
     assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"])
 
@@ -71,23 +71,23 @@ def _getpreferredencoding():
 
 _encodingfixers = {
     '646': lambda: 'ascii',
     'ANSI_X3.4-1968': lambda: 'ascii',
     'mac-roman': _getpreferredencoding
 }
 
 try:
-    encoding = os.environ.get("HGENCODING")
+    encoding = os.environ.get(u"HGENCODING")
     if not encoding:
         encoding = locale.getpreferredencoding() or 'ascii'
         encoding = _encodingfixers.get(encoding, lambda: encoding)()
 except locale.Error:
     encoding = 'ascii'
-encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+encodingmode = os.environ.get(u"HGENCODINGMODE", u"strict")
 fallbackencoding = 'ISO-8859-1'
 
 class localstr(str):
     '''This class allows strings that are unmodified to be
     round-tripped to the local encoding and back'''
     def __new__(cls, u, l):
         s = str.__new__(cls, l)
         s._utf8 = u
@@ -175,17 +175,17 @@ def fromlocal(s):
         return s.decode(encoding, encodingmode).encode("utf-8")
     except UnicodeDecodeError as inst:
         sub = s[max(0, inst.start - 10):inst.start + 10]
         raise error.Abort("decoding near '%s': %s!" % (sub, inst))
     except LookupError as k:
         raise error.Abort(k, hint="please check your locale settings")
 
 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
-wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
+wide = (os.environ.get(u"HGENCODINGAMBIGUOUS", u"narrow") == u"wide"
         and "WFA" or "WF")
 
 def colwidth(s):
     "Find the column width of a string for display in the local encoding"
     return ucolwidth(s.decode(encoding, 'replace'))
 
 def ucolwidth(d):
     "Find the column width of a Unicode string for display"
diff --git a/mercurial/i18n.py b/mercurial/i18n.py
--- a/mercurial/i18n.py
+++ b/mercurial/i18n.py
@@ -10,17 +10,17 @@ from __future__ import absolute_import
 import gettext as gettextmod
 import locale
 import os
 import sys
 
 from . import encoding
 
 # modelled after templater.templatepath:
-if getattr(sys, 'frozen', None) is not None:
+if getattr(sys, u'frozen', None) is not None:
     module = sys.executable
 else:
     module = __file__
 
 try:
     unicode
 except NameError:
     unicode = str
@@ -41,17 +41,17 @@ if (os.name == 'nt'
         _languages = [locale.windows_locale[langid]]
     except (ImportError, AttributeError, KeyError):
         # ctypes not found or unknown langid
         pass
 
 _ugettext = None
 
 def setdatapath(datapath):
-    localedir = os.path.join(datapath, 'locale')
+    localedir = os.path.join(datapath, u'locale')
     t = gettextmod.translation('hg', localedir, _languages, fallback=True)
     global _ugettext
     try:
         _ugettext = t.ugettext
     except AttributeError:
         _ugettext = t.gettext
 
 _msgcache = {}
@@ -70,34 +70,34 @@ def gettext(message):
     if message is None or not _ugettext:
         return message
 
     if message not in _msgcache:
         if type(message) is unicode:
             # goofy unicode docstrings in test
             paragraphs = message.split(u'\n\n')
         else:
-            paragraphs = [p.decode("ascii") for p in message.split('\n\n')]
+            paragraphs = [p.decode(u"ascii") for p in message.split('\n\n')]
         # Be careful not to translate the empty string -- it holds the
         # meta data of the .po file.
         u = u'\n\n'.join([p and _ugettext(p) or '' for p in paragraphs])
         try:
             # encoding.tolocal cannot be used since it will first try to
             # decode the Unicode string. Calling u.decode(enc) really
             # means u.encode(sys.getdefaultencoding()).decode(enc). Since
             # the Python encoding defaults to 'ascii', this fails if the
             # translated string use non-ASCII characters.
-            _msgcache[message] = u.encode(encoding.encoding, "replace")
+            _msgcache[message] = u.encode(encoding.encoding, u"replace")
         except LookupError:
             # An unknown encoding results in a LookupError.
             _msgcache[message] = message
     return _msgcache[message]
 
 def _plain():
-    if 'HGPLAIN' not in os.environ and 'HGPLAINEXCEPT' not in os.environ:
+    if u'HGPLAIN' not in os.environ and u'HGPLAINEXCEPT' not in os.environ:
         return False
-    exceptions = os.environ.get('HGPLAINEXCEPT', '').strip().split(',')
-    return 'i18n' not in exceptions
+    exceptions = os.environ.get(u'HGPLAINEXCEPT', u'').strip().split(u',')
+    return u'i18n' not in exceptions
 
 if _plain():
     _ = lambda message: message
 else:
     _ = gettext
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -40,27 +40,27 @@ class diffopts(object):
     noprefix disables the 'a/' and 'b/' prefixes (ignored in plain mode)
     ignorews ignores all whitespace changes in the diff
     ignorewsamount ignores changes in the amount of whitespace
     ignoreblanklines ignores changes whose lines are all blank
     upgrade generates git diffs to avoid data loss
     '''
 
     defaults = {
-        'context': 3,
-        'text': False,
-        'showfunc': False,
-        'git': False,
-        'nodates': False,
-        'nobinary': False,
-        'noprefix': False,
-        'ignorews': False,
-        'ignorewsamount': False,
-        'ignoreblanklines': False,
-        'upgrade': False,
+        u'context': 3,
+        u'text': False,
+        u'showfunc': False,
+        u'git': False,
+        u'nodates': False,
+        u'nobinary': False,
+        u'noprefix': False,
+        u'ignorews': False,
+        u'ignorewsamount': False,
+        u'ignoreblanklines': False,
+        u'upgrade': False,
         }
 
     __slots__ = defaults.keys()
 
     def __init__(self, **opts):
         for k in self.__slots__:
             v = opts.get(k)
             if v is None:
diff --git a/mercurial/pure/osutil.py b/mercurial/pure/osutil.py
--- a/mercurial/pure/osutil.py
+++ b/mercurial/pure/osutil.py
@@ -75,43 +75,43 @@ if os.name != 'nt':
         _msg_iovlen_t = ctypes.c_size_t
     else:
         _cmsg_len_t = _socklen_t
         _msg_controllen_t = _socklen_t
         _msg_iovlen_t = ctypes.c_int
 
     class _iovec(ctypes.Structure):
         _fields_ = [
-            ('iov_base', ctypes.c_void_p),
-            ('iov_len', ctypes.c_size_t),
+            (u'iov_base', ctypes.c_void_p),
+            (u'iov_len', ctypes.c_size_t),
         ]
 
     class _msghdr(ctypes.Structure):
         _fields_ = [
-            ('msg_name', ctypes.c_void_p),
-            ('msg_namelen', _socklen_t),
-            ('msg_iov', ctypes.POINTER(_iovec)),
-            ('msg_iovlen', _msg_iovlen_t),
-            ('msg_control', ctypes.c_void_p),
-            ('msg_controllen', _msg_controllen_t),
-            ('msg_flags', ctypes.c_int),
+            (u'msg_name', ctypes.c_void_p),
+            (u'msg_namelen', _socklen_t),
+            (u'msg_iov', ctypes.POINTER(_iovec)),
+            (u'msg_iovlen', _msg_iovlen_t),
+            (u'msg_control', ctypes.c_void_p),
+            (u'msg_controllen', _msg_controllen_t),
+            (u'msg_flags', ctypes.c_int),
         ]
 
     class _cmsghdr(ctypes.Structure):
         _fields_ = [
-            ('cmsg_len', _cmsg_len_t),
-            ('cmsg_level', ctypes.c_int),
-            ('cmsg_type', ctypes.c_int),
-            ('cmsg_data', ctypes.c_ubyte * 0),
+            (u'cmsg_len', _cmsg_len_t),
+            (u'cmsg_level', ctypes.c_int),
+            (u'cmsg_type', ctypes.c_int),
+            (u'cmsg_data', ctypes.c_ubyte * 0),
         ]
 
-    _libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
-    _recvmsg = getattr(_libc, 'recvmsg', None)
+    _libc = ctypes.CDLL(ctypes.util.find_library(u'c'), use_errno=True)
+    _recvmsg = getattr(_libc, u'recvmsg', None)
     if _recvmsg:
-        _recvmsg.restype = getattr(ctypes, 'c_ssize_t', ctypes.c_long)
+        _recvmsg.restype = getattr(ctypes, u'c_ssize_t', ctypes.c_long)
         _recvmsg.argtypes = (ctypes.c_int, ctypes.POINTER(_msghdr),
                              ctypes.c_int)
     else:
         # recvmsg isn't always provided by libc; such systems are unsupported
         def _recvmsg(sockfd, msg, flags):
             raise NotImplementedError('unsupported platform')
 
     def _CMSG_FIRSTHDR(msgh):
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -29,17 +29,17 @@ class _pycompatstub(object):
     pass
 
 def _alias(alias, origin, items):
     """ populate a _pycompatstub
 
     copies items from origin to alias
     """
     def hgcase(item):
-        return item.replace('_', '').lower()
+        return item.replace(u'_', u'').lower()
     for item in items:
         try:
             setattr(alias, hgcase(item), getattr(origin, item))
         except AttributeError:
             pass
 
 urlreq = _pycompatstub()
 urlerr = _pycompatstub()
@@ -80,44 +80,44 @@ try:
     _alias(urlerr, urllib2, (
         "HTTPError",
         "URLError",
     ))
 
 except ImportError:
     import urllib.request
     _alias(urlreq, urllib.request, (
-        "AbstractHTTPHandler",
-        "addclosehook",
-        "addinfourl",
-        "BaseHandler",
-        "build_opener",
-        "FileHandler",
-        "FTPHandler",
-        "ftpwrapper",
-        "HTTPHandler",
-        "HTTPSHandler",
-        "install_opener",
-        "pathname2url",
-        "HTTPBasicAuthHandler",
-        "HTTPDigestAuthHandler",
-        "ProxyHandler",
-        "quote",
-        "Request",
-        "splitattr",
-        "splitpasswd",
-        "splitport",
-        "splituser",
-        "unquote",
-        "url2pathname",
-        "urlopen",
+        u"AbstractHTTPHandler",
+        u"addclosehook",
+        u"addinfourl",
+        u"BaseHandler",
+        u"build_opener",
+        u"FileHandler",
+        u"FTPHandler",
+        u"ftpwrapper",
+        u"HTTPHandler",
+        u"HTTPSHandler",
+        u"install_opener",
+        u"pathname2url",
+        u"HTTPBasicAuthHandler",
+        u"HTTPDigestAuthHandler",
+        u"ProxyHandler",
+        u"quote",
+        u"Request",
+        u"splitattr",
+        u"splitpasswd",
+        u"splitport",
+        u"splituser",
+        u"unquote",
+        u"url2pathname",
+        u"urlopen",
     ))
     import urllib.error
     _alias(urlerr, urllib.error, (
-        "HTTPError",
-        "URLError",
+        u"HTTPError",
+        u"URLError",
     ))
 
 try:
     xrange
 except NameError:
     import builtins
     builtins.xrange = range
diff --git a/mercurial/registrar.py b/mercurial/registrar.py
--- a/mercurial/registrar.py
+++ b/mercurial/registrar.py
@@ -44,17 +44,17 @@ class _funcregistrarbase(object):
             self._table = table
 
     def __call__(self, decl, *args, **kwargs):
         return lambda func: self._doregister(func, decl, *args, **kwargs)
 
     def _doregister(self, func, decl, *args, **kwargs):
         name = self._getname(decl)
 
-        if func.__doc__ and not util.safehasattr(func, '_origdoc'):
+        if func.__doc__ and not util.safehasattr(func, u'_origdoc'):
             doc = func.__doc__.strip()
             func._origdoc = doc
             func.__doc__ = self._formatdoc(decl, doc)
 
         self._table[name] = func
         self._extrasetup(name, func, *args, **kwargs)
 
         return func
@@ -78,17 +78,19 @@ class _funcregistrarbase(object):
 
     _docformat = None
 
     def _formatdoc(self, decl, doc):
         """Return formatted document of the registered function for help
 
         'doc' is '__doc__.strip()' of the registered function.
         """
-        return self._docformat % (decl, doc)
+        # docstrings are using the source file encoding, which should be
+        # utf-8.
+        return self._docformat % (decl, doc.encode(u'utf-8'))
 
     def _extrasetup(self, name, func):
         """Execute exra setup for registered function, if needed
         """
         pass
 
 class revsetpredicate(_funcregistrarbase):
     """Decorator to register revset predicate
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -171,21 +171,21 @@ elements = {
     "string": (0, "string", None, None, None),
     "end": (0, None, None, None, None),
 }
 
 keywords = set(['and', 'or', 'not'])
 
 # default set of valid characters for the initial letter of symbols
 _syminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                      if c.isalnum() or c in '._@' or ord(c) > 127)
+                      if c.isalnum() or c in u'._@' or ord(c) > 127)
 
 # default set of valid characters for non-initial letters of symbols
 _symletters = set(c for c in  [chr(i) for i in xrange(256)]
-                  if c.isalnum() or c in '-._/@' or ord(c) > 127)
+                  if c.isalnum() or c in u'-._/@' or ord(c) > 127)
 
 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
     Parse a revset statement into a stream of tokens
 
     ``syminitletters`` is the set of valid characters for the initial
     letter of symbols.
 
@@ -2215,17 +2215,17 @@ def _optimize(x, small):
 
 def optimize(tree):
     _weight, newtree = _optimize(tree, small=True)
     return newtree
 
 # the set of valid characters for the initial letter of symbols in
 # alias declarations and definitions
 _aliassyminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                           if c.isalnum() or c in '._@$' or ord(c) > 127)
+                           if c.isalnum() or c in u'._@$' or ord(c) > 127)
 
 def _parsewith(spec, lookup=None, syminitletters=None):
     """Generate a parse tree of given spec with given tokenizing options
 
     >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
     ('func', ('symbol', 'foo'), ('symbol', '$1'))
     >>> _parsewith('$1')
     Traceback (most recent call last):
diff --git a/mercurial/sslutil.py b/mercurial/sslutil.py
--- a/mercurial/sslutil.py
+++ b/mercurial/sslutil.py
@@ -22,31 +22,31 @@ from . import (
 # Python 2.7.9+ overhauled the built-in SSL/TLS features of Python. It added
 # support for TLS 1.1, TLS 1.2, SNI, system CA stores, etc. These features are
 # all exposed via the "ssl" module.
 #
 # Depending on the version of Python being used, SSL/TLS support is either
 # modern/secure or legacy/insecure. Many operations in this module have
 # separate code paths depending on support in Python.
 
-hassni = getattr(ssl, 'HAS_SNI', False)
+hassni = getattr(ssl, u'HAS_SNI', False)
 
 try:
     OP_NO_SSLv2 = ssl.OP_NO_SSLv2
     OP_NO_SSLv3 = ssl.OP_NO_SSLv3
 except AttributeError:
     OP_NO_SSLv2 = 0x1000000
     OP_NO_SSLv3 = 0x2000000
 
 try:
     # ssl.SSLContext was added in 2.7.9 and presence indicates modern
     # SSL/TLS features are available.
     SSLContext = ssl.SSLContext
     modernssl = True
-    _canloaddefaultcerts = util.safehasattr(SSLContext, 'load_default_certs')
+    _canloaddefaultcerts = util.safehasattr(SSLContext, u'load_default_certs')
 except AttributeError:
     modernssl = False
     _canloaddefaultcerts = False
 
     # We implement SSLContext using the interface from the standard library.
     class SSLContext(object):
         # ssl.wrap_socket gained the "ciphers" named argument in 2.7.
         _supportsciphers = sys.version_info >= (2, 7)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -41,22 +41,22 @@ from . import (
     error,
     i18n,
     osutil,
     parsers,
     pycompat,
 )
 
 for attr in (
-    'empty',
-    'queue',
-    'urlerr',
+    u'empty',
+    u'queue',
+    u'urlerr',
     # we do import urlreq, but we do it outside the loop
     #'urlreq',
-    'stringio',
+    u'stringio',
 ):
     globals()[attr] = getattr(pycompat, attr)
 
 # This line is to make pyflakes happy:
 urlreq = pycompat.urlreq
 
 if os.name == 'nt':
     from . import windows as platform
@@ -108,17 +108,17 @@ samefile = platform.samefile
 samestat = platform.samestat
 setbinary = platform.setbinary
 setflags = platform.setflags
 setsignalhandler = platform.setsignalhandler
 shellquote = platform.shellquote
 spawndetached = platform.spawndetached
 split = platform.split
 sshargs = platform.sshargs
-statfiles = getattr(osutil, 'statfiles', platform.statfiles)
+statfiles = getattr(osutil, u'statfiles', platform.statfiles)
 statisexec = platform.statisexec
 statislink = platform.statislink
 termwidth = platform.termwidth
 testpid = platform.testpid
 umask = platform.umask
 unlink = platform.unlink
 unlinkpath = platform.unlinkpath
 username = platform.username
@@ -525,17 +525,17 @@ class sortdict(dict):
         dict.__setitem__(self, key, val)
 
 class _lrucachenode(object):
     """A node in a doubly linked list.
 
     Holds a reference to nodes on either side as well as a key-value
     pair for the dictionary entry.
     """
-    __slots__ = ('next', 'prev', 'key', 'value')
+    __slots__ = (u'next', u'prev', u'key', u'value')
 
     def __init__(self):
         self.next = None
         self.prev = None
 
         self.key = _notset
         self.value = None
 
@@ -887,22 +887,22 @@ def pathto(root, n1, n2):
     return os.sep.join((['..'] * len(a)) + b) or '.'
 
 def mainfrozen():
     """return True if we are a frozen executable.
 
     The code supports py2exe (most common, Windows only) and tools/freeze
     (portable, not much used).
     """
-    return (safehasattr(sys, "frozen") or # new py2exe
-            safehasattr(sys, "importers") or # old py2exe
-            imp.is_frozen("__main__")) # tools/freeze
+    return (safehasattr(sys, u"frozen") or # new py2exe
+            safehasattr(sys, u"importers") or # old py2exe
+            imp.is_frozen(u"__main__")) # tools/freeze
 
 # the location of data files matching the source code
-if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
+if mainfrozen() and getattr(sys, u'frozen', None) != u'macosx_app':
     # executable version (py2exe) doesn't support __file__
     datapath = os.path.dirname(sys.executable)
 else:
     datapath = os.path.dirname(__file__)
 
 i18n.setdatapath(datapath)
 
 _hgexecutable = None
@@ -2135,17 +2135,17 @@ def getport(port):
 
 def parsebool(s):
     """Parse s into a boolean.
 
     If s is not a valid boolean, returns None.
     """
     return _booleans.get(s.lower(), None)
 
-_hexdig = '0123456789ABCDEFabcdef'
+_hexdig = u'0123456789ABCDEFabcdef'
 _hextochr = dict((a + b, chr(int(a + b, 16)))
                  for a in _hexdig for b in _hexdig)
 
 def _urlunquote(s):
     """Decode HTTP/HTML % encoding.
 
     >>> _urlunquote('abc%20def')
     'abc def'
@@ -2628,17 +2628,17 @@ class dirs(object):
             del dirs[base]
 
     def __iter__(self):
         return self._dirs.iterkeys()
 
     def __contains__(self, d):
         return d in self._dirs
 
-if safehasattr(parsers, 'dirs'):
+if safehasattr(parsers, u'dirs'):
     dirs = parsers.dirs
 
 def finddirs(path):
     pos = path.rfind('/')
     while pos != -1:
         yield path[:pos]
         pos = path.rfind('/', 0, pos)
 


More information about the Mercurial-devel mailing list