[PATCH] py3: have registrar process docstrings in bytes

Yuya Nishihara yuya at tcha.org
Tue Apr 4 15:56:54 UTC 2017


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1491320098 -32400
#      Wed Apr 05 00:34:58 2017 +0900
# Node ID de8d9a4ba73002af9d2992a15e0a196c2fcaf217
# Parent  07edd8c2f59a8f19283d038ea3af5fa5532bd6dc
py3: have registrar process docstrings in bytes

Mixing bytes and unicode creates a mess. Do things in bytes as much as possible.

The new sysbytes() helper only takes care of ASCII characters, but avoids
raising a nasty Unicode exception. This is the same design principle as sysstr().

diff --git a/hgext/show.py b/hgext/show.py
--- a/hgext/show.py
+++ b/hgext/show.py
@@ -19,6 +19,7 @@ from mercurial import (
     cmdutil,
     commands,
     error,
+    pycompat,
     registrar,
 )
 
@@ -133,5 +134,5 @@ def showbookmarks(ui, repo, fm):
 # TODO make this more robust.
 longest = max(map(len, showview._table.keys()))
 for key in sorted(showview._table.keys()):
-    cmdtable['show'][0].__doc__ += ' %s   %s\n' % (
-        key.ljust(longest), showview._table[key]._origdoc)
+    cmdtable['show'][0].__doc__ += pycompat.sysstr(' %s   %s\n' % (
+        key.ljust(longest), showview._table[key]._origdoc))
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -142,6 +142,14 @@ if ispy3:
         """Iterate bytes as if it were a str object of Python 2"""
         return map(bytechr, s)
 
+    def sysbytes(s):
+        """Convert an internal str (e.g. keyword, __doc__) back to bytes
+
+        This never raises UnicodeEncodeError, but only ASCII characters
+        can be round-trip by sysstr(sysbytes(s)).
+        """
+        return s.encode(u'utf-8')
+
     def sysstr(s):
         """Return a keyword str to be passed to Python functions such as
         getattr() and str.encode()
@@ -210,6 +218,7 @@ else:
     bytechr = chr
     bytestr = str
     iterbytestr = iter
+    sysbytes = identity
     sysstr = identity
 
     # Partial backport from os.py in Python 3, which only accepts bytes.
diff --git a/mercurial/registrar.py b/mercurial/registrar.py
--- a/mercurial/registrar.py
+++ b/mercurial/registrar.py
@@ -56,9 +56,9 @@ class _funcregistrarbase(object):
             raise error.ProgrammingError(msg)
 
         if func.__doc__ and not util.safehasattr(func, '_origdoc'):
-            doc = func.__doc__.strip()
+            doc = pycompat.sysbytes(func.__doc__).strip()
             func._origdoc = doc
-            func.__doc__ = self._formatdoc(decl, doc)
+            func.__doc__ = pycompat.sysstr(self._formatdoc(decl, doc))
 
         self._table[name] = func
         self._extrasetup(name, func, *args, **kwargs)
@@ -127,7 +127,7 @@ class revsetpredicate(_funcregistrarbase
     Otherwise, explicit 'revset.loadpredicate()' is needed.
     """
     _getname = _funcregistrarbase._parsefuncdecl
-    _docformat = pycompat.sysstr("``%s``\n    %s")
+    _docformat = "``%s``\n    %s"
 
     def _extrasetup(self, name, func, safe=False, takeorder=False):
         func._safe = safe
@@ -166,7 +166,7 @@ class filesetpredicate(_funcregistrarbas
     Otherwise, explicit 'fileset.loadpredicate()' is needed.
     """
     _getname = _funcregistrarbase._parsefuncdecl
-    _docformat = pycompat.sysstr("``%s``\n    %s")
+    _docformat = "``%s``\n    %s"
 
     def _extrasetup(self, name, func, callstatus=False, callexisting=False):
         func._callstatus = callstatus
@@ -175,7 +175,7 @@ class filesetpredicate(_funcregistrarbas
 class _templateregistrarbase(_funcregistrarbase):
     """Base of decorator to register functions as template specific one
     """
-    _docformat = pycompat.sysstr(":%s: %s")
+    _docformat = ":%s: %s"
 
 class templatekeyword(_templateregistrarbase):
     """Decorator to register template keyword


More information about the Mercurial-devel mailing list