[PATCH STABLE] win32mbcs: add reversing wrapper for some unicode-incompatible functions

Shun-ichi Goto shunichi.goto at gmail.com
Wed Oct 17 04:34:23 CDT 2012


# HG changeset patch
# User Shun-ichi GOTO <shunichi.goto at gmail.com>
# Date 1350464940 -32400
# Node ID 59e2412b40596d398cecfc29d3a1b7f507b4fff2
# Parent  72c234081ae1350220132c69750f5a093902a1e7
win32mbcs: add reversing wrapper for some unicode-incompatible functions.

This changeset fixes a problem that prevents win32mbcs from being used
with Mercurial 2.3 or later.  It also includes small changes for PEP8
and check-code.py compliance.

The problem is a side effect of the modification of encoding.upper()
(changeset 17236:9fb8312dbdbd): upper() does not accept a unicode
string argument, so the wrapped util.normcase(), which uses upper(),
fails. In other words, upper() and lower() are unicode-incompatible.

To fix this issue, this changeset adds a new wrapper that performs the
reverse conversion (unicode to str) around lower() and upper(), so that
they can be called safely.

diff -r 72c234081ae1350220132c69750f5a093902a1e7 -r 59e2412b40596d398cecfc29d3a1b7f507b4fff2 hgext/win32mbcs.py
--- a/hgext/win32mbcs.py	Mon Oct 15 17:43:05 2012 +0200
+++ b/hgext/win32mbcs.py	Wed Oct 17 18:09:00 2012 +0900
@@ -45,13 +45,15 @@
 It is useful for the users who want to commit with UTF-8 log message.
 '''
 
-import os, sys
+import os
+import sys
 from mercurial.i18n import _
 from mercurial import util, encoding
 testedwith = 'internal'
 
 _encoding = None                                # see extsetup
 
+
 def decode(arg):
     if isinstance(arg, str):
         uarg = arg.decode(_encoding)
@@ -67,6 +69,7 @@
             arg[k] = decode(v)
     return arg
 
+
 def encode(arg):
     if isinstance(arg, unicode):
         return arg.encode(_encoding)
@@ -79,6 +82,7 @@
             arg[k] = encode(v)
     return arg
 
+
 def appendsep(s):
     # ensure the path ends with os.sep, appending it if necessary.
     try:
@@ -89,19 +93,30 @@
         s += os.sep
     return s
 
-def wrapper(func, args, kwds):
-    # check argument is unicode, then call original
+
+def basewrapper(func, argtype, enc, dec, args, kwds):
+    # check check already converted, then call original
     for arg in args:
-        if isinstance(arg, unicode):
+        if isinstance(arg, argtype):
             return func(*args, **kwds)
 
     try:
-        # convert arguments to unicode, call func, then convert back
-        return encode(func(*decode(args), **decode(kwds)))
+        # convert string arguments, call func, then convert back the
+        # return value.
+        return enc(func(*dec(args), **dec(kwds)))
     except UnicodeError:
         raise util.Abort(_("[win32mbcs] filename conversion failed with"
                          " %s encoding\n") % (_encoding))
 
+
+def wrapper(func, args, kwds):
+    return basewrapper(func, unicode, encode, decode, args, kwds)
+
+
+def reversewrapper(func, args, kwds):
+    return basewrapper(func, str, decode, encode, args, kwds)
+
+
 def wrapperforlistdir(func, args, kwds):
     # Ensure 'path' argument ends with os.sep to avoids
     # misinterpreting last 0x5c of MBCS 2nd byte as path separator.
@@ -112,18 +127,22 @@
         kwds['path'] = appendsep(kwds['path'])
     return func(*args, **kwds)
 
+
 def wrapname(name, wrapper):
     module, name = name.rsplit('.', 1)
     module = sys.modules[module]
     func = getattr(module, name)
+
     def f(*args, **kwds):
         return wrapper(func, args, kwds)
+
     try:
-        f.__name__ = func.__name__ # fails with Python 2.3
+        f.__name__ = func.__name__             # fails with Python 2.3
     except Exception:
         pass
     setattr(module, name, f)
 
+
 # List of functions to be wrapped.
 # NOTE: os.path.dirname() and os.path.basename() are safe because
 #       they use result of os.path.split()
@@ -133,6 +152,11 @@
  mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
  mercurial.util.checkwinfilename mercurial.util.checkosfilename'''
 
+# These functions must be called with a locally encoded string
+# because they expect their argument to be a locally encoded string
+# and cause problems when given a unicode string.
+rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower'''
+
 # List of Windows specific functions to be wrapped.
 winfuncs = '''os.path.splitunc'''
 
@@ -142,6 +166,7 @@
  sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
  shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
 
+
 def extsetup(ui):
     # TODO: decide use of config section for this extension
     if ((not os.path.supports_unicode_filenames) and
@@ -159,6 +184,9 @@
             for f in winfuncs.split():
                 wrapname(f, wrapper)
         wrapname("mercurial.osutil.listdir", wrapperforlistdir)
+        # wrap functions to be called with local byte string arguments
+        for f in rfuncs.split():
+            wrapname(f, reversewrapper)
         # Check sys.args manually instead of using ui.debug() because
         # command line options is not yet applied when
         # extensions.loadall() is called.


More information about the Mercurial-devel mailing list