[PATCH 2 of 4 RFC] i18n: add hook point to convert MBCS strings before backslash sensitive process

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Thu May 24 12:04:26 CDT 2012


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1337873233 -32400
# Branch stable
# Node ID a85e6240d0ab23191d158390095dd48852dcdc39
# Parent  5b34156d5fec045557bef52ccd661d680090daf1
i18n: add hook point to convert MBCS strings before backslash sensitive process

The added hook point is "filter()" in "mercurial/encoding.py".

When win32mbcs is enabled, "filter()" is replaced with a specific
implementation that does the following:

  - for byte sequences:
    1. convert to unicode
    2. apply filter function on unicode object
    3. convert from unicode to byte sequence in local encoding
       (replacing each MBCS part of the string with its own
       '\xXX' escaped form, for regexp safety)
    4. return above byte sequence

  - for unicode objects:
    1. apply filter function on unicode object
    2. convert from unicode to byte sequence in local encoding
       (replacing each MBCS part of the string with its own
       '\xXX' escaped form, for regexp safety)
    3. convert byte sequence to unicode
    4. return above unicode

diff -r 5b34156d5fec -r a85e6240d0ab hgext/win32mbcs.py
--- a/hgext/win32mbcs.py	Fri May 25 00:20:45 2012 +0900
+++ b/hgext/win32mbcs.py	Fri May 25 00:27:13 2012 +0900
@@ -148,6 +148,22 @@
         raise util.Abort(_("[win32mbcs] conversion in escaping failed with"
                          " %s encoding\n") % (_encoding))
 
+def safefilter(s, filter, escape=True):
+    try:
+        if isinstance(s, unicode):
+            if escape:
+                return decode(escapeencode(filter(s), None))
+            else:
+                return filter(s)
+        else:
+            if escape:
+                return escapeencode(filter(decode(s)), None)
+            else:
+                return encode(filter(decode(s)))
+    except UnicodeError:
+        raise util.Abort(_("[win32mbcs] conversion in filtering failed with"
+                         " %s encoding\n") % (_encoding))
+
 def replacename(name, replacement):
     module, name = name.rsplit('.', 1)
     module = sys.modules[module]
@@ -194,6 +210,7 @@
                 wrapname(f, wrapper)
         wrapname("mercurial.osutil.listdir", wrapperforlistdir)
         replacename("mercurial.encoding.escape", safeescape)
+        replacename("mercurial.encoding.filter", safefilter)
         # Check sys.args manually instead of using ui.debug() because
         # command line options is not yet applied when
         # extensions.loadall() is called.
diff -r 5b34156d5fec -r a85e6240d0ab mercurial/encoding.py
--- a/mercurial/encoding.py	Fri May 25 00:20:45 2012 +0900
+++ b/mercurial/encoding.py	Fri May 25 00:27:13 2012 +0900
@@ -214,6 +214,15 @@
     else:
         return s
 
+def filter(s, filter, escape=True):
+    """Hook point to apply FILTER func on string S safely,
+    even if current encoding is problematic one.
+
+    MBCS parts of result should be escaped as regexp safe, if ESCAPE is True
+    in problematic encoding.
+    """
+    return filter(s)
+
 def toutf8b(s):
     '''convert a local, possibly-binary string into UTF-8b
 
diff -r 5b34156d5fec -r a85e6240d0ab mercurial/match.py
--- a/mercurial/match.py	Fri May 25 00:20:45 2012 +0900
+++ b/mercurial/match.py	Fri May 25 00:27:13 2012 +0900
@@ -249,14 +249,14 @@
     elif kind == 'path':
         return '^' + encoding.escape(name, re.escape) + '(?:/|$)'
     elif kind == 'relglob':
-        return '(?:|.*/)' + _globre(name) + tail
+        return '(?:|.*/)' + encoding.filter(name, _globre) + tail
     elif kind == 'relpath':
         return encoding.escape(name, re.escape) + '(?:/|$)'
     elif kind == 'relre':
         if name.startswith('^'):
             return encoding.escape(name)
         return '.*' + encoding.escape(name)
-    return _globre(name) + tail
+    return encoding.filter(name, _globre) + tail
 
 def _buildmatch(ctx, pats, tail):
     fset, pats = _expandsets(pats, ctx)


More information about the Mercurial-devel mailing list