[PATCH 1 of 1] replace Python standard textwrap by MBCS sensitive one for i18n text

FUJIWARA Katsunori fujiwara at ascade.co.jp
Sun May 16 06:15:27 CDT 2010


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1274008290 -32400
# Node ID b50622bb9e37a544d98c6f98821a29fe72b18537
# Parent  6e65b451b62e6b5bf57e5df34e56214c78e762d6
replace Python standard textwrap by MBCS sensitive one for i18n text

The MBCS-sensitive textwrap was originally implemented
by ITO Nobuaki <daydream.trippers at gmail.com>.

diff -r 6e65b451b62e -r b50622bb9e37 mercurial/encoding.py
--- a/mercurial/encoding.py	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/encoding.py	Sun May 16 20:11:30 2010 +0900
@@ -67,11 +67,21 @@
     except LookupError, k:
         raise error.Abort("%s, please check your locale settings" % k)
 
+# east asian character width
+eacwidth = {
+    "W": 2, # Wide
+    "F": 2, # Full-width
+}
+
+try:
+    eacwidth["A"] = int(os.environ.get("HGUCACWIDTH"))
+except:
+    pass # HGUCACWIDTH unset or not an integer: keep default width of 1
+
 def colwidth(s):
     "Find the column width of a UTF-8 string for display"
     d = s.decode(encoding, 'replace')
     if hasattr(unicodedata, 'east_asian_width'):
         w = unicodedata.east_asian_width
-        return sum([w(c) in 'WF' and 2 or 1 for c in d])
+        return sum([eacwidth.get(w(c), 1) for c in d])
     return len(d)
-
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/help/environment.txt
--- a/mercurial/help/environment.txt	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/help/environment.txt	Sun May 16 20:11:30 2010 +0900
@@ -24,6 +24,14 @@
     "ignore", which drops them. This setting can be overridden with
     the --encodingmode command-line option.
 
+HGUCACWIDTH
+    This sets the column width of East Asian ambiguous-width characters,
+    which influences line wrapping of help documents. The default is 1.
+
+    This should be set to 2 for:
+
+    - Japanese
+
 HGMERGE
     An executable to use for resolving merge conflicts. The program
     will be executed with three arguments: local file, remote file,
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/i18n.py
--- a/mercurial/i18n.py	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/i18n.py	Sun May 16 20:11:30 2010 +0900
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 import encoding
-import gettext, sys, os
+import gettext, sys, os, textwrap, unicodedata
 
 # modelled after templater.templatepath:
 if hasattr(sys, 'frozen'):
@@ -53,3 +53,37 @@
 else:
     _ = gettext
 
+#### naming below follows the conventions of the 'textwrap' module
+
+class MBTextWrapper(textwrap.TextWrapper):
+    def __init__(self, **kwargs):
+        textwrap.TextWrapper.__init__(self, **kwargs)
+
+    def _cutdown(self, str, space_left):
+        l = 0
+        ucstr = unicode(str, encoding.encoding)
+        w = unicodedata.east_asian_width
+        eacwidth = encoding.eacwidth
+        for i in xrange(len(ucstr)):
+            l += eacwidth.get(w(ucstr[i]), 1)
+            if space_left < l:
+                return (ucstr[:i].encode(encoding.encoding),
+                        ucstr[i:].encode(encoding.encoding))
+        return str, ''
+
+    # ----------------------------------------
+    # overriding of base class
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        space_left = max(width - cur_len, 1)
+
+        if self.break_long_words:
+            cut, res = self._cutdown(reversed_chunks[-1], space_left)
+            cur_line.append(cut)
+            reversed_chunks[-1] = res
+        elif not cur_line:
+            cur_line.append(reversed_chunks.pop())
+
+# these may be overridden by custom wrapping/filling functions
+wraptext = lambda t, **kwa: MBTextWrapper(**kwa).wrap(t)
+filltext = lambda t, **kwa: '\n'.join(MBTextWrapper(**kwa).wrap(t))
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/minirst.py
--- a/mercurial/minirst.py	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/minirst.py	Sun May 16 20:11:30 2010 +0900
@@ -35,8 +35,8 @@
 - inline literals (no other inline markup is not recognized)
 """
 
-import re, sys, textwrap
-
+import re, sys
+import i18n
 
 def findblocks(text):
     """Find continuous blocks of lines in text.
@@ -301,7 +301,7 @@
         hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
         defindent = indent + hang * ' '
         text = ' '.join(map(str.strip, block['lines'][1:]))
-        return "%s\n%s" % (term, textwrap.fill(text, width=width,
+        return "%s\n%s" % (term, i18n.filltext(text, width=width,
                                                initial_indent=defindent,
                                                subsequent_indent=defindent))
     subindent = indent
@@ -335,7 +335,7 @@
         subindent = indent + (len(option) + len(arg)) * ' '
 
     text = ' '.join(map(str.strip, block['lines']))
-    return textwrap.fill(text, width=width,
+    return i18n.filltext(text, width=width,
                          initial_indent=indent,
                          subsequent_indent=subindent)
 
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/templatefilters.py
--- a/mercurial/templatefilters.py	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/templatefilters.py	Sun May 16 20:11:30 2010 +0900
@@ -5,8 +5,8 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import cgi, re, os, time, urllib, textwrap
-import util, encoding
+import cgi, re, os, time, urllib
+import util, encoding, i18n
 
 def stringify(thing):
     '''turn nested template iterator into string.'''
@@ -69,7 +69,7 @@
             yield text[start:m.start(0)], m.group(1)
             start = m.end(1)
 
-    return "".join([space_re.sub(' ', textwrap.fill(para, width)) + rest
+    return "".join([space_re.sub(' ', i18n.filltext(para, width=width)) + rest
                     for para, rest in findparas()])
 
 def firstline(text):
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/util.py
--- a/mercurial/util.py	Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/util.py	Sun May 16 20:11:30 2010 +0900
@@ -14,9 +14,9 @@
 """
 
 from i18n import _
-import error, osutil, encoding
+import error, osutil, encoding, i18n
 import cStringIO, errno, re, shutil, sys, tempfile, traceback
-import os, stat, time, calendar, textwrap, signal
+import os, stat, time, calendar, signal
 import imp
 
 # Python compatibility
@@ -1259,14 +1259,7 @@
         # adjust for weird terminal size
         width = max(78, hangindent + 1)
     padding = '\n' + ' ' * hangindent
-    # To avoid corrupting multi-byte characters in line, we must wrap
-    # a Unicode string instead of a bytestring.
-    try:
-        u = line.decode(encoding.encoding)
-        w = padding.join(textwrap.wrap(u, width=width - hangindent))
-        return w.encode(encoding.encoding)
-    except UnicodeDecodeError:
-        return padding.join(textwrap.wrap(line, width=width - hangindent))
+    return padding.join(i18n.wraptext(line, width=width - hangindent))
 
 def iterlines(iterator):
     for chunk in iterator:


More information about the Mercurial-devel mailing list