[PATCH 1 of 1] replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori
fujiwara at ascade.co.jp
Sun May 16 06:15:27 CDT 2010
# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1274008290 -32400
# Node ID b50622bb9e37a544d98c6f98821a29fe72b18537
# Parent 6e65b451b62e6b5bf57e5df34e56214c78e762d6
replace Python standard textwrap by MBCS sensitive one for i18n text
The MBCS-sensitive textwrap was originally implemented
by ITO Nobuaki <daydream.trippers at gmail.com>.
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/encoding.py
--- a/mercurial/encoding.py Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/encoding.py Sun May 16 20:11:30 2010 +0900
@@ -67,11 +67,21 @@
except LookupError, k:
raise error.Abort("%s, please check your locale settings" % k)
+# column width by East Asian Width class; classes not listed count as 1
+eacwidth = {
+    "W": 2, # Wide
+    "F": 2, # Full-width
+}
+
+try:
+    eacwidth["A"] = int(os.environ.get("HGUCACWIDTH"))
+except (TypeError, ValueError):
+    pass # HGUCACWIDTH unset or not an integer: leave "A" out of the table
+
def colwidth(s):
"Find the column width of a UTF-8 string for display"
d = s.decode(encoding, 'replace')
if hasattr(unicodedata, 'east_asian_width'):
w = unicodedata.east_asian_width
- return sum([w(c) in 'WF' and 2 or 1 for c in d])
+ return sum([eacwidth.get(w(c), 1) for c in d])
return len(d)
-
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/help/environment.txt
--- a/mercurial/help/environment.txt Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/help/environment.txt Sun May 16 20:11:30 2010 +0900
@@ -24,6 +24,14 @@
"ignore", which drops them. This setting can be overridden with
the --encodingmode command-line option.
+HGUCACWIDTH
+ This sets the column width of East Asian ambiguous-width characters.
+ It influences line wrapping of help documents. The default is 1.
+
+ Set this to 2 for:
+
+ - Japanese
+
HGMERGE
An executable to use for resolving merge conflicts. The program
will be executed with three arguments: local file, remote file,
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/i18n.py
--- a/mercurial/i18n.py Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/i18n.py Sun May 16 20:11:30 2010 +0900
@@ -6,7 +6,7 @@
# GNU General Public License version 2 or any later version.
import encoding
-import gettext, sys, os
+import gettext, sys, os, textwrap, unicodedata
# modelled after templater.templatepath:
if hasattr(sys, 'frozen'):
@@ -53,3 +53,70 @@
else:
_ = gettext
+#### the implementation below follows the naming convention of 'textwrap'
+
+class MBTextWrapper(textwrap.TextWrapper):
+    """TextWrapper that splits over-long words by display column.
+
+    Only long-word handling is overridden: the cut position is found
+    by summing per-character widths looked up in encoding.eacwidth,
+    so the cut falls between characters rather than between bytes.
+    """
+    def __init__(self, **kwargs):
+        textwrap.TextWrapper.__init__(self, **kwargs)
+
+    def _cutdown(self, str, space_left, force=False):
+        """Split byte string 'str' into a (head, rest) pair.
+
+        'head' is the longest prefix whose characters, decoded with
+        encoding.encoding, sum to at most 'space_left' columns. With
+        'force', 'head' takes the first character even if that one is
+        wider than 'space_left'.
+
+        If 'str' cannot be decoded, it is cut at byte offset
+        'space_left' instead of raising UnicodeDecodeError.
+        """
+        try:
+            ucstr = unicode(str, encoding.encoding)
+        except UnicodeDecodeError:
+            # undecodable input: cut by bytes rather than abort
+            return str[:space_left], str[space_left:]
+        l = 0
+        w = unicodedata.east_asian_width
+        eacwidth = encoding.eacwidth
+        for i in xrange(len(ucstr)):
+            l += eacwidth.get(w(ucstr[i]), 1)
+            if space_left < l:
+                if force and i == 0:
+                    # an empty head would consume nothing
+                    i = 1
+                return (ucstr[:i].encode(encoding.encoding),
+                        ucstr[i:].encode(encoding.encoding))
+        return str, ''
+
+    # ----------------------------------------
+    # overriding of base class
+
+    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
+        """Move as much of the last chunk as fits onto 'cur_line'.
+
+        'reversed_chunks[-1]' is the word being handled. When
+        break_long_words is set it is split by column width; otherwise
+        it is moved whole, and only if 'cur_line' is still empty.
+        """
+        # always offer at least one column
+        space_left = max(width - cur_len, 1)
+
+        if self.break_long_words:
+            # on an empty line an empty cut would leave the chunk list
+            # unchanged, so force at least one character there
+            cut, res = self._cutdown(reversed_chunks[-1], space_left,
+                                     force=not cur_line)
+            cur_line.append(cut)
+            reversed_chunks[-1] = res
+        elif not cur_line:
+            cur_line.append(reversed_chunks.pop())
+
+# these are overwritable by custom wrapping/filling methods
+wraptext = lambda t, **kwa: MBTextWrapper(**kwa).wrap(t)
+filltext = lambda t, **kwa: '\n'.join(MBTextWrapper(**kwa).wrap(t))
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/minirst.py
--- a/mercurial/minirst.py Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/minirst.py Sun May 16 20:11:30 2010 +0900
@@ -35,8 +35,8 @@
- inline literals (no other inline markup is not recognized)
"""
-import re, sys, textwrap
-
+import re, sys
+import i18n
def findblocks(text):
"""Find continuous blocks of lines in text.
@@ -301,7 +301,7 @@
hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
defindent = indent + hang * ' '
text = ' '.join(map(str.strip, block['lines'][1:]))
- return "%s\n%s" % (term, textwrap.fill(text, width=width,
+ return "%s\n%s" % (term, i18n.filltext(text, width=width,
initial_indent=defindent,
subsequent_indent=defindent))
subindent = indent
@@ -335,7 +335,7 @@
subindent = indent + (len(option) + len(arg)) * ' '
text = ' '.join(map(str.strip, block['lines']))
- return textwrap.fill(text, width=width,
+ return i18n.filltext(text, width=width,
initial_indent=indent,
subsequent_indent=subindent)
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/templatefilters.py
--- a/mercurial/templatefilters.py Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/templatefilters.py Sun May 16 20:11:30 2010 +0900
@@ -5,8 +5,8 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
-import cgi, re, os, time, urllib, textwrap
-import util, encoding
+import cgi, re, os, time, urllib
+import util, encoding, i18n
def stringify(thing):
'''turn nested template iterator into string.'''
@@ -69,7 +69,7 @@
yield text[start:m.start(0)], m.group(1)
start = m.end(1)
- return "".join([space_re.sub(' ', textwrap.fill(para, width)) + rest
+ return "".join([space_re.sub(' ', i18n.filltext(para, width=width)) + rest
for para, rest in findparas()])
def firstline(text):
diff -r 6e65b451b62e -r b50622bb9e37 mercurial/util.py
--- a/mercurial/util.py Sat May 15 21:24:23 2010 -0500
+++ b/mercurial/util.py Sun May 16 20:11:30 2010 +0900
@@ -14,9 +14,9 @@
"""
from i18n import _
-import error, osutil, encoding
+import error, osutil, encoding, i18n
import cStringIO, errno, re, shutil, sys, tempfile, traceback
-import os, stat, time, calendar, textwrap, signal
+import os, stat, time, calendar, signal
import imp
# Python compatibility
@@ -1259,14 +1259,7 @@
# adjust for weird terminal size
width = max(78, hangindent + 1)
padding = '\n' + ' ' * hangindent
- # To avoid corrupting multi-byte characters in line, we must wrap
- # a Unicode string instead of a bytestring.
- try:
- u = line.decode(encoding.encoding)
- w = padding.join(textwrap.wrap(u, width=width - hangindent))
- return w.encode(encoding.encoding)
- except UnicodeDecodeError:
- return padding.join(textwrap.wrap(line, width=width - hangindent))
+ return padding.join(i18n.wraptext(line, width=width - hangindent))
def iterlines(iterator):
for chunk in iterator:
More information about the Mercurial-devel
mailing list