[PATCH] util: wrap lines with multi-byte characters correctly (issue2943)
Mads Kiilerich
mads at kiilerich.com
Sat Aug 6 16:53:10 CDT 2011
# HG changeset patch
# User Mads Kiilerich <mads at kiilerich.com>
# Date 1312667540 -7200
# Branch stable
# Node ID 522ef2a25786c3666d4381d38944fe6d3aa64e5d
# Parent f32a2989ff585f0f452f25806750477fc631fc9a
util: wrap lines with multi-byte characters correctly (issue2943)
This re-introduces the unicode conversion that was lost in d320e70442a5 5 years
ago and that had the comment:
To avoid corrupting multi-byte characters in line, we must wrap
a Unicode string instead of a bytestring.
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1148,16 +1148,14 @@
def __init__(self, **kwargs):
textwrap.TextWrapper.__init__(self, **kwargs)
- def _cutdown(self, str, space_left):
+ def _cutdown(self, ucstr, space_left):
l = 0
- ucstr = unicode(str, encoding.encoding)
colwidth = unicodedata.east_asian_width
for i in xrange(len(ucstr)):
l += colwidth(ucstr[i]) in 'WFA' and 2 or 1
if space_left < l:
- return (ucstr[:i].encode(encoding.encoding),
- ucstr[i:].encode(encoding.encoding))
- return str, ''
+ return (ucstr[:i], ucstr[i:])
+ return ucstr, ''
# overriding of base class
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
@@ -1179,10 +1177,13 @@
if width <= maxindent:
# adjust for weird terminal size
width = max(78, maxindent + 1)
+ line = line.decode(encoding.encoding, encoding.encodingmode)
+ initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
+ hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
wrapper = MBTextWrapper(width=width,
initial_indent=initindent,
subsequent_indent=hangindent)
- return wrapper.fill(line)
+ return wrapper.fill(line).encode(encoding.encoding)
def iterlines(iterator):
for chunk in iterator:
diff --git a/tests/test-encoding-align.t b/tests/test-encoding-align.t
--- a/tests/test-encoding-align.t
+++ b/tests/test-encoding-align.t
@@ -22,14 +22,14 @@
> cmdtable = {
> 'showoptlist':
> (showoptlist,
- > [('s', 'opt1', '', 'short width', '""" + s + """'),
- > ('m', 'opt2', '', 'middle width', '""" + m + """'),
- > ('l', 'opt3', '', 'long width', '""" + l + """')
+ > [('s', 'opt1', '', 'short width' + ' %(s)s' * 8, '%(s)s'),
+ > ('m', 'opt2', '', 'middle width' + ' %(m)s' * 8, '%(m)s'),
+ > ('l', 'opt3', '', 'long width' + ' %(l)s' * 8, '%(l)s')
> ],
> ""
> )
> }
- > """)
+ > """ % globals())
> f.close()
> EOF
$ S=`cat s`
@@ -52,9 +52,11 @@
options:
- -s --opt1 \xe7\x9f\xad\xe5\x90\x8d short width (esc)
- -m --opt2 MIDDLE_ middle width
- -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d long width (esc)
+ -s --opt1 \xe7\x9f\xad\xe5\x90\x8d short width \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d (esc)
+ -m --opt2 MIDDLE_ middle width MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_
+ MIDDLE_ MIDDLE_ MIDDLE_
+ -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d long width \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
+ \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
use "hg -v help showoptlist" to show global options
More information about the Mercurial-devel
mailing list