[PATCH] util: wrap lines with multi-byte characters correctly (issue2943)

Mads Kiilerich mads at kiilerich.com
Sat Aug 6 16:53:10 CDT 2011


# HG changeset patch
# User Mads Kiilerich <mads at kiilerich.com>
# Date 1312667540 -7200
# Branch stable
# Node ID 522ef2a25786c3666d4381d38944fe6d3aa64e5d
# Parent  f32a2989ff585f0f452f25806750477fc631fc9a
util: wrap lines with multi-byte characters correctly (issue2943)

This re-introduces the unicode conversion that was lost in d320e70442a5 5 years
ago and which had the comment:
  To avoid corrupting multi-byte characters in line, we must wrap
  a Unicode string instead of a bytestring.

diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1148,16 +1148,14 @@
         def __init__(self, **kwargs):
             textwrap.TextWrapper.__init__(self, **kwargs)
 
-        def _cutdown(self, str, space_left):
+        def _cutdown(self, ucstr, space_left):
             l = 0
-            ucstr = unicode(str, encoding.encoding)
             colwidth = unicodedata.east_asian_width
             for i in xrange(len(ucstr)):
                 l += colwidth(ucstr[i]) in 'WFA' and 2 or 1
                 if space_left < l:
-                    return (ucstr[:i].encode(encoding.encoding),
-                            ucstr[i:].encode(encoding.encoding))
-            return str, ''
+                    return (ucstr[:i], ucstr[i:])
+            return ucstr, ''
 
         # overriding of base class
         def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
@@ -1179,10 +1177,13 @@
     if width <= maxindent:
         # adjust for weird terminal size
         width = max(78, maxindent + 1)
+    line = line.decode(encoding.encoding, encoding.encodingmode)
+    initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
+    hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
     wrapper = MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
-    return wrapper.fill(line)
+    return wrapper.fill(line).encode(encoding.encoding)
 
 def iterlines(iterator):
     for chunk in iterator:
diff --git a/tests/test-encoding-align.t b/tests/test-encoding-align.t
--- a/tests/test-encoding-align.t
+++ b/tests/test-encoding-align.t
@@ -22,14 +22,14 @@
   > cmdtable = {
   >     'showoptlist':
   >         (showoptlist,
-  >          [('s', 'opt1', '', 'short width',  '""" + s + """'),
-  >           ('m', 'opt2', '', 'middle width', '""" + m + """'),
-  >           ('l', 'opt3', '', 'long width',   '""" + l + """')
+  >          [('s', 'opt1', '', 'short width'  + ' %(s)s' * 8, '%(s)s'),
+  >           ('m', 'opt2', '', 'middle width' + ' %(m)s' * 8, '%(m)s'),
+  >           ('l', 'opt3', '', 'long width'   + ' %(l)s' * 8, '%(l)s')
   >          ],
   >          ""
   >         )
   > }
-  > """)
+  > """ % globals())
   > f.close()
   > EOF
   $ S=`cat s`
@@ -52,9 +52,11 @@
   
   options:
   
-   -s --opt1 \xe7\x9f\xad\xe5\x90\x8d          short width (esc)
-   -m --opt2 MIDDLE_       middle width
-   -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d  long width (esc)
+   -s --opt1 \xe7\x9f\xad\xe5\x90\x8d          short width \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d (esc)
+   -m --opt2 MIDDLE_       middle width MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_
+                           MIDDLE_ MIDDLE_ MIDDLE_
+   -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d  long width \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
+                           \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
   
   use "hg -v help showoptlist" to show global options
 


More information about the Mercurial-devel mailing list