[PATCH] i18n: use encoding.colwidth() instead of len() for correct column width

FUJIWARA Katsunori fujiwara at ascade.co.jp
Sat Jul 17 11:09:34 CDT 2010


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1279382810 -32400
# Branch stable
# Node ID d342305f7635e607eef62fbfc9dc6f0e13be34d4
# Parent  76454cbc11e48d692262843d0c87e81bbb2c053c
i18n: use encoding.colwidth() instead of len() for correct column width

With some encoding and language combinations (e.g. UTF-8 and Japanese),
characters are encoded into byte sequences that are longer than their
display column width.

So, encoding.colwidth() should be applied instead of len() on i18n
strings.

In addition, formatting with '%*s'/'%-*s' also uses the "number of
bytes" to calculate the space padding size, and should be fixed, too.

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -152,8 +152,9 @@
         for f in funcmap:
             l = [f(n) for n, dummy in lines]
             if l:
-                ml = max(map(len, l))
-                pieces.append(["%*s" % (ml, x) for x in l])
+                sized = [(x, encoding.colwidth(x)) for x in l]
+                ml = max([w for x, w in sized])
+                pieces.append(["%s%s" % (' ' * (ml - w), x) for x, w in sized])
 
         if pieces:
             for p, l in zip(zip(*pieces), lines):
@@ -2053,9 +2054,9 @@
     if multioccur:
         msg = _("\n[+] marked option can be specified multiple times")
         if ui.verbose and name != 'shortlist':
-            opt_output.append((msg, ()))
+            opt_output.append((msg, None))
         else:
-            opt_output.insert(-1, (msg, ()))
+            opt_output.insert(-1, (msg, None))
 
     if not name:
         ui.write(_("\nadditional help topics:\n\n"))
@@ -2067,16 +2068,20 @@
             ui.write(" %-*s  %s\n" % (topics_len, t, desc))
 
     if opt_output:
-        opts_len = max([len(line[0]) for line in opt_output if line[1]] or [0])
-        for first, second in opt_output:
-            if second:
-                initindent = ' %-*s  ' % (opts_len, first)
-                hangindent = ' ' * (opts_len + 3)
-                ui.write('%s\n' % (util.wrap(second,
+        colwidth = encoding.colwidth
+        # normalize: (opt or message, desc or None, width of opt)
+        entries = [desc and (opt, desc, colwidth(opt)) or (opt, None, 0)
+                   for opt, desc in opt_output]
+        hanging = max([e[2] for e in entries])
+        for opt, desc, width in entries:
+            if desc:
+                initindent = ' %s%s  ' % (opt, ' ' * (hanging - width))
+                hangindent = ' ' * (hanging + 3)
+                ui.write('%s\n' % (util.wrap(desc,
                                              initindent=initindent,
                                              hangindent=hangindent)))
             else:
-                ui.write("%s\n" % first)
+                ui.write("%s\n" % opt)
 
 def identify(ui, repo, source=None,
              rev=None, num=None, id=None, branch=None, tags=None):
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -11,7 +11,7 @@
 
 from i18n import _
 from node import hex, nullid, short
-import base85, cmdutil, mdiff, util, diffhelpers, copies
+import base85, cmdutil, mdiff, util, diffhelpers, copies, encoding
 
 gitre = re.compile('diff --git a/(.*) b/(.*)')
 
@@ -1644,10 +1644,14 @@
     maxtotal, maxname = 0, 0
     totaladds, totalremoves = 0, 0
     hasbinary = False
-    for filename, adds, removes, isbinary in stats:
+
+    sized = [(filename, adds, removes, isbinary, encoding.colwidth(filename))
+             for filename, adds, removes, isbinary in stats]
+
+    for filename, adds, removes, isbinary, namewidth in sized:
         totaladds += adds
         totalremoves += removes
-        maxname = max(maxname, len(filename))
+        maxname = max(maxname, namewidth)
         maxtotal = max(maxtotal, adds + removes)
         if isbinary:
             hasbinary = True
@@ -1667,15 +1671,17 @@
         # if there were at least some changes.
         return max(i * graphwidth // maxtotal, int(bool(i)))
 
-    for filename, adds, removes, isbinary in stats:
+    for filename, adds, removes, isbinary, namewidth in sized:
         if git and isbinary:
             count = 'Bin'
         else:
             count = adds + removes
         pluses = '+' * scale(adds)
         minuses = '-' * scale(removes)
-        output.append(' %-*s |  %*s %s%s\n' % (maxname, filename, countwidth,
-                                               count, pluses, minuses))
+        output.append(' %s%s |  %*s %s%s\n' %
+                      (filename, ' ' * (maxname - namewidth),
+                       countwidth, count,
+                       pluses, minuses))
 
     if stats:
         output.append(_(' %d files changed, %d insertions(+), %d deletions(-)\n')
diff --git a/tests/test-encoding-align b/tests/test-encoding-align
new file mode 100755
--- /dev/null
+++ b/tests/test-encoding-align
@@ -0,0 +1,134 @@
+#!/bin/sh
+
+########################################
+
+hg init t
+cd t
+
+python << EOF
+# (byte, width) = (6, 4)
+s = "\xe7\x9f\xad\xe5\x90\x8d"
+# (byte, width) = (7, 7): odd width is good for alignment test
+m = "MIDDLE_"
+# (byte, width) = (18, 12)
+l = "\xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d"
+
+f = file('s', 'w'); f.write(s); f.close()
+f = file('m', 'w'); f.write(m); f.close()
+f = file('l', 'w'); f.write(l); f.close()
+
+# instant extension to show list of options
+f = file('showoptlist.py', 'w'); f.write("""# encoding: utf-8
+def showoptlist(ui, repo, *pats, **opts):
+    '''dummy command to show option descriptions'''
+    return 0
+
+cmdtable = {
+    'showoptlist':
+        (showoptlist,
+         [('s', 'opt1', '', 'short width',  '""" + s + """'),
+          ('m', 'opt2', '', 'middle width', '""" + m + """'),
+          ('l', 'opt3', '', 'long width',   '""" + l + """')
+         ],
+         ""
+        )
+}
+""")
+f.close()
+EOF
+
+########################################
+#### alignment of:
+####     - option descriptions in help
+
+cat <<EOF > .hg/hgrc
+[extensions]
+ja_ext = ${PWD}/showoptlist.py
+EOF
+echo '% check alignment of option descriptions in help'
+HGENCODING=utf-8 hg help showoptlist
+
+########################################
+#### alignment of:
+####     - user names in annotate
+####     - file names in diffstat
+
+#### add files
+
+touch `cat s`
+HGENCODING=utf-8 hg add `cat s`
+touch `cat m`
+HGENCODING=utf-8 hg add `cat m`
+touch `cat l`
+HGENCODING=utf-8 hg add `cat l`
+
+#### commit(1)
+
+cat <<EOF >> `cat s`
+first line(1)
+EOF
+cat <<EOF >> `cat m`
+first line(2)
+EOF
+cat <<EOF >> `cat l`
+first line(3)
+EOF
+HGENCODING=utf-8 hg commit -m 'first commit' -u `cat s` -d "1000000 0"
+
+#### commit(2)
+
+cat <<EOF >> `cat s`
+second line(1)
+EOF
+cat <<EOF >> `cat m`
+second line(2)
+EOF
+cat <<EOF >> `cat l`
+second line(3)
+EOF
+HGENCODING=utf-8 hg commit -m 'second commit' -u `cat m` -d "1000000 0"
+
+#### commit(3)
+
+cat <<EOF >> `cat s`
+third line(1)
+EOF
+cat <<EOF >> `cat m`
+third line(2)
+EOF
+cat <<EOF >> `cat l`
+third line(3)
+EOF
+HGENCODING=utf-8 hg commit -m 'third commit' -u `cat l` -d "1000000 0"
+
+#### check
+
+echo '% check alignment of user names in annotate'
+HGENCODING=utf-8 hg annotate -u `cat m`
+echo '% check alignment of filenames in diffstat'
+HGENCODING=utf-8 hg diff -c tip --stat
+
+########################################
+#### alignment of:
+####     - branch names in list
+####     - tag names in list
+
+#### add branches/tags
+
+HGENCODING=utf-8 hg branch `cat s`
+HGENCODING=utf-8 hg tag -d "1000000 0" `cat s`
+HGENCODING=utf-8 hg branch `cat m`
+HGENCODING=utf-8 hg tag -d "1000000 0" `cat m`
+HGENCODING=utf-8 hg branch `cat l`
+HGENCODING=utf-8 hg tag -d "1000000 0" `cat l`
+
+#### check
+
+echo '% check alignment of branches'
+HGENCODING=utf-8 hg tags
+echo '% check alignment of tags'
+HGENCODING=utf-8 hg tags
+
+########################################
+
+exit 0
diff --git a/tests/test-encoding-align.out b/tests/test-encoding-align.out
new file mode 100644
--- /dev/null
+++ b/tests/test-encoding-align.out
@@ -0,0 +1,34 @@
+% check alignment of option descriptions in help
+hg showoptlist 
+
+dummy command to show option descriptions
+
+options:
+
+ -s --opt1 短名          short width
+ -m --opt2 MIDDLE_       middle width
+ -l --opt3 長い長い名前  long width
+
+use "hg -v help showoptlist" to show global options
+% check alignment of user names in annotate
+        短名: first line(2)
+     MIDDLE_: second line(2)
+長い長い名前: third line(2)
+% check alignment of filenames in diffstat
+ MIDDLE_      |  1 +
+ 短名         |  1 +
+ 長い長い名前 |  1 +
+ 3 files changed, 3 insertions(+), 0 deletions(-)
+marked working directory as branch 短名
+marked working directory as branch MIDDLE_
+marked working directory as branch 長い長い名前
+% check alignment of branches
+tip                                5:afc60d8eed19
+長い長い名前                       4:19fe74d09ba0
+MIDDLE_                            3:8a20997d2281
+短名                               2:0cc06ffa3461
+% check alignment of tags
+tip                                5:afc60d8eed19
+長い長い名前                       4:19fe74d09ba0
+MIDDLE_                            3:8a20997d2281
+短名                               2:0cc06ffa3461


More information about the Mercurial-devel mailing list