[PATCH] i18n: use encoding.colwidth() instead of len() for correct column width
Nicolas Dumazet
nicdumz at gmail.com
Sat Jul 17 20:29:26 CDT 2010
Hello Katsunori!
On Sun, 18 Jul 2010 01:09:34 +0900
FUJIWARA Katsunori <fujiwara at ascade.co.jp> wrote:
> # HG changeset patch
> # User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
> # Date 1279382810 -32400
> # Branch stable
> # Node ID d342305f7635e607eef62fbfc9dc6f0e13be34d4
> # Parent 76454cbc11e48d692262843d0c87e81bbb2c053c
> i18n: use encoding.colwidth() instead of len() for correct column width
This looks like a well-rounded patch, thank you!
> Some encoding and language combinations (e.g.: UTF-8 and Japanese)
> encode characters into byte sequences that are longer than their
> column width.
>
> So, encoding.colwidth() should be applied instead of len() on i18n
> strings.
>
> In addition, formatting with '%*s'/'%-*s' also uses the "number of
> bytes" to calculate the space padding size, and should be fixed, too.
And I actually like the last bit, because ' ' * spacewidth seems more
readable to me than '%*s'.
I looked at the patch, and have no complaints. The test actually checks
a lot of places where we assume that len(s) would just work, and not
only one.
Another pair of eyes maybe?
-Nicolas.
>
> diff --git a/mercurial/commands.py b/mercurial/commands.py
> --- a/mercurial/commands.py
> +++ b/mercurial/commands.py
> @@ -152,8 +152,9 @@
> for f in funcmap:
> l = [f(n) for n, dummy in lines]
> if l:
> - ml = max(map(len, l))
> - pieces.append(["%*s" % (ml, x) for x in l])
> + sized = [(x, encoding.colwidth(x)) for x in l]
> + ml = max([w for x, w in sized])
> + pieces.append(["%s%s" % (' ' * (ml - w), x) for x, w in sized])
>
> if pieces:
> for p, l in zip(zip(*pieces), lines):
> @@ -2053,9 +2054,9 @@
> if multioccur:
> msg = _("\n[+] marked option can be specified multiple times")
> if ui.verbose and name != 'shortlist':
> - opt_output.append((msg, ()))
> + opt_output.append((msg, None))
> else:
> - opt_output.insert(-1, (msg, ()))
> + opt_output.insert(-1, (msg, None))
>
> if not name:
> ui.write(_("\nadditional help topics:\n\n"))
> @@ -2067,16 +2068,20 @@
> ui.write(" %-*s %s\n" % (topics_len, t, desc))
>
> if opt_output:
> - opts_len = max([len(line[0]) for line in opt_output if line[1]] or [0])
> - for first, second in opt_output:
> - if second:
> - initindent = ' %-*s ' % (opts_len, first)
> - hangindent = ' ' * (opts_len + 3)
> - ui.write('%s\n' % (util.wrap(second,
> + colwidth = encoding.colwidth
> + # normalize: (opt or message, desc or None, width of opt)
> + entries = [desc and (opt, desc, colwidth(opt)) or (opt, None, 0)
> + for opt, desc in opt_output]
> + hanging = max([e[2] for e in entries])
> + for opt, desc, width in entries:
> + if desc:
> + initindent = ' %s%s ' % (opt, ' ' * (hanging - width))
> + hangindent = ' ' * (hanging + 3)
> + ui.write('%s\n' % (util.wrap(desc,
> initindent=initindent,
> hangindent=hangindent)))
> else:
> - ui.write("%s\n" % first)
> + ui.write("%s\n" % opt)
>
> def identify(ui, repo, source=None,
> rev=None, num=None, id=None, branch=None, tags=None):
> diff --git a/mercurial/patch.py b/mercurial/patch.py
> --- a/mercurial/patch.py
> +++ b/mercurial/patch.py
> @@ -11,7 +11,7 @@
>
> from i18n import _
> from node import hex, nullid, short
> -import base85, cmdutil, mdiff, util, diffhelpers, copies
> +import base85, cmdutil, mdiff, util, diffhelpers, copies, encoding
>
> gitre = re.compile('diff --git a/(.*) b/(.*)')
>
> @@ -1644,10 +1644,14 @@
> maxtotal, maxname = 0, 0
> totaladds, totalremoves = 0, 0
> hasbinary = False
> - for filename, adds, removes, isbinary in stats:
> +
> + sized = [(filename, adds, removes, isbinary, encoding.colwidth(filename))
> + for filename, adds, removes, isbinary in stats]
> +
> + for filename, adds, removes, isbinary, namewidth in sized:
> totaladds += adds
> totalremoves += removes
> - maxname = max(maxname, len(filename))
> + maxname = max(maxname, namewidth)
> maxtotal = max(maxtotal, adds + removes)
> if isbinary:
> hasbinary = True
> @@ -1667,15 +1671,17 @@
> # if there were at least some changes.
> return max(i * graphwidth // maxtotal, int(bool(i)))
>
> - for filename, adds, removes, isbinary in stats:
> + for filename, adds, removes, isbinary, namewidth in sized:
> if git and isbinary:
> count = 'Bin'
> else:
> count = adds + removes
> pluses = '+' * scale(adds)
> minuses = '-' * scale(removes)
> - output.append(' %-*s | %*s %s%s\n' % (maxname, filename, countwidth,
> - count, pluses, minuses))
> + output.append(' %s%s | %*s %s%s\n' %
> + (filename, ' ' * (maxname - namewidth),
> + countwidth, count,
> + pluses, minuses))
>
> if stats:
> output.append(_(' %d files changed, %d insertions(+), %d deletions(-)\n')
> diff --git a/tests/test-encoding-align b/tests/test-encoding-align
> new file mode 100755
> --- /dev/null
> +++ b/tests/test-encoding-align
> @@ -0,0 +1,134 @@
> +#!/bin/sh
> +
> +########################################
> +
> +hg init t
> +cd t
> +
> +python << EOF
> +# (byte, width) = (6, 4)
> +s = "\xe7\x9f\xad\xe5\x90\x8d"
> +# (byte, width) = (7, 7): odd width is good for alignment test
> +m = "MIDDLE_"
> +# (byte, width) = (18, 12)
> +l = "\xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d"
> +
> +f = file('s', 'w'); f.write(s); f.close()
> +f = file('m', 'w'); f.write(m); f.close()
> +f = file('l', 'w'); f.write(l); f.close()
> +
> +# instant extension to show list of options
> +f = file('showoptlist.py', 'w'); f.write("""# encoding: utf-8
> +def showoptlist(ui, repo, *pats, **opts):
> + '''dummy command to show option descriptions'''
> + return 0
> +
> +cmdtable = {
> + 'showoptlist':
> + (showoptlist,
> + [('s', 'opt1', '', 'short width', '""" + s + """'),
> + ('m', 'opt2', '', 'middle width', '""" + m + """'),
> + ('l', 'opt3', '', 'long width', '""" + l + """')
> + ],
> + ""
> + )
> +}
> +""")
> +f.close()
> +EOF
> +
> +########################################
> +#### alignment of:
> +#### - option descriptions in help
> +
> +cat <<EOF > .hg/hgrc
> +[extensions]
> +ja_ext = ${PWD}/showoptlist.py
> +EOF
> +echo '% check alignment of option descriptions in help'
> +HGENCODING=utf-8 hg help showoptlist
> +
> +########################################
> +#### alignment of:
> +#### - user names in annotate
> +#### - file names in diffstat
> +
> +#### add files
> +
> +touch `cat s`
> +HGENCODING=utf-8 hg add `cat s`
> +touch `cat m`
> +HGENCODING=utf-8 hg add `cat m`
> +touch `cat l`
> +HGENCODING=utf-8 hg add `cat l`
> +
> +#### commit(1)
> +
> +cat <<EOF >> `cat s`
> +first line(1)
> +EOF
> +cat <<EOF >> `cat m`
> +first line(2)
> +EOF
> +cat <<EOF >> `cat l`
> +first line(3)
> +EOF
> +HGENCODING=utf-8 hg commit -m 'first commit' -u `cat s` -d "1000000 0"
> +
> +#### commit(2)
> +
> +cat <<EOF >> `cat s`
> +second line(1)
> +EOF
> +cat <<EOF >> `cat m`
> +second line(2)
> +EOF
> +cat <<EOF >> `cat l`
> +second line(3)
> +EOF
> +HGENCODING=utf-8 hg commit -m 'second commit' -u `cat m` -d "1000000 0"
> +
> +#### commit(3)
> +
> +cat <<EOF >> `cat s`
> +third line(1)
> +EOF
> +cat <<EOF >> `cat m`
> +third line(2)
> +EOF
> +cat <<EOF >> `cat l`
> +third line(3)
> +EOF
> +HGENCODING=utf-8 hg commit -m 'third commit' -u `cat l` -d "1000000 0"
> +
> +#### check
> +
> +echo '% check alignment of user names in annotate'
> +HGENCODING=utf-8 hg annotate -u `cat m`
> +echo '% check alignment of filenames in diffstat'
> +HGENCODING=utf-8 hg diff -c tip --stat
> +
> +########################################
> +#### alignment of:
> +#### - branch names in list
> +#### - tag names in list
> +
> +#### add branches/tags
> +
> +HGENCODING=utf-8 hg branch `cat s`
> +HGENCODING=utf-8 hg tag -d "1000000 0" `cat s`
> +HGENCODING=utf-8 hg branch `cat m`
> +HGENCODING=utf-8 hg tag -d "1000000 0" `cat m`
> +HGENCODING=utf-8 hg branch `cat l`
> +HGENCODING=utf-8 hg tag -d "1000000 0" `cat l`
> +
> +#### check
> +
> +echo '% check alignment of branches'
> +HGENCODING=utf-8 hg tags
> +echo '% check alignment of tags'
> +HGENCODING=utf-8 hg tags
> +
> +########################################
> +
> +exit 0
> diff --git a/tests/test-encoding-align.out b/tests/test-encoding-align.out
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-encoding-align.out
> @@ -0,0 +1,34 @@
> +% check alignment of option descriptions in help
> +hg showoptlist
> +
> +dummy command to show option descriptions
> +
> +options:
> +
> + -s --opt1 短名 short width
> + -m --opt2 MIDDLE_ middle width
> + -l --opt3 長い長い名前 long width
> +
> +use "hg -v help showoptlist" to show global options
> +% check alignment of user names in annotate
> + 短名: first line(2)
> + MIDDLE_: second line(2)
> +長い長い名前: third line(2)
> +% check alignment of filenames in diffstat
> + MIDDLE_ | 1 +
> + 短名 | 1 +
> + 長い長い名前 | 1 +
> + 3 files changed, 3 insertions(+), 0 deletions(-)
> +marked working directory as branch 短名
> +marked working directory as branch MIDDLE_
> +marked working directory as branch 長い長い名前
> +% check alignment of branches
> +tip 5:afc60d8eed19
> +長い長い名前 4:19fe74d09ba0
> +MIDDLE_ 3:8a20997d2281
> +短名 2:0cc06ffa3461
> +% check alignment of tags
> +tip 5:afc60d8eed19
> +長い長い名前 4:19fe74d09ba0
> +MIDDLE_ 3:8a20997d2281
> +短名 2:0cc06ffa3461
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
--
Nicolas Dumazet — NicDumZ
More information about the Mercurial-devel
mailing list