[PATCH 1 of 1] minirst: use unicode string as intermediate form for replacement
FUJIWARA Katsunori
foozy at lares.dti.ne.jp
Mon Oct 31 07:07:41 CDT 2011
# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1320062778 -32400
# Branch stable
# Node ID b5d104caf385c77c57413af34ef62fdf8b7ac15e
# Parent 84980b00fbcbbb735e4112751d9e162a86319ebc
minirst: use unicode string as intermediate form for replacement
# this change redoes part of 521c8e0c93bf, which was backed out by 0ad0ebe67815
Some character encodings use ASCII characters other than control
characters, letters, and digits as part of multi-byte characters, so
replacing such characters directly in strings in the local encoding
produces invalid byte sequences.
diff -r 84980b00fbcbbb735e4112751d9e162a86319ebc -r b5d104caf385c77c57413af34ef62fdf8b7ac15e mercurial/minirst.py
--- a/mercurial/minirst.py Mon Oct 31 20:58:49 2011 +0900
+++ b/mercurial/minirst.py Mon Oct 31 21:06:18 2011 +0900
@@ -23,9 +23,14 @@
from i18n import _
def replace(text, substs):
+ # some character encodings (cp932 for Japanese, at least) use
+ # ASCII characters other than control/alphabet/digit as a part of
+ # multi-bytes characters, so direct replacing with such characters
+ # on strings in local encoding causes invalid byte sequences.
+ utext = text.decode(encoding.encoding)
for f, t in substs:
- text = text.replace(f, t)
- return text
+ utext = utext.replace(f, t)
+ return utext.encode(encoding.encoding)
_blockre = re.compile(r"\n(?:\s*\n)+")
diff -r 84980b00fbcbbb735e4112751d9e162a86319ebc -r b5d104caf385c77c57413af34ef62fdf8b7ac15e tests/test-help-i18n.t
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-help-i18n.t Mon Oct 31 21:06:18 2011 +0900
@@ -0,0 +1,38 @@
+test help formatting for i18n text
+
+ $ mkdir t
+ $ cd t
+
+define commands to display help text
+
+ $ cat << EOF > help.py
+ > # help text with ambiguous characters ('`' for minirst, for example)
+ > # in CP932 (Japanese Shift-JIS)
+ > def show_ambig_chars(ui, **opts):
+ > u'''\u30a1\u30a2\u30a3\u30a4\u30a5\u30a6\u30a7\u30a8
+ >
+ > \u30a9\u30aa\u30ab\u30ac\u30ad\u30ae\u30af\u30b0
+ > \u30b1\u30b2\u30b3\u30b4\u30b5\u30b6\u30b7\u30b8
+ > \u30b9\u30ba\u30bb\u30bc\u30bd\u30be\u30bf\u30c0
+ > \u30c1\u30c2\u30c3\u30c4\u30c5\u30c6\u30c7\u30c8
+ > \u30c9\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d0
+ > \u30d1\u30d2\u30d3\u30d4\u30d5\u30d6\u30d7\u30d8
+ > \u30d9\u30da\u30db\u30dc\u30dd\u30de\u30df
+ > '''
+ >
+ > cmdtable = {
+ > 'show_ambig_chars': (show_ambig_chars, [], ""),
+ > }
+ > EOF
+
+test help formatting
+
+ $ hg --encoding cp932 --config extensions.show=./help.py help show_ambig_chars
+ hg show_ambig_chars
+
+ \x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G (esc)
+
+ \x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O \x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W \x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_ \x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g (esc)
+ \x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o \x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w \x83x\x83y\x83z\x83{\x83|\x83}\x83~ (esc)
+
+ use "hg -v help show_ambig_chars" to show more info
More information about the Mercurial-devel
mailing list