[PATCH 1 of 1] minirst: use unicode string as intermediate form for replacement

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Mon Oct 31 07:07:41 CDT 2011


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1320062778 -32400
# Branch stable
# Node ID b5d104caf385c77c57413af34ef62fdf8b7ac15e
# Parent  84980b00fbcbbb735e4112751d9e162a86319ebc
minirst: use unicode string as intermediate form for replacement

# this change redoes part of 521c8e0c93bf, which was backed out by 0ad0ebe67815

Some character encodings use ASCII characters other than
control characters, letters, and digits as part of multi-byte
characters, so directly replacing such characters in strings in the
local encoding can produce invalid byte sequences.

diff -r 84980b00fbcbbb735e4112751d9e162a86319ebc -r b5d104caf385c77c57413af34ef62fdf8b7ac15e mercurial/minirst.py
--- a/mercurial/minirst.py	Mon Oct 31 20:58:49 2011 +0900
+++ b/mercurial/minirst.py	Mon Oct 31 21:06:18 2011 +0900
@@ -23,9 +23,14 @@
 from i18n import _
 
 def replace(text, substs):
+    # some character encodings (cp932 for Japanese, at least) use
+    # ASCII characters other than control/alphabet/digit as a part of
+    # multi-bytes characters, so direct replacing with such characters
+    # on strings in local encoding causes invalid byte sequences.
+    utext = text.decode(encoding.encoding)
     for f, t in substs:
-        text = text.replace(f, t)
-    return text
+        utext = utext.replace(f, t)
+    return utext.encode(encoding.encoding)
 
 _blockre = re.compile(r"\n(?:\s*\n)+")
 
diff -r 84980b00fbcbbb735e4112751d9e162a86319ebc -r b5d104caf385c77c57413af34ef62fdf8b7ac15e tests/test-help-i18n.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-help-i18n.t	Mon Oct 31 21:06:18 2011 +0900
@@ -0,0 +1,38 @@
+test help formatting for i18n text
+
+  $ mkdir t
+  $ cd t
+
+define commands to display help text
+
+  $ cat << EOF > help.py
+  > # help text with ambiguous characters ('`' for minirst, for example)
+  > # in CP932 (Japanese Shift-JIS)
+  > def show_ambig_chars(ui, **opts):
+  >     u'''\u30a1\u30a2\u30a3\u30a4\u30a5\u30a6\u30a7\u30a8
+  > 
+  >     \u30a9\u30aa\u30ab\u30ac\u30ad\u30ae\u30af\u30b0
+  >     \u30b1\u30b2\u30b3\u30b4\u30b5\u30b6\u30b7\u30b8
+  >     \u30b9\u30ba\u30bb\u30bc\u30bd\u30be\u30bf\u30c0
+  >     \u30c1\u30c2\u30c3\u30c4\u30c5\u30c6\u30c7\u30c8
+  >     \u30c9\u30ca\u30cb\u30cc\u30cd\u30ce\u30cf\u30d0
+  >     \u30d1\u30d2\u30d3\u30d4\u30d5\u30d6\u30d7\u30d8
+  >     \u30d9\u30da\u30db\u30dc\u30dd\u30de\u30df
+  >     '''
+  > 
+  > cmdtable = {
+  >     'show_ambig_chars': (show_ambig_chars, [], ""),
+  > }
+  > EOF
+
+test help formatting
+
+  $ hg --encoding cp932 --config extensions.show=./help.py help show_ambig_chars
+  hg show_ambig_chars
+  
+  \x83@\x83A\x83B\x83C\x83D\x83E\x83F\x83G (esc)
+  
+      \x83H\x83I\x83J\x83K\x83L\x83M\x83N\x83O \x83P\x83Q\x83R\x83S\x83T\x83U\x83V\x83W \x83X\x83Y\x83Z\x83[\x83\\\x83]\x83^\x83_ \x83`\x83a\x83b\x83c\x83d\x83e\x83f\x83g (esc)
+      \x83h\x83i\x83j\x83k\x83l\x83m\x83n\x83o \x83p\x83q\x83r\x83s\x83t\x83u\x83v\x83w \x83x\x83y\x83z\x83{\x83|\x83}\x83~ (esc)
+  
+  use "hg -v help show_ambig_chars" to show more info


More information about the Mercurial-devel mailing list