[PATCH 1 of 1 stable resend] util: fix ellipsis() not to break multi-byte sequence (issue2564)

Yuya Nishihara yuya at tcha.org
Sat Dec 25 07:17:17 CST 2010


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1293281940 -32400
# Node ID 77f7d2b7bf1a6ce8247ccb7a5924b06cb6396e66
# Parent  ef243b9ef5e11b984f9d1c23e60acc3dfbb26b6c
util: fix ellipsis() not to break multi-byte sequence (issue2564)

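ellipsis() now tries to decode the local-encoded string to unicode before
truncating, so that the cut never falls in the middle of a multi-byte sequence.
Because we cannot assume that the given text is really encoded in the local
encoding, it falls back to truncating the raw byte string if a unicode error
occurs.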

diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1196,12 +1196,23 @@ def email(author):
         r = None
     return author[author.find('<') + 1:r]
 
+def _ellipsis(text, maxlength):
+    if len(text) <= maxlength:
+        return text, False
+    else:
+        return "%s..." % (text[:maxlength - 3]), True
+
 def ellipsis(text, maxlength=400):
     """Trim string to at most maxlength (default: 400) characters."""
-    if len(text) <= maxlength:
-        return text
-    else:
-        return "%s..." % (text[:maxlength - 3])
+    try:
+        # use unicode not to split at intermediate multi-byte sequence
+        utext, truncated = _ellipsis(text.decode(encoding.encoding),
+                                     maxlength)
+        if not truncated:
+            return text
+        return utext.encode(encoding.encoding)
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        return _ellipsis(text, maxlength)[0]
 
 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
     '''yield every hg repository under path, recursively.'''
diff --git a/tests/test-notify.t b/tests/test-notify.t
--- a/tests/test-notify.t
+++ b/tests/test-notify.t
@@ -302,3 +302,49 @@ test merge
   changeset 22c88b85aa27 in b
   description: merge
   (run 'hg update' to get a working copy)
+
+truncate multi-byte subject
+
+  $ cat <<EOF >> $HGRCPATH
+  > [notify]
+  > maxsubject = 4
+  > EOF
+  $ echo a >> a/a
+  $ hg --cwd a --encoding utf-8 commit -A -d '0 0' \
+  >   -m `python -c 'print "\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4"'`
+  $ hg --traceback --cwd b --encoding utf-8 pull ../a | \
+  >   python -c 'import sys,re; print re.sub("\n\t", " ", sys.stdin.read()),'
+  pulling from ../a
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 1 changesets with 1 changes to 1 files
+  Content-Type: text/plain; charset="us-ascii"
+  MIME-Version: 1.0
+  Content-Transfer-Encoding: 8bit
+  X-Test: foo
+  Date: * (glob)
+  Subject: \xc3\xa0... (esc)
+  From: test at test.com
+  X-Hg-Notification: changeset 4a47f01c1356
+  Message-Id: <*> (glob)
+  To: baz at test.com, foo at bar
+  
+  changeset 4a47f01c1356 in b
+  description: \xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4 (esc)
+  diffstat:
+  
+   a |  1 +
+   1 files changed, 1 insertions(+), 0 deletions(-)
+  
+  diffs (7 lines):
+  
+  diff -r 22c88b85aa27 -r 4a47f01c1356 a
+  --- a/a	Thu Jan 01 00:00:03 1970 +0000
+  +++ b/a	Thu Jan 01 00:00:00 1970 +0000
+  @@ -1,2 +1,3 @@
+   a
+   a
+  +a
+  (run 'hg update' to get a working copy)


More information about the Mercurial-devel mailing list