[PATCH 1 of 3] mail: add methods to handle non-ascii chars

Christian Ebert blacktrash at gmx.net
Wed Sep 10 20:44:37 CDT 2008


# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1215886319 -3600
# Node ID 959f46039fcdfcd7869eb903c8122f6d2a76745e
# Parent  ab57069232b4d9ce95e5898d9154410988008ba2
mail: add methods to handle non-ascii chars

- headencode, addressencode: encode headers
- mimeencode: encode message parts not containing patches

non-ascii input is converted to utf-8 in a way that fits
the corresponding message part (header or body).

diff --git a/mercurial/mail.py b/mercurial/mail.py
--- a/mercurial/mail.py
+++ b/mercurial/mail.py
@@ -7,6 +7,7 @@
 
 from i18n import _
 import os, smtplib, socket
+import email.Header, email.MIMEText, email.Utils
 import util
 
 def _smtp(ui):
@@ -84,3 +85,68 @@
         if not util.find_exe(method):
             raise util.Abort(_('%r specified as email transport, '
                                'but not in PATH') % method)
+
+def _encode(ui, s):
+    '''Return (string, charset); non-ascii s is re-encoded to utf-8.
+    Pure ascii s is returned unchanged, with charset us-ascii.
+    Input is decoded with util._encoding, then with
+    util._fallbackencoding. Only as last resort s is sent as is,
+    labelled us-ascii. Caveat: not for mail parts containing patches!'''
+    try:
+        s.decode('ascii')  # result unused: only probes for non-ascii
+        return s, 'us-ascii'
+    except UnicodeDecodeError:
+        def strconv(inputcs):  # decode s from inputcs, re-encode as utf-8
+            try:
+                u = s.decode(inputcs)
+            except UnicodeDecodeError:
+                raise  # propagate so the caller can try another charset
+            try:
+                return u.encode('utf-8'), 'utf-8'
+            except UnicodeEncodeError:
+                pass
+            # utf-8 encoding failed: send s as is, labelled us-ascii
+            return s, 'us-ascii'
+
+        try:
+            return strconv(util._encoding)
+        except UnicodeDecodeError:
+            try:
+                return strconv(util._fallbackencoding)
+            except UnicodeDecodeError:
+                return s, 'us-ascii'  # undecodable: sent unchanged
+
+def headencode(ui, s, display=False):
+    '''Return s as RFC-2047 compliant header; s unchanged if display.'''
+    if not display:
+        s, cs = _encode(ui, s)  # cs is 'us-ascii' or 'utf-8'
+        return str(email.Header.Header(s, cs))
+    return s
+
+def addressencode(ui, address, display=False):
+    '''Encode address as RFC-2047 header; returned as is if display.'''
+    if display or not address:
+        return address or ''  # falsy address becomes ''
+    name, addr = email.Utils.parseaddr(address)
+    name = headencode(ui, name)
+    try:
+        acc, dom = addr.split('@')  # ValueError unless exactly one '@'
+        acc = acc.encode('ascii')  # non-ascii local part aborts below
+        dom = dom.encode('idna')
+        addr = '%s@%s' % (acc, dom)
+    except UnicodeDecodeError:
+        raise util.Abort(_('invalid email address: %s') % addr)
+    except ValueError:  # not exactly one '@'; idna UnicodeError too (verify)
+        try:
+            addr = addr.encode('ascii')
+        except UnicodeDecodeError:
+            raise util.Abort(_('invalid local address: %s') % addr)
+    return email.Utils.formataddr((name, addr))
+
+def mimeencode(ui, s, display=False):
+    '''Return text/plain MIMEText for s; unless display is set, s goes
+    through _encode (utf-8 if needed) and the charset is set accordingly.'''
+    cs = 'us-ascii'  # used as is when display is set
+    if not display:
+        s, cs = _encode(ui, s)
+    return email.MIMEText.MIMEText(s, 'plain', cs or 'us-ascii')


More information about the Mercurial-devel mailing list