[PATCH 1 of 6] mailcharset: module for handling non-ascii chars in mails

Christian Ebert blacktrash at gmx.net
Wed Jul 9 11:33:42 CDT 2008


# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1205141667 -3600
# Node ID 1e5ec68b102fc39ba77d3761548c1e766ae37389
# Parent  694223a29ad494fbd2cee69ebdceadfc6ac127cf
mailcharset: module for handling non-ascii chars in mails

Module provides class converter for message parts that do not
contain patches.

- methods to encode headers
- method to create mime text object with proper charset
- new email config "sendcharsets"

Users may configure email.sendcharsets as a list of charsets they
consider appropriate for the recipients of their outgoing mails.

By default the list is empty, and only us-ascii and ui.encoding
(util._encoding) are tried; both are always tried, whether or not
they are listed in email.sendcharsets.

diff --git a/mercurial/mailcharset.py b/mercurial/mailcharset.py
new file mode 100644
--- /dev/null
+++ b/mercurial/mailcharset.py
@@ -0,0 +1,99 @@
+# Copyright (C) 2008 Christian Ebert <blacktrash at gmx.net>
+# This file is published under the GNU GPL.
+
+import util
+from i18n import _
+import email.Header, email.MIMEText, email.Utils
+
+class converter(object):
+    '''
+    Encodes mail text containing non-ascii chars into one of the charsets
+    configured in email.sendcharsets; util._encoding is always a candidate.
+
+    Methods of this class must only be applied to message parts that do not
+    contain patches.
+    '''
+    def __init__(self, ui):
+        self.ui = ui
+        sendcharsets = self.ui.configlist('email', 'sendcharsets')
+        # drop ascii aliases: encode() always tries plain ascii first
+        self.sendcharsets = [cs for cs in sendcharsets if
+                             cs.lower() not in ('ascii', 'us-ascii')]
+        if util._encoding not in self.sendcharsets:
+            # fallback: tried last, after all configured charsets
+            self.sendcharsets.append(util._encoding)
+
+    def encode(self, s):
+        '''Returns (converted) string, charset tuple.  charset is None if s
+        is pure ascii, or if conversion failed and s is returned unchanged.
+
+        s is decoded from util._encoding, or util._fallbackencoding if that
+        fails; the first entry of sendcharsets able to encode it is used.'''
+        try:
+            s.decode('ascii')
+            return s, None  # pure ascii needs no charset
+        except UnicodeDecodeError:
+            # non-ascii input: decode it, then re-encode for sending
+            def strconv(inputcs):
+                try:
+                    u = s.decode(inputcs)
+                except UnicodeDecodeError:
+                    raise  # let the caller try another input charset
+                for cs in self.sendcharsets:
+                    try:
+                        return u.encode(cs), cs
+                    except UnicodeEncodeError:
+                        pass  # cs cannot represent u; try the next one
+                    except LookupError:  # cs is not a known codec name
+                        self.ui.warn(_('ignoring invalid sendcharset: %s\n')
+                                     % cs)
+                # if all conversion attempts fail, send s as is (broken ascii)
+                return s, None
+
+            try:
+                s, scs = strconv(util._encoding)
+            except UnicodeDecodeError:
+                try:
+                    s, scs = strconv(util._fallbackencoding)
+                except UnicodeDecodeError:
+                    scs = None  # undecodable: s stays unchanged
+        return s, scs
+
+    def headencode(self, s, **opts):
+        '''Returns RFC-2047 compliant header from given string.'''
+        if not opts.get('test'):  # opts['test']: return s as is
+            # split into words?
+            s, cs = self.encode(s)
+            if cs:  # cs is None for ascii or unconverted s
+                return str(email.Header.Header(s, cs))
+        return s
+
+    def addressencode(self, address, **opts):
+        '''Turn address into RFC-2047 compliant header.'''
+        if opts.get('test') or not address:
+            return address or ''
+        name, addr = email.Utils.parseaddr(address)
+        name = self.headencode(name)
+        try:
+            acc, dom = addr.split('@')  # ValueError unless exactly one '@'
+            acc = acc.encode('ascii')
+            dom = dom.encode('idna')
+            addr = '%s@%s' % (acc, dom)
+        except UnicodeDecodeError:  # acc or dom has non-ascii bytes
+            raise util.Abort(_('invalid email address: %s') % addr)
+        except ValueError:  # no single '@': treat addr as a local address
+            try:
+                # too strict?
+                addr = addr.encode('ascii')
+            except UnicodeDecodeError:
+                raise util.Abort(_('invalid local address: %s') % addr)
+        return email.Utils.formataddr((name, addr))
+
+    def mimeencode(self, s, **opts):
+        '''Creates mime text object, encodes it if needed, and sets
+        charset and transfer-encoding accordingly.'''
+        if not opts.get('test'):
+            s, cs = self.encode(s)
+        else:
+            cs = None  # test mode: label s us-ascii without converting
+        return email.MIMEText.MIMEText(s, 'plain', cs or 'us-ascii')


More information about the Mercurial-devel mailing list