[PATCH 1 of 6] mailutil: module for handling non-ascii chars in mails

Christian Ebert blacktrash at gmx.net
Sat Mar 8 10:01:49 CST 2008


# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1204990641 -3600
# Node ID 7e43019601e5f4b597e91aaa6498ef15ac14462f
# Parent  1939e29151ca81bc9999bc1e1b57240a4c55dc4c
mailutil: module for handling non-ascii chars in mails

- methods to encode headers
- method to create mime text object with proper charset

diff --git a/hgext/mailutil.py b/hgext/mailutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/mailutil.py
@@ -0,0 +1,96 @@
+from mercurial import util
+from mercurial.i18n import _
+import email.Header, email.MIMEText, email.Utils
+
+class converter(object):
+    '''
+    Provides methods to encode mails containing non-ascii chars.
+
+    Input strings are expected in util._encoding or, failing that, in
+    util._fallbackencoding.  Output charsets are tried in the order of
+    the email.sendcharsets config list (default: defcharsets); utf-8 is
+    always tried last.
+    '''
+    defcharsets = ['iso-8859-1', 'iso-8859-15', 'windows-1252']
+
+    def __init__(self, ui):
+        '''ui is used to read email.sendcharsets and to print warnings.'''
+        self.ui = ui
+        sendcharsets = self.ui.configlist('email', 'sendcharsets',
+                                          default=self.defcharsets)
+        # remove charsets that are always tried
+        self.sendcharsets = [cs for cs in sendcharsets if
+                             cs.lower() not in ('ascii', 'us-ascii', 'utf-8')]
+        # ensure utf-8 is last
+        self.sendcharsets.append('utf-8')
+
+    def encode(self, s):
+        '''Returns (converted) string, charset tuple.
+
+        Pure ascii input is returned unchanged with charset 'us-ascii'.
+        Otherwise finds out best charset by cycling through sendcharsets
+        in order of preference.  Tries both _encoding and _fallbackencoding
+        for input.  Uses util.tolocal as last resort.
+
+        Do not use for patches!'''
+        try:
+            s.decode('ascii')
+            return s, 'us-ascii'
+        except UnicodeDecodeError:
+            pass
+        for inputcs in (util._encoding, util._fallbackencoding):
+            try:
+                u = s.decode(inputcs)
+            except UnicodeDecodeError:
+                continue
+            # loop over a copy: invalid charsets are removed on the fly
+            for cs in self.sendcharsets[:]:
+                try:
+                    return u.encode(cs), cs
+                except UnicodeEncodeError:
+                    pass
+                except LookupError:
+                    self.ui.warn(_('ignoring invalid sendcharset: %s\n')
+                                 % cs)
+                    self.sendcharsets.remove(cs)
+        # neither input encoding could be decoded and converted
+        return util.tolocal(s), util._encoding
+
+    def headencode(self, s):
+        '''Returns RFC-2047 compliant header from given string.'''
+        # split into words?
+        s, cs = self.encode(s)
+        return str(email.Header.Header(s, cs))
+
+    def addressencode(self, address):
+        '''Turn address into RFC-2047 compliant header.
+
+        Returns '' for an empty address.  Aborts if the account part, or
+        an address without exactly one '@', contains non-ascii chars, or
+        if the domain cannot be decoded from util._encoding.'''
+        if not address:
+            return ''
+        name, addr = email.Utils.parseaddr(address)
+        name = self.headencode(name)
+        try:
+            acc, dom = addr.split('@')
+            acc = acc.encode('ascii')
+            # decode first: encoding a byte string implies an ascii
+            # decode, which rejects every internationalized domain
+            dom = dom.decode(util._encoding).encode('idna')
+            addr = '%s@%s' % (acc, dom)
+        except UnicodeDecodeError:
+            raise util.Abort(_('invalid email address: %s') % addr)
+        except ValueError:
+            # too strict for local user names?
+            try:
+                addr = addr.encode('ascii')
+            except UnicodeDecodeError:
+                raise util.Abort(_('invalid local address: %s') % addr)
+        return email.Utils.formataddr((name, addr))
+
+    def mimeencode(self, s):
+        '''Creates mime text object, encodes it if needed, and sets
+        charset and transfer-encoding accordingly.'''
+        s, cs = self.encode(s)
+        return email.MIMEText.MIMEText(s, 'plain', cs)


More information about the Mercurial-devel mailing list