[PATCH 1 of 4] mailutil: module for handling non-ascii chars in mails

Christian Ebert blacktrash at gmx.net
Wed Mar 5 06:03:10 CST 2008


# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1204716940 -3600
# Node ID ccaf1af3177b687af56f9ba56300dc573883ea44
# Parent  b193a6e591319726e8ccd0b4322f9b4785f59390
mailutil: module for handling non-ascii chars in mails

- methods to encode headers
- method to create mime text object with proper charset

diff --git a/hgext/mailutil.py b/hgext/mailutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/mailutil.py
@@ -0,0 +1,82 @@
+from mercurial import util
+from mercurial.i18n import _
+import email.Charset, email.Header, email.MIMEText, email.Utils
+
+def isutf(s, cs='utf-8'):
+    '''Returns True if s decodes with charset cs, False on UnicodeDecodeError.
+    cs defaults to utf-8; pass another charset (e.g. 'ascii') to probe for it.'''
+    try:
+        s.decode(cs)
+        return True
+    except UnicodeDecodeError:
+        return False
+
+class converter(email.Charset.Charset):
+    '''
+    Charset subclass with methods to encode mail text with non-ascii chars.
+    '''
+    defcharsets = ['iso-8859-1', 'iso-8859-15', 'windows-1252']
+
+    def __init__(self, ui):
+        email.Charset.Charset.__init__(self, util._encoding)
+        self.ui = ui
+        sendcharsets = self.ui.configlist('email', 'sendcharsets',
+                                          default=self.defcharsets)
+        # drop ascii and utf-8: encode() always tries them first and last
+        self.sendcharsets = [cs for cs in sendcharsets
+                             if cs not in ('ascii', 'us-ascii', 'utf-8')]
+        # utf-8 is always the final candidate
+        self.sendcharsets.append('utf-8')
+
+    def encode(self, s):
+        '''Returns s unchanged if it is ascii, else converted to the first
+        charset in sendcharsets that works; sets self.output_codec to match.'''
+        if isutf(s, 'ascii'):
+            self.output_codec = 'us-ascii'
+            return s
+        for cs in self.sendcharsets:
+            try:
+                self.output_codec = cs
+                return self.convert(s)
+            except UnicodeEncodeError:
+                pass
+            except UnicodeDecodeError:
+                # broken input, or manually set HGENCODING incompatible
+                # with locale: give up and return s decoded as lossy ascii
+                self.output_codec = 'us-ascii'
+                return s.decode('us-ascii', 'replace')
+            except LookupError:
+                self.ui.warn(_('skipping invalid sendcharset: %s\n') % cs)
+        # last exit: no sendcharset worked, fall back to util._encoding
+        self.output_codec = util._encoding
+        return util.tolocal(s)
+
+    def headencode(self, s):
+        '''Returns s as RFC-2047 header in the charset encode() picked.'''
+        # TODO: split into words?
+        s = self.encode(s)
+        return str(email.Header.Header(s, self.output_codec))
+
+    def addressencode(self, address):
+        '''Returns address with header-encoded name and idna-encoded domain.'''
+        if not address:
+            return ''
+        name, addr = email.Utils.parseaddr(address)
+        name = self.headencode(name)
+        try:
+            acc, dom = addr.split('@')
+            acc = acc.encode('ascii')
+            dom = dom.encode('idna')
+            addr = '%s@%s' % (acc, dom)
+        except UnicodeDecodeError:
+            raise util.Abort(_('invalid email address: %s') % addr)
+        except ValueError:
+            # no single '@' (local user name?): ascii only, too strict?
+            addr = addr.encode('ascii')
+        return email.Utils.formataddr((name, addr))
+
+    def mimeencode(self, s):
+        '''Returns a text/plain MIMEText built from s after encode(), with
+        its charset set to the resulting self.output_codec.'''
+        s = self.encode(s)
+        return email.MIMEText.MIMEText(s, 'plain', self.output_codec)


More information about the Mercurial-devel mailing list