[PATCH 1 of 6] mailutil: module for handling non-ascii chars in mails
Christian Ebert
blacktrash at gmx.net
Sat Mar 8 10:01:49 CST 2008
# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1204990641 -3600
# Node ID 7e43019601e5f4b597e91aaa6498ef15ac14462f
# Parent 1939e29151ca81bc9999bc1e1b57240a4c55dc4c
mailutil: module for handling non-ascii chars in mails
- methods to encode headers
- method to create mime text object with proper charset
diff --git a/hgext/mailutil.py b/hgext/mailutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/mailutil.py
@@ -0,0 +1,99 @@
+from mercurial import util
+from mercurial.i18n import _
+import email.Header, email.MIMEText, email.Utils
+
+class converter(object):
+    '''
+    Provides methods to encode mails containing non-ascii chars.
+
+    Strings are converted to the first charset of self.sendcharsets
+    that can represent them; utf-8 is always the last candidate.
+    '''
+    # charsets tried, in this order, if email.sendcharsets is not set
+    defcharsets = ['iso-8859-1', 'iso-8859-15', 'windows-1252']
+
+    def __init__(self, ui):
+        '''Reads the preferred charsets from email.sendcharsets of ui.'''
+        self.ui = ui
+        sendcharsets = self.ui.configlist('email', 'sendcharsets',
+                                          default=self.defcharsets)
+        # remove charsets that are always tried
+        self.sendcharsets = [cs for cs in sendcharsets if
+                             cs.lower() not in ('ascii', 'us-ascii', 'utf-8')]
+        # ensure utf-8 is last
+        self.sendcharsets.append('utf-8')
+
+    def encode(self, s):
+        '''Returns (converted) string, charset tuple.
+
+        Pure ascii input is returned unchanged as 'us-ascii'. Otherwise
+        the input is decoded with _encoding, then _fallbackencoding, and
+        converted to the first charset in sendcharsets that can
+        represent it. Uses util.tolocal as last resort.
+
+        Do not use for patches!'''
+        try:
+            s.decode('ascii')
+            return s, 'us-ascii'
+        except UnicodeDecodeError:
+            pass
+
+        for inputcs in (util._encoding, util._fallbackencoding):
+            try:
+                u = s.decode(inputcs)
+            except UnicodeDecodeError:
+                continue
+            # iterate over a copy, as invalid charsets are removed from
+            # self.sendcharsets inside the loop
+            for cs in self.sendcharsets[:]:
+                try:
+                    return u.encode(cs), cs
+                except UnicodeEncodeError:
+                    pass
+                except LookupError:
+                    self.ui.warn(_('ignoring invalid sendcharset: %s\n')
+                                 % cs)
+                    self.sendcharsets.remove(cs)
+        # no conversion succeeded
+        return util.tolocal(s), util._encoding
+
+    def headencode(self, s):
+        '''Returns RFC-2047 compliant header from given string.'''
+        # split into words?
+        s, cs = self.encode(s)
+        return str(email.Header.Header(s, cs))
+
+    def addressencode(self, address):
+        '''Turn address into RFC-2047 compliant header.
+
+        The display name is encoded with headencode, the account must
+        be ascii, and the domain is converted with the idna codec.
+        Returns '' for an empty address. Raises util.Abort if the
+        address part contains characters that cannot be converted.'''
+        if not address:
+            return ''
+        name, addr = email.Utils.parseaddr(address)
+        name = self.headencode(name)
+        try:
+            acc, dom = addr.split('@')
+            acc = acc.encode('ascii')
+            # decode explicitly: encoding a byte string implies an ascii
+            # decode, which fails for every non-ascii domain
+            dom = dom.decode(util._encoding).encode('idna')
+            addr = '%s@%s' % (acc, dom)
+        except UnicodeDecodeError:
+            raise util.Abort(_('invalid email address: %s') % addr)
+        except ValueError:
+            # not exactly one '@' (a UnicodeError from idna lands here too)
+            try:
+                # too strict for local user names?
+                addr = addr.encode('ascii')
+            except UnicodeDecodeError:
+                raise util.Abort(_('invalid local address: %s') % addr)
+        return email.Utils.formataddr((name, addr))
+
+    def mimeencode(self, s):
+        '''Creates mime text object, encodes it if needed, and sets
+        charset and transfer-encoding accordingly.'''
+        s, cs = self.encode(s)
+        return email.MIMEText.MIMEText(s, 'plain', cs)
More information about the Mercurial-devel
mailing list