[PATCH 1 of 6] mailcharset: module for handling non-ascii chars in mails
Christian Ebert
blacktrash at gmx.net
Wed Jul 9 11:33:42 CDT 2008
# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1205141667 -3600
# Node ID 1e5ec68b102fc39ba77d3761548c1e766ae37389
# Parent 694223a29ad494fbd2cee69ebdceadfc6ac127cf
mailcharset: module for handling non-ascii chars in mails
Module provides class converter for message parts that do not
contain patches.
- methods to encode headers
- method to create mime text object with proper charset
- new email config "sendcharsets"
Users may configure email.sendcharsets as a list of charsets they
consider appropriate for the recipients of their outgoing mails.
By default the list is empty, and only us-ascii and ui.encoding
(util._encoding) are tried; both are always tried, whether or not
they are listed in email.sendcharsets.
diff --git a/mercurial/mailcharset.py b/mercurial/mailcharset.py
new file mode 100644
--- /dev/null
+++ b/mercurial/mailcharset.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2008 Christian Ebert <blacktrash at gmx.net>
+# This file is published under the GNU GPL.
+
+import util
+from i18n import _
+import email.Header, email.MIMEText, email.Utils
+
+class converter(object):
+    '''
+    Provides methods to encode mails containing non-ascii chars into charsets
+    considered convenient for recipients.
+
+    Methods of this class must only be applied on message parts that do not
+    contain patches.
+    '''
+    def __init__(self, ui):
+        '''Builds the list of candidate charsets from email.sendcharsets,
+        dropping ascii entries and appending util._encoding if missing.'''
+        self.ui = ui
+        sendcharsets = self.ui.configlist('email', 'sendcharsets')
+        # remove ascii, as it is always tried first
+        self.sendcharsets = [cs for cs in sendcharsets if
+                             cs.lower() not in ('ascii', 'us-ascii')]
+        if util._encoding not in self.sendcharsets:
+            # fallback
+            self.sendcharsets.append(util._encoding)
+
+    def encode(self, s):
+        '''Returns (converted) string, charset tuple.
+
+        Finds out best charset by cycling through sendcharsets in descending
+        order. Tries both _encoding and _fallbackencoding for input. Only as
+        last resort send as is in fake ascii. The charset is None when s is
+        returned unconverted.'''
+        try:
+            s.decode('ascii')
+            return s, None
+        except UnicodeDecodeError:
+            pass
+
+        def strconv(inputcs):
+            # a UnicodeDecodeError is left to the caller, so that it can
+            # try another input charset
+            u = s.decode(inputcs)
+            for cs in self.sendcharsets:
+                try:
+                    return u.encode(cs), cs
+                except UnicodeEncodeError:
+                    pass
+                except LookupError:
+                    self.ui.warn(_('ignoring invalid sendcharset: %s\n')
+                                 % cs)
+            # if all conversion attempts fail, send broken ascii
+            return s, None
+
+        try:
+            s, scs = strconv(util._encoding)
+        except UnicodeDecodeError:
+            try:
+                s, scs = strconv(util._fallbackencoding)
+            except UnicodeDecodeError:
+                scs = None
+        return s, scs
+
+    def headencode(self, s, **opts):
+        '''Returns RFC-2047 compliant header from given string.
+
+        If opts['test'] is set, s is returned unchanged.'''
+        if not opts.get('test'):
+            # split into words?
+            s, cs = self.encode(s)
+            if cs:
+                return str(email.Header.Header(s, cs))
+        return s
+
+    def addressencode(self, address, **opts):
+        '''Turn address into RFC-2047 compliant header.
+
+        The display name is passed through headencode, the account must be
+        ascii, and the domain is converted to IDNA. An address that does
+        not split into exactly one account and one domain is treated as a
+        local address and must be ascii. Raises util.Abort on violations.
+        If opts['test'] is set, address is returned as is; an empty or
+        missing address yields ''.'''
+        if opts.get('test') or not address:
+            return address or ''
+        name, addr = email.Utils.parseaddr(address)
+        name = self.headencode(name)
+        parts = addr.split('@')
+        if len(parts) == 2:
+            acc, dom = parts
+            try:
+                acc = acc.encode('ascii')
+                # encoding a byte string implies decoding it as ascii first,
+                # which rejects every non-ascii domain before idna is
+                # reached; decode from the local encoding explicitly
+                dom = dom.decode(util._encoding).encode('idna')
+            except UnicodeError:
+                raise util.Abort(_('invalid email address: %s') % addr)
+            addr = '%s@%s' % (acc, dom)
+        else:
+            try:
+                # too strict?
+                addr = addr.encode('ascii')
+            except UnicodeDecodeError:
+                raise util.Abort(_('invalid local address: %s') % addr)
+        return email.Utils.formataddr((name, addr))
+
+    def mimeencode(self, s, **opts):
+        '''Creates mime text object, encodes it if needed, and sets
+        charset and transfer-encoding accordingly.
+
+        If opts['test'] is set, s is used as is and labelled us-ascii.'''
+        if not opts.get('test'):
+            s, cs = self.encode(s)
+        else:
+            cs = None
+        return email.MIMEText.MIMEText(s, 'plain', cs or 'us-ascii')
More information about the Mercurial-devel
mailing list