[PATCH 1 of 3] mail: add methods to handle non-ascii chars
Christian Ebert
blacktrash at gmx.net
Wed Sep 10 20:44:37 CDT 2008
# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1215886319 -3600
# Node ID 959f46039fcdfcd7869eb903c8122f6d2a76745e
# Parent ab57069232b4d9ce95e5898d9154410988008ba2
mail: add methods to handle non-ascii chars
- headencode, addressencode: encode headers
- mimeencode: encode message parts not containing patches
non-ascii input is converted to utf-8 in a way that fits
the corresponding message part (header or body).
diff --git a/mercurial/mail.py b/mercurial/mail.py
--- a/mercurial/mail.py
+++ b/mercurial/mail.py
@@ -7,6 +7,7 @@
from i18n import _
import os, smtplib, socket
+import email.Header, email.MIMEText, email.Utils
import util
def _smtp(ui):
@@ -84,3 +85,68 @@
if not util.find_exe(method):
raise util.Abort(_('%r specified as email transport, '
'but not in PATH') % method)
+
+def _encode(ui, s):
+    '''Return (string, charset) for use in a mail header or body.
+
+    Pure ascii input is returned untouched with charset 'us-ascii'.
+    Otherwise the input is decoded with util._encoding, then with
+    util._fallbackencoding, and the first successful decode is
+    re-encoded and returned with charset 'utf-8'.
+    If no conversion works the original bytes are returned labelled
+    'us-ascii' ("fake ascii") as a last resort.
+
+    The ui argument is not used by this function.
+
+    Caveat: Do not use for mail parts containing patches!'''
+    try:
+        s.decode('ascii')
+    except UnicodeDecodeError:
+        pass
+    else:
+        return s, 'us-ascii'
+    # look the encodings up one at a time so the fallback is only
+    # consulted when the primary encoding could not decode the input
+    for attr in ('_encoding', '_fallbackencoding'):
+        inputcs = getattr(util, attr)
+        try:
+            decoded = s.decode(inputcs)
+        except UnicodeDecodeError:
+            continue
+        try:
+            return decoded.encode('utf-8'), 'utf-8'
+        except UnicodeEncodeError:
+            # conversion attempt failed, send broken ascii
+            return s, 'us-ascii'
+    return s, 'us-ascii'
+
+def headencode(ui, s, display=False):
+    '''Return s as an RFC-2047 compliant header string.
+
+    When display is true the string is handed back unchanged;
+    otherwise it is run through _encode and wrapped in an
+    email.Header.Header using the charset _encode reports.'''
+    if display:
+        return s
+    text, charset = _encode(ui, s)
+    header = email.Header.Header(text, charset)
+    return str(header)
+
+def addressencode(ui, address, display=False):
+    '''Turn address into an RFC-2047 compliant header value.
+
+    Returns address unchanged (or '' if it is empty) when display
+    is true or no address is given.  Otherwise the real name is
+    encoded with headencode, the account part must be ascii and the
+    domain part is converted with the idna codec.
+    An address that does not split into exactly one account and one
+    domain at '@' is treated as a local address and must be ascii.
+
+    Raises util.Abort if the address cannot be represented.'''
+    if display or not address:
+        return address or ''
+    name, addr = email.Utils.parseaddr(address)
+    name = headencode(ui, name)
+    try:
+        acc, dom = addr.split('@')
+        acc = acc.encode('ascii')
+        # decode to unicode before applying idna: encode() on a byte
+        # string implies an ascii decode first, which would reject
+        # every internationalized domain name before idna is applied
+        dom = dom.decode(util._encoding).encode('idna')
+        addr = '%s@%s' % (acc, dom)
+    except UnicodeDecodeError:
+        raise util.Abort(_('invalid email address: %s') % addr)
+    except ValueError:
+        # no '@', or more than one: only accept a plain ascii address
+        try:
+            addr = addr.encode('ascii')
+        except UnicodeDecodeError:
+            raise util.Abort(_('invalid local address: %s') % addr)
+    return email.Utils.formataddr((name, addr))
+
+def mimeencode(ui, s, display=False):
+    '''Build a plain-text MIME object from s.
+
+    Unless display is true, s is first passed through _encode and
+    the charset it reports is given to MIMEText; for display the
+    text is used as is with charset 'us-ascii'.'''
+    if display:
+        charset = 'us-ascii'
+    else:
+        s, charset = _encode(ui, s)
+    return email.MIMEText.MIMEText(s, 'plain', charset or 'us-ascii')
More information about the Mercurial-devel
mailing list