[PATCH] Only HTML-entitify characters less than 128 in the template
Brendan Cully
brendan at kublai.com
Mon Jun 5 18:36:48 CDT 2006
# HG changeset patch
# User Brendan Cully <brendan at kublai.com>
# Node ID a600c780e8d20eb16209a05729dfe1ff8d449699
# Parent fa4c11751367506265deb5b68fc76c10723a6884
Only HTML-entitify characters less than 128 in the template
obfuscation code, to avoid accidentally splitting apart multi-byte
UTF-8 characters. Characters with values of 128 and above are passed
through unchanged.
diff -r fa4c11751367 -r a600c780e8d2 mercurial/templater.py
--- a/mercurial/templater.py Sun Jun 04 18:05:52 2006 +0100
+++ b/mercurial/templater.py Mon Jun 05 14:39:04 2006 -0700
@@ -226,7 +226,15 @@ def nl2br(text):
return text.replace('\n', '<br/>\n')
def obfuscate(text):
- return ''.join(['&#%d;' % ord(c) for c in text])
+ '''encode characters as HTML entities to confuse address harvesters'''
+ # Only encodes characters < 128 to avoid breaking UTF-8.
+ def obfuscate_char(char):
+ c = ord(char)
+ if c < 128:
+ char = '&#%d;' % (c,)
+ return char
+
+ return ''.join([obfuscate_char(c) for c in text])
def domain(author):
'''get domain of author, or empty string if none.'''
More information about the Mercurial mailing list