[PATCH] Only HTML-entitify characters less than 128 in the template

Brendan Cully brendan at kublai.com
Mon Jun 5 18:36:48 CDT 2006


# HG changeset patch
# User Brendan Cully <brendan at kublai.com>
# Node ID a600c780e8d20eb16209a05729dfe1ff8d449699
# Parent  fa4c11751367506265deb5b68fc76c10723a6884
Only HTML-entitify characters whose ordinal value is below 128 (plain
ASCII) in the template obfuscation code, to avoid accidentally splitting
apart multi-byte UTF-8 characters.

diff -r fa4c11751367 -r a600c780e8d2 mercurial/templater.py
--- a/mercurial/templater.py	Sun Jun 04 18:05:52 2006 +0100
+++ b/mercurial/templater.py	Mon Jun 05 14:39:04 2006 -0700
@@ -226,7 +226,15 @@ def nl2br(text):
     return text.replace('\n', '<br/>\n')
 
 def obfuscate(text):
-    return ''.join(['&#%d;' % ord(c) for c in text])
+    '''encode characters as HTML entities to confuse address harvesters'''
+    # Only encodes characters < 128 to avoid breaking UTF-8.
+    def obfuscate_char(char):
+        c = ord(char)
+        if c < 128:
+            char = '&#%d;' % (c,)
+        return char
+
+    return ''.join([obfuscate_char(c) for c in text])
 
 def domain(author):
     '''get domain of author, or empty string if none.'''


More information about the Mercurial mailing list