[PATCH 2 of 2] highlight: pass encoding to lexers and formatter

Christian Ebert blacktrash at gmx.net
Wed Dec 12 17:03:33 CST 2007


# HG changeset patch
# User Christian Ebert <blacktrash at gmx.net>
# Date 1197500558 -3600
# Node ID 1f9b54d98964063980af09d1cb4c860718b01640
# Parent  347d461bca84f3073c54b0ffefc30967724f4994
highlight: pass encoding to lexers and formatter

Try to avoid UnicodeDecodeError by:
- setting util._encoding (required for tolocal) to hgweb.encoding
- converting the file text to the local encoding with util.tolocal
- passing util._encoding as the encoding for the lexers and the formatter

diff --git a/hgext/highlight.py b/hgext/highlight.py
--- a/hgext/highlight.py
+++ b/hgext/highlight.py
@@ -66,19 +66,20 @@ class StripedHtmlFormatter(HtmlFormatter
         yield 0, "</div>"
 
 
-def pygments_format(filename, rawtext, forcetext, stripecount, style):
+def pygments_format(filename, text, forcetext, stripecount, style):
     if not forcetext:
         try:
-            lexer = guess_lexer_for_filename(filename, rawtext)
+            lexer = guess_lexer_for_filename(filename, text,
+                                             encoding=util._encoding)
         except ClassNotFound:
-            lexer = TextLexer()
+            lexer = TextLexer(encoding=util._encoding)
     else:
-        lexer = TextLexer()
+        lexer = TextLexer(encoding=util._encoding)
 
     formatter = StripedHtmlFormatter(stripecount, style=style,
-                                     linenos='inline')
+                                     linenos='inline', encoding=util._encoding)
 
-    return highlight(rawtext, lexer, formatter)
+    return highlight(text, lexer, formatter)
 
 
 def filerevision_pygments(self, tmpl, fctx):
@@ -93,6 +94,9 @@ def filerevision_pygments(self, tmpl, fc
 
     mt = mimetypes.guess_type(f)[0]
 
+    # we always want hgweb.encoding
+    util._encoding = self.encoding
+
     if util.binary(text):
         mt = mt or 'application/octet-stream'
         text = "(binary:%s)" % mt
@@ -101,6 +105,9 @@ def filerevision_pygments(self, tmpl, fc
         forcetext = True
     else:
         mt = mt or 'text/plain'
+
+        # encode to hgweb.encoding for lexers and formatter
+        text = util.tolocal(text)
         forcetext = False
 
     def lines(text):


More information about the Mercurial-devel mailing list