[PATCH] highlight: Support fallback character encodings

OHASHI Hideya ohachige at gmail.com
Wed Apr 2 03:37:26 CDT 2008


# HG changeset patch
# User OHASHI Hideya <ohachige at gmail.com>
# Date 1207125348 -32400
# Node ID cf3eb33457d587f6352d9ec33b9768b63c37f499
# Parent  83d1308b2e593d6daef0fbf9dd8aa11fa3455f11
highlight: Support fallback character encodings

diff -r 83d1308b2e59 -r cf3eb33457d5 hgext/highlight.py
--- a/hgext/highlight.py	Tue Apr 01 19:52:47 2008 +0900
+++ b/hgext/highlight.py	Wed Apr 02 17:35:48 2008 +0900
@@ -10,12 +10,13 @@
 [extensions]
 hgext.highlight =
 
-There is a single configuration option:
+There are two configuration options:
 
 [web]
 pygments_style = <style>
+pygments_encodings = <fallback encoding1>{,<fallback encoding2>,...}
 
-The default is 'colorful'.  If this is changed the corresponding CSS
+The default style is 'colorful'.  If this is changed the corresponding CSS
 file should be re-generated by running
 
 # pygmentize -f html -S <newstyle>
@@ -38,7 +39,10 @@
 SYNTAX_CSS = ('\n<link rel="stylesheet" href="#staticurl#highlight.css" '
               'type="text/css" />')
 
-def pygmentize(field, fctx, style, tmpl):
+def pygmentize(field, fctx, tmpl, web):
+
+    style = web.config('web', 'pygments_style', 'colorful')
+    encodings = web.config('web', 'pygments_encodings', '')
 
     # append a <link ...> to the syntax highlighting css
     old_header = ''.join(tmpl('header'))
@@ -50,19 +54,26 @@
     if util.binary(text):
         return
 
-    # To get multi-line strings right, we can't format line-by-line
-    try:
-        lexer = guess_lexer_for_filename(fctx.path(), text,
-                                         encoding=util._encoding)
-    except ClassNotFound:
+    encodings = encodings.split(',')
+    encodings.insert(0, util._encoding)
+    for e in encodings:
         try:
-            lexer = guess_lexer(text, encoding=util._encoding)
-        except ClassNotFound:
-            lexer = TextLexer(encoding=util._encoding)
+            # To get multi-line strings right, we can't format line-by-line
+            try:
+                lexer = guess_lexer_for_filename(fctx.path(), text, encoding=e)
+            except ClassNotFound:
+                try:
+                    lexer = guess_lexer(text, encoding=e)
+                except ClassNotFound:
+                    lexer = TextLexer(encoding=e)
+            formatter = HtmlFormatter(style=style, encoding=e)
+            colorized = highlight(text, lexer, formatter)
+            break
+        except UnicodeDecodeError:
+            pass
+    else:
+        return
 
-    formatter = HtmlFormatter(style=style, encoding=util._encoding)
-
-    colorized = highlight(text, lexer, formatter)
     # strip wrapping div
     colorized = colorized[:colorized.find('\n</pre>')]
     colorized = colorized[colorized.find('<pre>')+5:]
@@ -78,14 +89,12 @@
 web_annotate = webcommands.annotate
 
 def filerevision_highlight(web, tmpl, fctx):
-    style = web.config('web', 'pygments_style', 'colorful')
-    pygmentize('fileline', fctx, style, tmpl)
+    pygmentize('fileline', fctx, tmpl, web)
     return web_filerevision(web, tmpl, fctx)
 
 def annotate_highlight(web, req, tmpl):
     fctx = webutil.filectx(web.repo, req)
-    style = web.config('web', 'pygments_style', 'colorful')
-    pygmentize('annotateline', fctx, style, tmpl)
+    pygmentize('annotateline', fctx, tmpl, web)
     return web_annotate(web, req, tmpl)
 
 # monkeypatch in the new version


More information about the Mercurial-devel mailing list