[PATCH 2 of 3] templater: replace jsonescape in main json templater (issue4926)

Matt Mackall mpm at selenic.com
Tue Jan 12 11:01:06 CST 2016


# HG changeset patch
# User Matt Mackall <mpm at selenic.com>
# Date 1452542432 21600
#      Mon Jan 11 14:00:32 2016 -0600
# Node ID 35d049d7e5a2dec87318ce8042844f56e107cf83
# Parent  544d391bd3b42b96975a3521b73c25223db930b0
templater: replace jsonescape in main json templater (issue4926)

This version differs in a couple of ways:

- it skips optional escaping of codepoints > U+007f
- it thus handles emoji correctly (JSON requires UTF-16 surrogates)
- but it may run afoul of silly Unicode linebreaks (U+2028/U+2029) if exec'd in js
- it uses UTF-8b to round-trip undecodable bytes

diff -r 544d391bd3b4 -r 35d049d7e5a2 mercurial/templatefilters.py
--- a/mercurial/templatefilters.py	Mon Jan 11 13:43:43 2016 -0600
+++ b/mercurial/templatefilters.py	Mon Jan 11 14:00:32 2016 -0600
@@ -197,15 +197,8 @@
         return {None: 'null', False: 'false', True: 'true'}[obj]
     elif isinstance(obj, int) or isinstance(obj, float):
         return str(obj)
-    elif isinstance(obj, encoding.localstr):
-        u = encoding.fromlocal(obj).decode('utf-8')  # can round-trip
-        return '"%s"' % jsonescape(u)
     elif isinstance(obj, str):
-        # no encoding.fromlocal() because it may abort if obj can't be decoded
-        u = unicode(obj, encoding.encoding, 'replace')
-        return '"%s"' % jsonescape(u)
-    elif isinstance(obj, unicode):
-        return '"%s"' % jsonescape(obj)
+        return '"%s"' % encoding.jsonescape(obj)
     elif util.safehasattr(obj, 'keys'):
         out = []
         for k, v in sorted(obj.iteritems()):
diff -r 544d391bd3b4 -r 35d049d7e5a2 tests/test-command-template.t
--- a/tests/test-command-template.t	Mon Jan 11 13:43:43 2016 -0600
+++ b/tests/test-command-template.t	Mon Jan 11 14:00:32 2016 -0600
@@ -3493,12 +3493,12 @@
 json filter should try round-trip conversion to utf-8:
 
   $ HGENCODING=ascii hg log -T "{branch|json}\n" -r0
-  "\u00e9"
+  "\xc3\xa9" (esc)
 
 json filter should not abort if it can't decode bytes:
 (not sure the current behavior is right; we might want to use utf-8b encoding?)
 
   $ HGENCODING=ascii hg log -T "{'`cat utf-8`'|json}\n" -l1
-  "\ufffd\ufffd"
+  "\xc3\xa9" (esc)
 
   $ cd ..
diff -r 544d391bd3b4 -r 35d049d7e5a2 tests/test-hgweb-commands.t
--- a/tests/test-hgweb-commands.t	Mon Jan 11 13:43:43 2016 -0600
+++ b/tests/test-hgweb-commands.t	Mon Jan 11 14:00:32 2016 -0600
@@ -2099,7 +2099,7 @@
   >>> for line in open("out"):
   ...     if line.startswith("var data ="):
   ...         print line,
-  var data = [["061dd13ba3c3", [0, 1], [[0, 0, 1, -1, ""]], "\u80fd", "test", "1970-01-01", ["unstable", true], ["tip"], ["something"]], ["cad8025a2e87", [0, 1], [[0, 0, 1, 3, "FF0000"]], "branch commit with null character: \u0000", "test", "1970-01-01", ["unstable", false], [], []], ["1d22e65f027e", [0, 1], [[0, 0, 1, 3, ""]], "branch", "test", "1970-01-01", ["stable", true], [], []], ["a4f92ed23982", [0, 1], [[0, 0, 1, 3, ""]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], [], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"], ["anotherthing"]]];
+  var data = [["061dd13ba3c3", [0, 1], [[0, 0, 1, -1, ""]], "\xed\xb2\x94\\\\", "test", "1970-01-01", ["unstable", true], ["tip"], ["something"]], ["cad8025a2e87", [0, 1], [[0, 0, 1, 3, "FF0000"]], "branch commit with null character: \\u0000", "test", "1970-01-01", ["unstable", false], [], []], ["1d22e65f027e", [0, 1], [[0, 0, 1, 3, ""]], "branch", "test", "1970-01-01", ["stable", true], [], []], ["a4f92ed23982", [0, 1], [[0, 0, 1, 3, ""]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], [], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"], ["anotherthing"]]]; (esc)
 
 capabilities
 


More information about the Mercurial-devel mailing list