[PATCH 3 of 5] encoding: handle non-BMP characters in fromutf8b

Matt Mackall mpm at selenic.com
Fri Nov 6 15:48:48 CST 2015


# HG changeset patch
# User Matt Mackall <mpm at selenic.com>
# Date 1446765110 21600
#      Thu Nov 05 17:11:50 2015 -0600
# Node ID ac268a2bea78b28e3317b9bebf204ad58a1ff62e
# Parent  f2d5792d81fee871ae52962fb49a4c940f1037cb
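encoding: handle non-BMP characters in fromutf8b

The old mask 0xff00 only examined bits 8-15 of the code point, so a
non-BMP character such as U+1DC00 (0x1dc00 & 0xff00 == 0xdc00) was
mistaken for a U+DCxx escape and reduced to a single byte. Widening the
mask to 0xffff00 also checks the higher bits, so only U+DC00-U+DCFF
match.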

diff -r f2d5792d81fe -r ac268a2bea78 mercurial/encoding.py
--- a/mercurial/encoding.py	Thu Nov 05 17:09:00 2015 -0600
+++ b/mercurial/encoding.py	Thu Nov 05 17:11:50 2015 -0600
@@ -504,7 +504,7 @@
     u = s.decode("utf-8")
     r = ""
     for c in u:
-        if ord(c) & 0xff00 == 0xdc00:
+        if ord(c) & 0xffff00 == 0xdc00:
             r += chr(ord(c) & 0xff)
         else:
             r += c.encode("utf-8")


More information about the Mercurial-devel mailing list