[PATCH 3 of 3 STABLE] py3: define and use json.loads polyfill

Gregory Szorc gregory.szorc at gmail.com
Sat Nov 2 15:19:58 EDT 2019


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1572721775 25200
#      Sat Nov 02 12:09:35 2019 -0700
# Branch stable
# Node ID b4a9220a4eb5a61891027d59cd0bded6dc5f7578
# Parent  0d0cd63ca1702b901df2cc021d1f51c77bc0bf61
py3: define and use json.loads polyfill

Python 3.5's json.loads() requires a str. Only Python 3.6+
supports passing a bytes or bytearray.

This commit implements a json.loads() polyfill on Python 3.5
so that we can use bytes. The added function to detect encodings
comes verbatim from Python 3.7.

diff --git a/hgext/bugzilla.py b/hgext/bugzilla.py
--- a/hgext/bugzilla.py
+++ b/hgext/bugzilla.py
@@ -955,7 +955,7 @@ class bzrestapi(bzaccess):
     def _fetch(self, burl):
         try:
             resp = url.open(self.ui, burl)
-            return json.loads(resp.read())
+            return pycompat.json_loads(resp.read())
         except util.urlerr.httperror as inst:
             if inst.code == 401:
                 raise error.Abort(_(b'authorization failed'))
@@ -978,7 +978,7 @@ class bzrestapi(bzaccess):
         req = request_type(burl, data, {b'Content-Type': b'application/json'})
         try:
             resp = url.opener(self.ui).open(req)
-            return json.loads(resp.read())
+            return pycompat.json_loads(resp.read())
         except util.urlerr.httperror as inst:
             if inst.code == 401:
                 raise error.Abort(_(b'authorization failed'))
diff --git a/hgext/fix.py b/hgext/fix.py
--- a/hgext/fix.py
+++ b/hgext/fix.py
@@ -126,7 +126,6 @@ from __future__ import absolute_import
 
 import collections
 import itertools
-import json
 import os
 import re
 import subprocess
@@ -642,7 +641,7 @@ def fixfile(ui, repo, opts, fixers, fixc
             if fixer.shouldoutputmetadata():
                 try:
                     metadatajson, newerdata = stdout.split(b'\0', 1)
-                    metadata[fixername] = json.loads(metadatajson)
+                    metadata[fixername] = pycompat.json_loads(metadatajson)
                 except ValueError:
                     ui.warn(
                         _(b'ignored invalid output from fixer tool: %s\n')
diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py
--- a/hgext/lfs/blobstore.py
+++ b/hgext/lfs/blobstore.py
@@ -363,7 +363,7 @@ class _gitlfsremote(object):
                 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
             )
         try:
-            response = json.loads(rawjson)
+            response = pycompat.json_loads(rawjson)
         except ValueError:
             raise LfsRemoteError(
                 _(b'LFS server returns invalid JSON: %s')
diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py
--- a/hgext/lfs/wireprotolfsserver.py
+++ b/hgext/lfs/wireprotolfsserver.py
@@ -133,7 +133,7 @@ def _processbatchrequest(repo, req, res)
         return True
 
     # XXX: specify an encoding?
-    lfsreq = json.loads(req.bodyfh.read())
+    lfsreq = pycompat.json_loads(req.bodyfh.read())
 
     # If no transfer handlers are explicitly requested, 'basic' is assumed.
     if r'basic' not in lfsreq.get(r'transfers', [r'basic']):
diff --git a/hgext/phabricator.py b/hgext/phabricator.py
--- a/hgext/phabricator.py
+++ b/hgext/phabricator.py
@@ -152,8 +152,8 @@ def vcrcommand(name, flags, spec, helpca
             value = r1params[key][0]
             # we want to compare json payloads without worrying about ordering
             if value.startswith(b'{') and value.endswith(b'}'):
-                r1json = json.loads(value)
-                r2json = json.loads(r2params[key][0])
+                r1json = pycompat.json_loads(value)
+                r2json = pycompat.json_loads(r2params[key][0])
                 if r1json != r2json:
                     return False
             elif r2params[key][0] != value:
@@ -307,7 +307,7 @@ def callconduit(ui, name, params):
         if isinstance(x, pycompat.unicode)
         else x,
         # json.loads only accepts bytes from py3.6+
-        json.loads(encoding.unifromlocal(body)),
+        pycompat.json_loads(encoding.unifromlocal(body)),
     )
     if parsed.get(b'error_code'):
         msg = _(b'Conduit Error (%s): %s') % (
@@ -332,7 +332,7 @@ def debugcallconduit(ui, repo, name):
         lambda x: encoding.unitolocal(x)
         if isinstance(x, pycompat.unicode)
         else x,
-        json.loads(rawparams),
+        pycompat.json_loads(rawparams),
     )
     # json.dumps only accepts unicode strings
     result = pycompat.rapply(
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -12,6 +12,7 @@ from __future__ import absolute_import
 
 import getopt
 import inspect
+import json
 import os
 import shlex
 import sys
@@ -88,6 +89,7 @@ def rapply(f, xs):
 
 if ispy3:
     import builtins
+    import codecs
     import functools
     import io
     import struct
@@ -340,6 +342,48 @@ if ispy3:
     iteritems = lambda x: x.items()
     itervalues = lambda x: x.values()
 
+    # Python 3.5's json.load and json.loads require str. We polyfill the
+    # encoding detection that later Python versions apply to bytes input.
+    if sys.version_info[0:2] < (3, 6):
+
+        def _detect_encoding(b):
+            bstartswith = b.startswith
+            if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+                return 'utf-32'
+            if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+                return 'utf-16'
+            if bstartswith(codecs.BOM_UTF8):
+                return 'utf-8-sig'
+            # No BOM: guess from which of the leading bytes are zero.
+            if len(b) >= 4:
+                if not b[0]:
+                    # 00 00 -- -- - utf-32-be
+                    # 00 XX -- -- - utf-16-be
+                    return 'utf-16-be' if b[1] else 'utf-32-be'
+                if not b[1]:
+                    # XX 00 00 00 - utf-32-le
+                    # XX 00 00 XX - utf-16-le
+                    # XX 00 XX -- - utf-16-le
+                    return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+            elif len(b) == 2:
+                if not b[0]:
+                    # 00 XX - utf-16-be
+                    return 'utf-16-be'
+                if not b[1]:
+                    # XX 00 - utf-16-le
+                    return 'utf-16-le'
+            # default: neither a BOM nor a zero-byte pattern matched above
+            return 'utf-8'
+
+        def json_loads(s, *args, **kwargs):
+            if isinstance(s, (bytes, bytearray)):
+                s = s.decode(_detect_encoding(s), 'surrogatepass')
+            # bytes/bytearray were decoded above; other input is passed as is.
+            return json.loads(s, *args, **kwargs)
+
+    else:
+        json_loads = json.loads
+
 else:
     import cStringIO
 
@@ -417,6 +461,7 @@ else:
     getargspec = inspect.getargspec
     iteritems = lambda x: x.iteritems()
     itervalues = lambda x: x.itervalues()
+    json_loads = json.loads
 
 isjython = sysplatform.startswith(b'java')
 
diff --git a/tests/get-with-headers.py b/tests/get-with-headers.py
--- a/tests/get-with-headers.py
+++ b/tests/get-with-headers.py
@@ -98,7 +98,7 @@ def request(host, path, show):
         if formatjson:
             # json.dumps() will print trailing newlines. Eliminate them
             # to make tests easier to write.
-            data = json.loads(data)
+            data = pycompat.json_loads(data)
             lines = json.dumps(data, sort_keys=True, indent=2).splitlines()
             for line in lines:
                 bodyfh.write(pycompat.sysbytes(line.rstrip()))


More information about the Mercurial-devel mailing list