[PATCH 1 of 2] encoding: make HFS+ ignore code Python 3 compatible
Gregory Szorc
gregory.szorc at gmail.com
Sat Mar 12 06:43:56 UTC 2016
# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1457760214 28800
# Fri Mar 11 21:23:34 2016 -0800
# Node ID bf54b6d99a73008537efc8867984ab474321a0ee
# Parent 70c2f8a982766b512e9d7f41f2d93fdb92f5481f
encoding: make HFS+ ignore code Python 3 compatible
unichr() doesn't exist in Python 3. chr() is the equivalent there.
Unfortunately, we can't use chr() outright because Python 2's chr()
does not accept values larger than 255.
Also, Python 3 returns an int when accessing a character of a
bytes type (s[x]). So, we have to ord() the values in the assert
statement.
diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -5,30 +5,37 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import array
import locale
import os
+import sys
import unicodedata
from . import (
error,
)
+if sys.version_info[0] >= 3:
+ unichr = chr
+
# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
"200c 200d 200e 200f 202a 202b 202c 202d 202e "
"206a 206b 206c 206d 206e 206f feff".split()]
# verify the next function will work
-assert set([i[0] for i in _ignore]) == set(["\xe2", "\xef"])
+if sys.version_info[0] >= 3:
+ assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')])
+else:
+ assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"])
def hfsignoreclean(s):
"""Remove codepoints ignored by HFS+ from s.
>>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
'.hg'
>>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
'.hg'
More information about the Mercurial-devel
mailing list