[PATCH 06 of 10] py3: use unicode literals and unichr in revset.py

Pulkit Goyal 7895pulkit at gmail.com
Tue Aug 2 16:27:28 EDT 2016


# HG changeset patch
# User Pulkit Goyal <7895pulkit at gmail.com>
# Date 1470168209 -19800
#      Wed Aug 03 01:33:29 2016 +0530
# Node ID da4a0ba184d3eff2819d73884770d342edce88c1
# Parent  4547ab529d26196dc40909693b5e9673763e9058
py3: use unicode literals and unichr in revset.py

The assignments of _syminitletters, _symletters, and _aliassyminitletters
didn't work under Python 3 because of mixed types. We rewrite the code
to work under both Python 2 and Python 3 by using unichr (aliased to
chr on Python 3) and unicode literals.

We preserve the final type of elements in the sets as bytes by encoding
each character as latin-1.

diff -r 4547ab529d26 -r da4a0ba184d3 mercurial/revset.py
--- a/mercurial/revset.py	Wed Aug 03 01:20:15 2016 +0530
+++ b/mercurial/revset.py	Wed Aug 03 01:33:29 2016 +0530
@@ -9,6 +9,7 @@
 
 import heapq
 import re
+import sys
 
 from .i18n import _
 from . import (
@@ -27,6 +28,9 @@
     util,
 )
 
+if sys.version_info[0]>=3:
+    unichr = chr
+
 def _revancestors(repo, revs, followfirst):
     """Like revlog.ancestors(), but supports followfirst."""
     if followfirst:
@@ -175,12 +179,12 @@
 keywords = set(['and', 'or', 'not'])
 
 # default set of valid characters for the initial letter of symbols
-_syminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                      if c.isalnum() or c in '._@' or ord(c) > 127)
+_syminitletters = set(c.encode("latin1") for c in [unichr(i) for i in xrange(256)]
+                      if c.isalnum() or c in u'._@' or ord(c) > 127)
 
 # default set of valid characters for non-initial letters of symbols
-_symletters = set(c for c in  [chr(i) for i in xrange(256)]
-                  if c.isalnum() or c in '-._/@' or ord(c) > 127)
+_symletters = set(c.encode("latin-1") for c in  [unichr(i) for i in xrange(256)]
+                  if c.isalnum() or c in u'-._/@' or ord(c) > 127)
 
 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
@@ -2450,8 +2454,8 @@
 
 # the set of valid characters for the initial letter of symbols in
 # alias declarations and definitions
-_aliassyminitletters = set(c for c in [chr(i) for i in xrange(256)]
-                           if c.isalnum() or c in '._@$' or ord(c) > 127)
+_aliassyminitletters = set(c.encode("latin-1") for c in [unichr(i) for i in xrange(256)]
+                           if c.isalnum() or c in u'._@$' or ord(c) > 127)
 
 def _parsewith(spec, lookup=None, syminitletters=None):
     """Generate a parse tree of given spec with given tokenizing options


More information about the Mercurial-devel mailing list