D8039: chg: force-set LC_CTYPE on server start to actual value from the environment
spectral (Kyle Lippincott)
phabricator at mercurial-scm.org
Wed Jan 29 22:24:06 UTC 2020
spectral created this revision.
Herald added subscribers: mercurial-devel, mjpieters.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
Python 3.7+ will "coerce" the LC_CTYPE variable in many instances, and this can
prevent chg from starting up. D7550 <https://phab.mercurial-scm.org/D7550> attempted to fix this, but a
combination of a misreading of how Python 3.7 performs the coercion and an
untested state (LC_CTYPE being set to an invalid value) meant that the fix was
still not quite working.
This change will cause differences between chg and hg: hg will have the LC_CTYPE
environment variable coerced, while chg will not. This is unlikely to cause any
detectable behavior differences in what Mercurial itself outputs, but it does
have two known effects:
- When using hg, the coerced LC_CTYPE will be passed to subprocesses, even non-python ones. chg undoes the coercion, so subprocesses started under chg see the user's original LC_CTYPE instead. This is arguably more correct behavior on chg's part.
- On macOS, if you set your region to Brazil but your language to English, this combination isn't representable as a locale string, so macOS sets LC_CTYPE=UTF-8. If this value is passed along when ssh'ing to a non-macOS machine, some functions (such as locale.setlocale()) may raise an exception due to an unsupported locale setting. Because chg now keeps the user's original LC_CTYPE rather than Python's coerced value, this may surface under chg in cases where it would not under hg. This is most easily encountered when doing an interactive commit/split/etc. with ui.interface=curses.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D8039
AFFECTED FILES
contrib/chg/chg.c
hg
mercurial/chgserver.py
tests/test-chg.t
CHANGE DETAILS
diff --git a/tests/test-chg.t b/tests/test-chg.t
--- a/tests/test-chg.t
+++ b/tests/test-chg.t
@@ -332,8 +332,8 @@
YYYY/MM/DD HH:MM:SS (PID)> log -R cached
YYYY/MM/DD HH:MM:SS (PID)> loaded repo into cache: $TESTTMP/cached (in ...s)
-Test that chg works even when python "coerces" the locale (py3.7+, which is done
-by default if none of LC_ALL, LC_CTYPE, or LANG are set in the environment)
+Test that chg works (sets to the user's actual LC_CTYPE) even when python
+"coerces" the locale (py3.7+)
$ cat > $TESTTMP/debugenv.py <<EOF
> from mercurial import encoding
@@ -347,9 +347,22 @@
> if v is not None:
> ui.write(b'%s=%s\n' % (k, encoding.environ[k]))
> EOF
+(hg keeps python's modified LC_CTYPE, chg doesn't)
+ $ (unset LC_ALL; unset LANG; LC_CTYPE= "$CHGHG" \
+ > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+ LC_CTYPE=C.UTF-8 (py37 !)
+ LC_CTYPE= (no-py37 !)
+ $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \
+ > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+ LC_CTYPE=
+ $ (unset LC_ALL; unset LANG; LC_CTYPE=unsupported_value chg \
+ > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+ LC_CTYPE=unsupported_value
+ $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \
+ > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv)
+ LC_CTYPE=
$ LANG= LC_ALL= LC_CTYPE= chg \
> --config extensions.debugenv=$TESTTMP/debugenv.py debugenv
LC_ALL=
- LC_CTYPE=C.UTF-8 (py37 !)
- LC_CTYPE= (no-py37 !)
+ LC_CTYPE=
LANG=
diff --git a/mercurial/chgserver.py b/mercurial/chgserver.py
--- a/mercurial/chgserver.py
+++ b/mercurial/chgserver.py
@@ -550,40 +550,6 @@
raise ValueError(b'unexpected value in setenv request')
self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys()))
- # Python3 has some logic to "coerce" the C locale to a UTF-8 capable
- # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of
- # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be
- # disabled with PYTHONCOERCECLOCALE=0 in the environment.
- #
- # When fromui is called via _inithashstate, python has already set
- # this, so that's in the environment right when we start up the hg
- # process. Then chg will call us and tell us to set the environment to
- # the one it has; this might NOT have LC_CTYPE, so we'll need to
- # carry-forward the LC_CTYPE that was coerced in these situations.
- #
- # If this is not handled, we will fail config+env validation and fail
- # to start chg. If this is just ignored instead of carried forward, we
- # may have different behavior between chg and non-chg.
- if pycompat.ispy3:
- # Rename for wordwrapping purposes
- oldenv = encoding.environ
- if not any(
- e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv]
- ):
- keys = [b'LC_CTYPE', b'LC_ALL', b'LANG']
- old_keys = [k for k, v in oldenv.items() if k in keys and v]
- new_keys = [k for k, v in newenv.items() if k in keys and v]
- # If the user's environment (from chg) doesn't have ANY of the
- # keys that python looks for, and the environment (from
- # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8,
- # carry it forward.
- if (
- not new_keys
- and old_keys == [b'LC_CTYPE']
- and oldenv[b'LC_CTYPE'] == b'C.UTF-8'
- ):
- newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE']
-
encoding.environ.clear()
encoding.environ.update(newenv)
@@ -730,6 +696,11 @@
# environ cleaner.
if b'CHGINTERNALMARK' in encoding.environ:
del encoding.environ[b'CHGINTERNALMARK']
+ if b'CHGORIG_LC_CTYPE' in encoding.environ:
+ encoding.environ[b'LC_CTYPE'] = encoding.environ[b'CHGORIG_LC_CTYPE']
+ del encoding.environ[b'CHGORIG_LC_CTYPE']
+ elif b'CHG_CLEAR_LC_CTYPE' in encoding.environ:
+ del encoding.environ[b'LC_CTYPE']
if repo:
# one chgserver can serve multiple repos. drop repo information
diff --git a/hg b/hg
--- a/hg
+++ b/hg
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# mercurial - scalable distributed SCM
#
diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c
--- a/contrib/chg/chg.c
+++ b/contrib/chg/chg.c
@@ -226,6 +226,16 @@
}
argv[argsize - 1] = NULL;
+ const char *lc_ctype_env = getenv("LC_CTYPE");
+ if (lc_ctype_env == NULL) {
+ if (putenv("CHG_CLEAR_LC_CTYPE=") != 0)
+ abortmsgerrno("failed to putenv CHG_CLEAR_LC_CTYPE");
+ } else {
+ if (setenv("CHGORIG_LC_CTYPE", lc_ctype_env, 1) != 0) {
+ abortmsgerrno("failed to setenv CHGORIG_LC_CTYYPE");
+ }
+ }
+
if (putenv("CHGINTERNALMARK=") != 0)
abortmsgerrno("failed to putenv");
if (execvp(hgcmd, (char **)argv) < 0)
To: spectral, #hg-reviewers
Cc: mjpieters, mercurial-devel
More information about the Mercurial-devel
mailing list