diff -r b65ae19bc42a Lib/test/support/script_helper.py
--- a/Lib/test/support/script_helper.py	Wed Dec 14 11:52:28 2016 +0100
+++ b/Lib/test/support/script_helper.py	Sun Jan 08 12:17:19 2017 +1000
@@ -51,8 +51,35 @@
     return __cached_interp_requires_environment
 
 
-_PythonRunResult = collections.namedtuple("_PythonRunResult",
-                                          ("rc", "out", "err"))
+class _PythonRunResult(collections.namedtuple("_PythonRunResult",
+                                          ("rc", "out", "err"))):
+    """Helper for reporting Python subprocess run results"""
+    def fail(self, cmd_line):
+        """Provide helpful details about failed subcommand runs"""
+        # Limit each stream to 80 lines of 100 ASCII characters
+        maxlen = 80 * 100
+        out, err = self.out, self.err
+        if len(out) > maxlen:
+            out = b'(... truncated stdout ...)' + out[-maxlen:]
+        if len(err) > maxlen:
+            err = b'(... truncated stderr ...)' + err[-maxlen:]
+        out = out.decode('ascii', 'replace').rstrip()
+        err = err.decode('ascii', 'replace').rstrip()
+        raise AssertionError("Process return code is %d\n"
+                             "command line: %r\n"
+                             "\n"
+                             "stdout:\n"
+                             "---\n"
+                             "%s\n"
+                             "---\n"
+                             "\n"
+                             "stderr:\n"
+                             "---\n"
+                             "%s\n"
+                             "---"
+                             % (self.rc, cmd_line,
+                                out,
+                                err))
 
 
 # Executing the interpreter in a subprocess
@@ -99,30 +126,7 @@
 def _assert_python(expected_success, *args, **env_vars):
     res, cmd_line = run_python_until_end(*args, **env_vars)
     if (res.rc and expected_success) or (not res.rc and not expected_success):
-        # Limit to 80 lines to ASCII characters
-        maxlen = 80 * 100
-        out, err = res.out, res.err
-        if len(out) > maxlen:
-            out = b'(... truncated stdout ...)' + out[-maxlen:]
-        if len(err) > maxlen:
-            err = b'(... truncated stderr ...)' + err[-maxlen:]
-        out = out.decode('ascii', 'replace').rstrip()
-        err = err.decode('ascii', 'replace').rstrip()
-        raise AssertionError("Process return code is %d\n"
-                             "command line: %r\n"
-                             "\n"
-                             "stdout:\n"
-                             "---\n"
-                             "%s\n"
-                             "---\n"
-                             "\n"
-                             "stderr:\n"
-                             "---\n"
-                             "%s\n"
-                             "---"
-                             % (res.rc, cmd_line,
-                                out,
-                                err))
+        res.fail(cmd_line)
     return res
 
 def assert_python_ok(*args, **env_vars):
diff -r b65ae19bc42a Lib/test/test_locale_override.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/test/test_locale_override.py	Sun Jan 08 12:17:19 2017 +1000
@@ -0,0 +1,151 @@
+# Tests the attempted automatic coercion of the C locale to C.UTF-8
+
+import unittest
+import sys
+import shutil
+import subprocess
+import test.support
+from test.support.script_helper import (
+    run_python_until_end,
+    interpreter_requires_environment,
+)
+
+# In addition to the general concept of replacing the C locale with C.UTF-8,
+# many of the test specifics are drawn from the Py3 locale handling code in
+# click: https://github.com/pallets/click/blob/master/click/_unicodefun.py
+
+def have_locale_command():
+    """Check we have access to a 'locale' command"""
+    return shutil.which("locale") is not None
+
+C_UTF8_LOCALES = ("c.utf8", "c.utf-8")
+
+def have_c_utf8_locale():
+    """Check the system actually has a C.UTF-8 locale to use"""
+    locale_command = shutil.which("locale")
+    if locale_command is None:
+        return False # No locale command, so the locales can't be queried
+    result = subprocess.run([locale_command, "-a"], stdout=subprocess.PIPE)
+    if result.returncode:
+        return False # Something went wrong with the locale call
+    locale_data = result.stdout.decode("ascii", "replace").splitlines()
+    return any(entry.lower() in C_UTF8_LOCALES for entry in locale_data)
+
+# Details of the warnings emitted at runtime
+CLI_C_LOCALE_COERCION_WARNING = (
+    "Python detected LC_CTYPE=C, forcing LC_ALL & LANG to C.UTF-8 "
+    "(set PYTHONALLOWCLOCALE to disable this locale coercion behaviour)."
+)
+
+LIBRARY_C_LOCALE_WARNING = (
+    "Py_Initialize detected LC_CTYPE=C, which limits Unicode compatibility. "
+    "Some libraries and operating system interfaces may not work correctly. "
+    "Set `PYTHONALLOWCLOCALE=1 LC_CTYPE=C` to configure a similar "
+    "environment when running Python directly."
+)
+
+@test.support.cpython_only
+@unittest.skipUnless(have_locale_command(), "No locale command available")
+@unittest.skipUnless(have_c_utf8_locale(), "No C.UTF-8 locale available")
+class LocaleOverrideTest(unittest.TestCase):
+
+    def _get_child_fsencoding(self, env_vars):
+        """Retrieves sys.getfilesystemencoding() from a child process
+
+        Returns (fsencoding, stderr_lines):
+
+        - fsencoding: a lowercase str value with the child's fsencoding
+        - stderr_lines: result of calling splitlines() on the stderr output
+
+        The child is run in isolated mode if the current interpreter supports
+        that.
+        """
+        cmd = "import sys; print(sys.getfilesystemencoding().lower())"
+        result, py_cmd = run_python_until_end(
+            "-c", cmd,
+            __isolated=True,
+            **env_vars
+        )
+        if result.rc != 0:
+            result.fail(py_cmd)
+        # All subprocess outputs in this test case should be pure ASCII
+        child_fsencoding = result.out.decode("ascii").rstrip()
+        child_stderr_lines = result.err.decode("ascii").rstrip().splitlines()
+        return child_fsencoding, child_stderr_lines
+
+
+    def test_C_utf8_locale(self):
+        # Ensure the C.UTF-8 locale is accepted entirely without complaint
+        base_var_dict = {
+            "LANG": "",
+            "LC_CTYPE": "",
+            "LC_ALL": "",
+        }
+        for env_var in ("LC_ALL", "LC_CTYPE", "LANG"):
+            with self.subTest(env_var=env_var):
+                var_dict = base_var_dict.copy()
+                var_dict[env_var] = "C.UTF-8"
+                fsencoding, stderr_lines = self._get_child_fsencoding(var_dict)
+                self.assertEqual(fsencoding, "utf-8")
+                self.assertFalse(stderr_lines)
+
+
+    def _check_c_locale_coercion(self, expected_fsencoding, allow_c_locale):
+        """Check the handling of the C locale for various configurations
+
+        Parameters:
+          expected_fsencoding: the encoding the child is expected to report
+          allow_c_locale: setting to use for PYTHONALLOWCLOCALE
+            None: don't set the variable at all
+            str: the value set in the child's environment
+        """
+        if allow_c_locale:
+            # Check the library emits a warning
+            expected_warning = [
+                LIBRARY_C_LOCALE_WARNING,
+            ]
+        else:
+            # Check C locale is coerced to C.UTF-8 with a warning on stderr
+            expected_warning = [
+                CLI_C_LOCALE_COERCION_WARNING,
+            ]
+        base_var_dict = {
+            "LANG": "",
+            "LC_CTYPE": "",
+            "LC_ALL": "",
+        }
+        for env_var in ("LC_ALL", "LC_CTYPE", "LANG"):
+            for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
+                with self.subTest(env_var=env_var,
+                                  nominal_locale=locale_to_set,
+                                  PYTHONALLOWCLOCALE=allow_c_locale):
+                    var_dict = base_var_dict.copy()
+                    var_dict[env_var] = locale_to_set
+                    if allow_c_locale is not None:
+                        var_dict["PYTHONALLOWCLOCALE"] = allow_c_locale
+                    fsencoding, stderr_lines = self._get_child_fsencoding(var_dict)
+                    self.assertEqual(fsencoding, expected_fsencoding)
+                    self.assertEqual(stderr_lines, expected_warning)
+
+
+    def test_C_locale(self):
+        # This should coerce to the C.UTF-8 locale
+        self._check_c_locale_coercion("utf-8", allow_c_locale=None)
+
+    def test_PYTHONALLOWCLOCALE_empty(self):
+        # This should coerce to the C.UTF-8 locale
+        self._check_c_locale_coercion("utf-8", allow_c_locale="")
+
+    def test_PYTHONALLOWCLOCALE_set(self):
+        # *Any* non-empty string is considered "set" for our purposes
+        # and hence should result in the locale coercion being disabled
+        for setting in ("1", "0", "true", "false"):
+            self._check_c_locale_coercion("ascii", allow_c_locale=setting)
+
+
+def test_main():
+    test.support.run_unittest(LocaleOverrideTest)
+    test.support.reap_children()
+
+if __name__ == "__main__":
+    test_main()
diff -r b65ae19bc42a Programs/python.c
--- a/Programs/python.c	Wed Dec 14 11:52:28 2016 +0100
+++ b/Programs/python.c	Sun Jan 08 12:17:19 2017 +1000
@@ -15,6 +15,10 @@
 }
 #else
 
+static const char *_CLI_C_LOCALE_COERCION_WARNING =
+    "Python detected LC_CTYPE=C, forcing LC_ALL & LANG to C.UTF-8 "
+    "(set PYTHONALLOWCLOCALE to disable this locale coercion behaviour).\n";
+
 int
 main(int argc, char **argv)
 {
@@ -23,6 +27,7 @@
     wchar_t **argv_copy2;
     int i, res;
     char *oldloc;
+    const char *ctype_loc;
 
     /* Force malloc() allocator to bootstrap Python */
     (void)_PyMem_SetupAllocators("malloc");
@@ -49,7 +54,41 @@
         return 1;
     }
 
+    /* Reconfigure the locale to the default for this process */
     setlocale(LC_ALL, "");
+
+    /* When the LC_CTYPE category still claims to be using the C locale,
+       assume that's a configuration error and request C.UTF-8 instead. */
+    ctype_loc = setlocale(LC_CTYPE, NULL);
+    if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) {
+        const char *allow_c_locale = getenv("PYTHONALLOWCLOCALE");
+        /* We ignore the Python -E and -I flags here, as we need to sort out
+         * the locale settings *before* we try to do anything with the command
+         * line arguments. For cross-platform debugging purposes, we also need
+         * to give end users a way to force even scripts that are otherwise
+         * isolated from their environment to use the legacy ASCII-centric C
+         * locale.
+         */
+        if (allow_c_locale == NULL || *allow_c_locale == '\0') {
+            /* PYTHONALLOWCLOCALE is not set, or is set to the empty string */
+            fprintf(stderr, "%s", _CLI_C_LOCALE_COERCION_WARNING);
+            if (setenv("LC_ALL", "C.UTF-8", 1)) {
+                fprintf(stderr, "Failed to set LC_ALL\n");
+                return 1;
+            }
+            if (setenv("LANG", "C.UTF-8", 1)) {
+                fprintf(stderr, "Failed to set LANG\n");
+                return 1;
+            }
+
+            /* Reconfigure with the overridden environment variables */
+            setlocale(LC_ALL, "");
+        } else {
+            /* No C locale warning here, as Py_Initialize will emit one later */
+        }
+    }
+
+    /* Convert from char to wchar_t based on the locale settings */
     for (i = 0; i < argc; i++) {
         argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
         if (!argv_copy[i]) {
diff -r b65ae19bc42a Python/pylifecycle.c
--- a/Python/pylifecycle.c	Wed Dec 14 11:52:28 2016 +0100
+++ b/Python/pylifecycle.c	Sun Jan 08 12:17:19 2017 +1000
@@ -301,6 +301,21 @@
 }
 
 
+static const char *_C_LOCALE_WARNING =
+    "Py_Initialize detected LC_CTYPE=C, which limits Unicode compatibility. "
+    "Some libraries and operating system interfaces may not work correctly. "
+    "Set `PYTHONALLOWCLOCALE=1 LC_CTYPE=C` to configure a similar "
+    "environment when running Python directly.\n";
+
+static void
+_emit_stderr_warning_for_c_locale(void)
+{
+    const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+    if (ctype_loc != NULL && strcmp(ctype_loc, "C") == 0) {
+        fprintf(stderr, "%s", _C_LOCALE_WARNING);
+    }
+}
+
 void
 _Py_InitializeEx_Private(int install_sigs, int install_importlib)
 {
@@ -320,6 +335,7 @@
        the locale's charset without having to switch
        locales. */
     setlocale(LC_CTYPE, "");
+    _emit_stderr_warning_for_c_locale();
 #endif
 
     if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')