Detect the real locale encoding when nl_langinfo(CODESET) reports ASCII

* Python/pythonrun.c: get_locale_encoding() becomes the non-static
  _Py_GetLocaleEncoding().  When the codec name is "ascii" and the LC_CTYPE
  locale is "C", it decodes b'\xe9' with mbstowcs(): a decoding failure
  keeps "ascii", U+00E9 selects "iso8859-1", and any other result raises
  ValueError.
* Modules/_localemodule.c: nl_langinfo(CODESET) returns the value of
  _Py_GetLocaleEncoding(); other items still go through nl_langinfo().
* Objects/unicodeobject.c: "iso8859-1" is accepted by the Latin-1 shortcuts
  of PyUnicode_Decode() and PyUnicode_AsEncodedString().
* Lib/test/test_cmd_line.py: test_undecodable_code uses
  support.TESTFN_UNDECODABLE and only accepts the "Unable to decode the
  command from the command line:" failure.
* Lib/test/test_cmd_line_script.py: the TESTFN_UNDECODABLE branch is
  enabled again.

diff -r 9cb1366b251b Lib/test/test_cmd_line.py
--- a/Lib/test/test_cmd_line.py	Sun Nov 11 20:11:15 2012 +0100
+++ b/Lib/test/test_cmd_line.py	Sun Nov 11 23:23:07 2012 +0000
@@ -110,35 +110,28 @@ class CmdLineTest(unittest.TestCase):
     # arguments as unicode (using wmain() instead of main()).
     @unittest.skipIf(sys.platform == 'win32',
                      'Windows has a native unicode API')
+    @unittest.skipUnless(test.support.TESTFN_UNDECODABLE,
+                         'need support.TESTFN_UNDECODABLE')
     def test_undecodable_code(self):
-        undecodable = b"\xff"
+        undecodable = test.support.TESTFN_UNDECODABLE
         env = os.environ.copy()
-        # Use C locale to get ascii for the locale encoding
         env['LC_ALL'] = 'C'
         code = (
-            b'import locale; '
+            b'import locale, sys; '
             b'print(ascii("' + undecodable + b'"), '
-                b'locale.getpreferredencoding())')
+                b'locale.getpreferredencoding(), '
+                b'sys.getfilesystemencoding())')
         p = subprocess.Popen(
             [sys.executable, "-c", code],
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
             env=env)
         stdout, stderr = p.communicate()
-        if p.returncode == 1:
-            # _Py_char2wchar() decoded b'\xff' as '\udcff' (b'\xff' is not
-            # decodable from ASCII) and run_command() failed on
-            # PyUnicode_AsUTF8String(). This is the expected behaviour on
-            # Linux.
-            pattern = b"Unable to decode the command from the command line:"
-        elif p.returncode == 0:
-            # _Py_char2wchar() decoded b'\xff' as '\xff' even if the locale is
-            # C and the locale encoding is ASCII. It occurs on FreeBSD, Solaris
-            # and Mac OS X.
-            pattern = b"'\\xff' "
-            # The output is followed by the encoding name, an alias to ASCII.
-            # Examples: "US-ASCII" or "646" (ISO 646, on Solaris).
-        else:
-            raise AssertionError("Unknown exit code: %s, output=%a" % (p.returncode, stdout))
+
+        # _Py_char2wchar() decoded b'\xff' as '\udcff' (b'\xff' is not
+        # decodable from ASCII) and run_command() failed on
+        # PyUnicode_AsUTF8String(). This is the expected behaviour on
+        # Linux.
+        pattern = b"Unable to decode the command from the command line:"
         if not stdout.startswith(pattern):
             raise AssertionError("%a doesn't start with %a" % (stdout, pattern))
 
diff -r 9cb1366b251b Lib/test/test_cmd_line_script.py
--- a/Lib/test/test_cmd_line_script.py	Sun Nov 11 20:11:15 2012 +0100
+++ b/Lib/test/test_cmd_line_script.py	Sun Nov 11 23:23:07 2012 +0000
@@ -367,11 +367,10 @@ class CmdLineTest(unittest.TestCase):
         # Mac OS X denies the creation of a file with an invalid UTF-8 name.
         # Windows allows to create a name with an arbitrary bytes name, but
         # Python cannot a undecodable bytes argument to a subprocess.
-        #if (support.TESTFN_UNDECODABLE
-        #and sys.platform not in ('win32', 'darwin')):
-        #    name = os.fsdecode(support.TESTFN_UNDECODABLE)
-        #elif support.TESTFN_NONASCII:
-        if support.TESTFN_NONASCII:
+        if (support.TESTFN_UNDECODABLE
+        and sys.platform not in ('win32', 'darwin')):
+            name = os.fsdecode(support.TESTFN_UNDECODABLE)
+        elif support.TESTFN_NONASCII:
             name = support.TESTFN_NONASCII
         else:
             self.skipTest("need support.TESTFN_NONASCII")
diff -r 9cb1366b251b Modules/_localemodule.c
--- a/Modules/_localemodule.c	Sun Nov 11 20:11:15 2012 +0100
+++ b/Modules/_localemodule.c	Sun Nov 11 23:23:07 2012 +0000
@@ -425,25 +425,54 @@ PyDoc_STRVAR(nl_langinfo__doc__,
 "nl_langinfo(key) -> string\n"
 "Return the value for the locale information associated with key.");
 
+extern char* _Py_GetLocaleEncoding(void);
+
 static PyObject*
 PyLocale_nl_langinfo(PyObject* self, PyObject* args)
 {
-    int item, i;
+    int item, i, valid;
+    const char *result;
+
     if (!PyArg_ParseTuple(args, "i:nl_langinfo", &item))
         return NULL;
+
     /* Check whether this is a supported constant. GNU libc sometimes
        returns numeric values in the char* return value, which would
        crash PyUnicode_FromString.  */
-    for (i = 0; langinfo_constants[i].name; i++)
+    valid = 0;
+    for (i = 0; langinfo_constants[i].name; i++) {
         if (langinfo_constants[i].value == item) {
-            /* Check NULL as a workaround for GNU libc's returning NULL
-               instead of an empty string for nl_langinfo(ERA).  */
-            const char *result = nl_langinfo(item);
-            result = result != NULL ? result : "";
-            return PyUnicode_DecodeLocale(result, NULL);
+            valid = 1;
+            break;
         }
-    PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
-    return NULL;
+    }
+    if (!valid) {
+        PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
+        return NULL;
+    }
+
+#ifdef CODESET
+    if (item == CODESET) {
+        /* _Py_GetLocaleEncoding() returns a heap-allocated string: hold it
+           in a non-const pointer so that it can be passed to free() */
+        PyObject *str;
+        char *encoding = _Py_GetLocaleEncoding();
+        if (encoding == NULL)
+            return NULL;
+        str = PyUnicode_DecodeASCII(encoding, strlen(encoding), NULL);
+        free(encoding);
+        return str;
+    }
+    else
+#endif
+    {
+        result = nl_langinfo(item);
+        /* Check NULL as a workaround for GNU libc's returning NULL
+           instead of an empty string for nl_langinfo(ERA).  */
+        if (result == NULL)
+            result = "";
+        return PyUnicode_DecodeLocale(result, NULL);
+    }
 }
 
 #endif /* HAVE_LANGINFO_H */
diff -r 9cb1366b251b Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Sun Nov 11 20:11:15 2012 +0100
+++ b/Objects/unicodeobject.c	Sun Nov 11 23:23:07 2012 +0000
@@ -2914,6 +2914,7 @@ PyUnicode_Decode(const char *s,
             return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #ifdef HAVE_MBCS
@@ -3284,6 +3285,7 @@ PyUnicode_AsEncodedString(PyObject *unic
         }
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0) ||
                  (strcmp(lower, "iso-8859-1") == 0))
             return _PyUnicode_AsLatin1String(unicode, errors);
 #ifdef HAVE_MBCS
diff -r 9cb1366b251b Python/pythonrun.c
--- a/Python/pythonrun.c	Sun Nov 11 20:11:15 2012 +0100
+++ b/Python/pythonrun.c	Sun Nov 11 23:23:07 2012 +0000
@@ -170,20 +170,66 @@ error:
     return NULL;
 }
 
-static char*
-get_locale_encoding(void)
+char*
+_Py_GetLocaleEncoding(void)
 {
 #ifdef MS_WINDOWS
     char codepage[100];
     PyOS_snprintf(codepage, sizeof(codepage), "cp%d", GetACP());
     return get_codec_name(codepage);
 #elif defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char* codeset = nl_langinfo(CODESET);
+    char *codeset, *encoding, *loc;
+    wchar_t wch;
+    size_t res;
+
+    codeset = nl_langinfo(CODESET);
     if (!codeset || codeset[0] == '\0') {
         PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
         return NULL;
     }
-    return get_codec_name(codeset);
+
+    encoding = get_codec_name(codeset);
+    if (encoding == NULL)
+        return NULL;
+
+    if (strcmp(encoding, "ascii") != 0)
+        return encoding;
+
+    loc = setlocale(LC_CTYPE, NULL);
+    if (loc == NULL || strcmp(loc, "C") != 0)
+        return encoding;
+
+    /* the locale is not set and nl_langinfo(CODESET) returns "ASCII"
+       (or an alias of the ASCII encoding). Check if the locale encoding
+       is really ASCII. */
+
+    res = mbstowcs(&wch, "\xe9", 1);
+    if (res == (size_t)-1) {
+        /* decoding a non-ASCII byte from the locale encoding failed:
+           nothing to do, the encoding is really ASCII */
+        return encoding;
+    }
+
+    if (wch != 0xe9) {
+        PyErr_Format(PyExc_ValueError,
+                     "unknown locale encoding: "
+                     "b'\\xe9' decoded from the locale encoding as U+%04x",
+                     (unsigned int)wch);
+        /* the codec name is not returned on this path: release it */
+        free(encoding);
+        return NULL;
+    }
+    else {
+        /* b'\xe9' is decoded from the locale encoding as U+00E9:
+           the locale encoding is ISO-8859-1, not ASCII */
+        free(encoding);
+        encoding = strdup("iso8859-1");
+        if (encoding == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        return encoding;
+    }
 #else
     PyErr_SetNone(PyExc_NotImplementedError);
     return NULL;
@@ -868,7 +914,7 @@ initfsencoding(PyInterpreterState *inter
 
     if (Py_FileSystemDefaultEncoding == NULL)
     {
-        Py_FileSystemDefaultEncoding = get_locale_encoding();
+        Py_FileSystemDefaultEncoding = _Py_GetLocaleEncoding();
         if (Py_FileSystemDefaultEncoding == NULL)
             Py_FatalError("Py_Initialize: Unable to get the locale encoding");
 