Normalize the encoding name before PyUnicode_Decode() compares it with the
names of the built-in codecs, and mark test_profile, test_cProfile and
test_doctest as expected skips when the filesystem encoding is not one of
those built-in codecs.

Changes relative to the first version of this patch:
  * normalize() now stores each converted character in the output buffer.
  * PyUnicode_Decode() reports a failed allocation with PyErr_NoMemory()
    and releases the normalized copy on every return path.
  * regrtest.py copes with sys.getfilesystemencoding() returning None and
    normalizes the name the same way as the C code.

NOTE(review): leading whitespace was reconstructed.  Context lines are
assumed to be space-indented and the whitespace-only removed lines are
assumed to be tab-indented; if they do not match the tree, apply with
"patch -l".

Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 58587)
+++ Objects/unicodeobject.c	(working copy)
@@ -1049,29 +1049,74 @@
     return NULL;
 }
 
+/* Return a newly allocated, normalized copy of the encoding name enc:
+   characters are lowercased with tolower() and spaces are replaced by
+   hyphens, so that e.g. "UTF-8" and "Latin 1" become "utf-8" and "latin-1".
+   The caller owns the result and must release it with PyMem_Free().
+   Returns NULL, without setting an exception, if the allocation fails. */
+static char *
+normalize(const char *enc)
+{
+    register size_t i;
+    size_t len = strlen(enc);
+    char *p;
+
+    p = PyMem_Malloc(len + 1);
+    if (p == NULL)
+        return NULL;
+    for (i = 0; i < len; i++) {
+        register char ch = enc[i];
+        if (ch == ' ')
+            ch = '-';
+        else
+            ch = tolower(Py_CHARMASK(ch));
+        p[i] = ch;
+    }
+    p[len] = '\0';
+    return p;
+}
+
 PyObject *PyUnicode_Decode(const char *s,
-			   Py_ssize_t size,
-			   const char *encoding,
-			   const char *errors)
+                           Py_ssize_t size,
+                           const char *encoding,
+                           const char *errors)
 {
     PyObject *buffer = NULL, *unicode;
     Py_buffer info;
+    char *enc;
 
     if (encoding == NULL)
-	encoding = PyUnicode_GetDefaultEncoding();
+        encoding = PyUnicode_GetDefaultEncoding();
 
+    /* Compare against a normalized copy so that spellings such as
+       "UTF-8" or "Latin 1" also take the shortcuts below. */
+    enc = normalize(encoding);
+    if (enc == NULL)
+        return PyErr_NoMemory();
+
     /* Shortcuts for common default encodings */
-    if (strcmp(encoding, "utf-8") == 0)
+    if (strcmp(enc, "utf-8") == 0) {
+        PyMem_Free(enc);
         return PyUnicode_DecodeUTF8(s, size, errors);
-    else if (strcmp(encoding, "latin-1") == 0)
+    }
+    else if (strcmp(enc, "latin-1") == 0) {
+        PyMem_Free(enc);
         return PyUnicode_DecodeLatin1(s, size, errors);
+    }
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
-    else if (strcmp(encoding, "mbcs") == 0)
+    else if (strcmp(enc, "mbcs") == 0) {
+        PyMem_Free(enc);
         return PyUnicode_DecodeMBCS(s, size, errors);
+    }
 #endif
-    else if (strcmp(encoding, "ascii") == 0)
+    else if (strcmp(enc, "ascii") == 0) {
+        PyMem_Free(enc);
         return PyUnicode_DecodeASCII(s, size, errors);
+    }
 
+    /* No shortcut matched: the normalized copy is no longer needed. */
+    PyMem_Free(enc);
+
     /* Decode via the codec registry */
     buffer = NULL;
     if (PyBuffer_FillInfo(&info, (void *)s, size, 1, PyBUF_SIMPLE) < 0)
Index: Lib/test/regrtest.py
===================================================================
--- Lib/test/regrtest.py	(revision 58587)
+++ Lib/test/regrtest.py	(working copy)
@@ -1119,6 +1119,19 @@
             if not os.path.supports_unicode_filenames:
                 self.expected.add('test_pep277')
 
+            # doctest, profile and cProfile tests fail when the encoding
+            # of the filesystem is not built-in, because of the extra calls
+            # to the codecs module.
+            builtin_enc = ("utf-8", "latin-1", "ascii", "mbcs")
+            # getfilesystemencoding() may return None, meaning that the
+            # default encoding is used.
+            fs_enc = sys.getfilesystemencoding() or sys.getdefaultencoding()
+            # Mirror normalize() in Objects/unicodeobject.c.
+            if fs_enc.lower().replace(' ', '-') not in builtin_enc:
+                self.expected.add('test_profile')
+                self.expected.add('test_cProfile')
+                self.expected.add('test_doctest')
+
             try:
                 from test import test_socket_ssl
             except ImportError: