diff --git a/Include/fileutils.h b/Include/fileutils.h
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
+PyAPI_FUNC(PyObject *) _Py_locale_encoding(void);
 
 PyAPI_FUNC(wchar_t *) _Py_char2wchar(
     const char *arg,
diff --git a/Python/fileutils.c b/Python/fileutils.c
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -32,11 +32,62 @@ extern wchar_t* _Py_DecodeUTF8_surrogate
 int _Py_open_cloexec_works = -1;
 #endif
 
+/* Look up `encoding` in the codec registry and return the codec's 'name'
+   attribute (new reference), or NULL on failure.  A codec whose name is
+   'ascii' is reported as 'utf-8' instead; see issue #19846. */
+static PyObject *
+get_codec_name(const char *encoding)
+{
+    /* Normalize codec name so that e.g. 'ANSI_X3.4-1968' becomes
+       'ascii'. */
+    PyObject *codec, *name;
+    _Py_IDENTIFIER(name);
+    codec = _PyCodec_Lookup(encoding);
+    if (!codec)
+        return NULL;
+    name = _PyObject_GetAttrId(codec, &PyId_name);
+    Py_DECREF(codec);
+    if (!name)
+        return NULL;
+    if (!PyUnicode_Check(name)) {
+        Py_DECREF(name);
+        PyErr_SetString(PyExc_TypeError,
+                        "'name' attribute should be a str object");
+        return NULL;
+    }
+    /* Issue #19846: 'ascii' gets turned into 'utf-8' */
+    if (PyUnicode_CompareWithASCIIString(name, "ascii"))
+        return name;
+    else {
+        Py_DECREF(name);
+        return PyUnicode_FromString("utf-8");
+    }
+}
+
+/* Return the normalized name of the locale encoding (the ANSI code page on
+   Windows, nl_langinfo(CODESET) where available) as a new reference, None
+   if it cannot be determined, or NULL on error. */
+PyObject *
+_Py_locale_encoding(void)
+{
+#ifdef MS_WINDOWS
+    char codepage[100];
+    PyOS_snprintf(codepage, sizeof(codepage), "cp%u", (unsigned int)GetACP());
+    return get_codec_name(codepage);
+#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
+    char *codeset = nl_langinfo(CODESET);
+    if (codeset != NULL && codeset[0] != 0)
+        return get_codec_name(codeset);
+#endif
+    Py_RETURN_NONE;
+}
+
 PyObject *
 _Py_device_encoding(int fd)
 {
 #if defined(MS_WINDOWS)
     UINT cp;
+    char codepage[100];
 #endif
     if (!_PyVerify_fd(fd) || !isatty(fd)) {
         Py_RETURN_NONE;
@@ -50,16 +101,12 @@ PyObject *
         cp = 0;
     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
        has no console */
-    if (cp != 0)
-        return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
-#elif defined(CODESET)
-    {
-        char *codeset = nl_langinfo(CODESET);
-        if (codeset != NULL && codeset[0] != 0)
-            return PyUnicode_FromString(codeset);
+    if (cp != 0) {
+        PyOS_snprintf(codepage, sizeof(codepage), "cp%u", (unsigned int)cp);
+        return get_codec_name(codepage);
     }
 #endif
-    Py_RETURN_NONE;
+    return _Py_locale_encoding();
 }
 
 #if !defined(__APPLE__) && !defined(MS_WINDOWS)
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -216,55 +216,34 @@ add_flag(int flag, const char *envs)
 }
 
+/* Return the locale encoding name as a string allocated with
+   _PyMem_RawStrdup(), or NULL on failure. */
 static char*
-get_codec_name(const char *encoding)
+get_locale_encoding(void)
 {
-    char *name_utf8, *name_str;
-    PyObject *codec, *name = NULL;
-
-    codec = _PyCodec_Lookup(encoding);
-    if (!codec)
-        goto error;
-
-    name = _PyObject_GetAttrId(codec, &PyId_name);
-    Py_CLEAR(codec);
-    if (!name)
-        goto error;
-
-    name_utf8 = _PyUnicode_AsString(name);
-    if (name_utf8 == NULL)
-        goto error;
-    name_str = _PyMem_RawStrdup(name_utf8);
-    Py_DECREF(name);
-    if (name_str == NULL) {
-        PyErr_NoMemory();
+    PyObject *name = _Py_locale_encoding();
+    if (name == NULL)
+        return NULL;
+    else if (name == Py_None) {
+        Py_DECREF(name);
+        PyErr_SetString(PyExc_NotImplementedError,
+                        "could not determine locale encoding");
         return NULL;
     }
-    return name_str;
-
-error:
-    Py_XDECREF(codec);
-    Py_XDECREF(name);
-    return NULL;
-}
-
-static char*
-get_locale_encoding(void)
-{
-#ifdef MS_WINDOWS
-    char codepage[100];
-    PyOS_snprintf(codepage, sizeof(codepage), "cp%d", GetACP());
-    return get_codec_name(codepage);
-#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char* codeset = nl_langinfo(CODESET);
-    if (!codeset || codeset[0] == '\0') {
-        PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
-        return NULL;
+    else {
+        char *name_utf8, *name_str;
+        name_utf8 = _PyUnicode_AsString(name);
+        if (name_utf8 == NULL) {
+            Py_DECREF(name);
+            return NULL;
+        }
+        name_str = _PyMem_RawStrdup(name_utf8);
+        Py_DECREF(name);
+        if (name_str == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        return name_str;
     }
-    return get_codec_name(codeset);
-#else
-    PyErr_SetNone(PyExc_NotImplementedError);
-    return NULL;
-#endif
 }
 
 static void