Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(révision 80924)
+++ Python/pythonrun.c	(copie de travail)
@@ -57,6 +57,7 @@
 
 /* Forward */
 static void initmain(void);
+static void initfsencoding(void);
 static void initsite(void);
 static int initstdio(void);
 static void flush_io(void);
@@ -171,9 +172,6 @@
     PyThreadState *tstate;
     PyObject *bimod, *sysmod, *pstderr;
     char *p;
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    char *codeset;
-#endif
     extern void _Py_ReadyTypes(void);
 
     if (initialized)
@@ -264,22 +262,8 @@
 
     _PyImportHooks_Init();
 
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    /* On Unix, set the file system encoding according to the
-       user's preference, if the CODESET names a well-known
-       Python codec, and Py_FileSystemDefaultEncoding isn't
-       initialized by other means. Also set the encoding of
-       stdin and stdout if these are terminals.  */
+    initfsencoding();
 
-    codeset = get_codeset();
-    if (codeset) {
-        if (!Py_FileSystemDefaultEncoding)
-            Py_FileSystemDefaultEncoding = codeset;
-        else
-            free(codeset);
-    }
-#endif
-
     if (install_sigs)
         initsigs(); /* Signal handling stuff, including initintr() */
 
@@ -707,6 +691,43 @@
     }
 }
 
+static void
+initfsencoding(void)
+{
+    PyObject *codec;
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+    char *codeset;
+
+    /* On Unix, set the file system encoding according to the user's
+       locale, but only if Py_FileSystemDefaultEncoding isn't already
+       initialized by other means (e.g. the static "utf-8" default
+       used on Mac OS X). */
+    if (Py_FileSystemDefaultEncoding == NULL) {
+        codeset = get_codeset();
+        if (codeset == NULL) {
+            fprintf(stderr,
+                    "Unable to get the locale encoding: "
+                    "fallback to ascii\n");
+            Py_FileSystemDefaultEncoding = "ascii";
+            Py_HasFileSystemDefaultEncoding = 1;
+        } else {
+            /* codeset is heap-allocated: clear the flag so that
+               Py_Finalize() frees it and resets the encoding to NULL. */
+            Py_FileSystemDefaultEncoding = codeset;
+            Py_HasFileSystemDefaultEncoding = 0;
+        }
+    }
+#endif
+
+    /* Load the codec now so that a missing codec is reported at startup. */
+    codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
+    if (!codec) {
+        Py_FatalError(
+            "Py_Initialize: unable to load the file system codec");
+    } else {
+        Py_DECREF(codec);
+    }
+}
+
 /* Import the site module (not into __main__ though) */
 
 static void
Index: Python/bltinmodule.c
===================================================================
--- Python/bltinmodule.c	(révision 80924)
+++ Python/bltinmodule.c	(copie de travail)
@@ -9,6 +9,10 @@
 
 #include <ctype.h>
 
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h>   /* CODESET */
+#endif
+
 /* The default encoding used by the platform file system APIs
    Can remain NULL for all platforms that don't have such a concept
 
@@ -21,9 +25,12 @@
 #elif defined(__APPLE__)
 const char *Py_FileSystemDefaultEncoding = "utf-8";
 int Py_HasFileSystemDefaultEncoding = 1;
+#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
+const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
+int Py_HasFileSystemDefaultEncoding = 0;
 #else
-const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
-int Py_HasFileSystemDefaultEncoding = 0;
+const char *Py_FileSystemDefaultEncoding = "ascii";
+int Py_HasFileSystemDefaultEncoding = 1;
 #endif
 
 int
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(révision 80924)
+++ Objects/unicodeobject.c	(copie de travail)
@@ -1612,7 +1612,7 @@
                                 "surrogateescape");
     }
     else {
-        return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
+        return PyUnicode_DecodeASCII(s, size, "surrogateescape");
     }
 }
 
Index: Misc/NEWS
===================================================================
--- Misc/NEWS	(révision 80924)
+++ Misc/NEWS	(copie de travail)
@@ -12,6 +12,10 @@
 Core and Builtins
 -----------------
 
+- Issue #8610: Load file system codec at startup, and display a fatal error on
+  failure. Set the file system encoding to ascii if getting the locale encoding
+  failed, or if the nl_langinfo(CODESET) function is missing.
+
 - PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of
   PyUnicode_FromString() to support surrogates in the filename and use
   the right encoding
Index: Doc/library/sys.rst
===================================================================
--- Doc/library/sys.rst	(révision 80924)
+++ Doc/library/sys.rst	(copie de travail)
@@ -298,15 +298,13 @@
 
 .. function:: getfilesystemencoding()
 
-   Return the name of the encoding used to convert Unicode filenames into system
-   file names, or ``None`` if the system default encoding is used. The result value
-   depends on the operating system:
+   Return the name of the encoding used to convert Unicode filenames into
+   system file names. The result value depends on the operating system:
 
    * On Mac OS X, the encoding is ``'utf-8'``.
 
    * On Unix, the encoding is the user's preference according to the result of
-     nl_langinfo(CODESET), or ``None`` if the ``nl_langinfo(CODESET)``
-     failed.
+     nl_langinfo(CODESET), or ``'ascii'`` if ``nl_langinfo(CODESET)`` failed.
 
    * On Windows NT+, file names are Unicode natively, so no conversion is
      performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as
@@ -316,7 +314,11 @@
 
    * On Windows 9x, the encoding is ``'mbcs'``.
 
+   .. versionchanged:: 3.2
+      On Unix, use ``'ascii'`` instead of ``None`` if ``nl_langinfo(CODESET)``
+      fails. The :func:`getfilesystemencoding` result can no longer be ``None``.
+
 
 .. function:: getrefcount(object)
 
    Return the reference count of the *object*.  The count returned is generally one