Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (révision 84163) +++ Python/pythonrun.c (copie de travail) @@ -134,18 +134,13 @@ return flag; } -#if defined(HAVE_LANGINFO_H) && defined(CODESET) static char* -get_codeset(void) +get_codec_name(const char *encoding) { - char* codeset, *name_str; + char *name_utf8, *name_str; PyObject *codec, *name = NULL; - codeset = nl_langinfo(CODESET); - if (!codeset || codeset[0] == '\0') - return NULL; - - codec = _PyCodec_Lookup(codeset); + codec = _PyCodec_Lookup(encoding); if (!codec) goto error; @@ -154,18 +149,34 @@ if (!name) goto error; - name_str = _PyUnicode_AsString(name); + name_utf8 = _PyUnicode_AsString(name); if (name == NULL) goto error; - codeset = strdup(name_str); + name_str = strdup(name_utf8); Py_DECREF(name); - return codeset; + if (name_str == NULL) { + PyErr_NoMemory(); + return NULL; + } + return name_str; error: Py_XDECREF(codec); Py_XDECREF(name); return NULL; } + +#if defined(HAVE_LANGINFO_H) && defined(CODESET) +static char* +get_codeset(void) +{ + char* codeset = nl_langinfo(CODESET); + if (!codeset || codeset[0] == '\0') { + PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty"); + return NULL; + } + return get_codec_name(codeset); +} #endif void @@ -706,25 +717,35 @@ { PyObject *codec; #if defined(HAVE_LANGINFO_H) && defined(CODESET) - char *codeset; + char *codeset = NULL; if (Py_FileSystemDefaultEncoding == NULL) { - /* On Unix, set the file system encoding according to the - user's preference, if the CODESET names a well-known - Python codec, and Py_FileSystemDefaultEncoding isn't - initialized by other means. Also set the encoding of - stdin and stdout if these are terminals. 
*/ - codeset = get_codeset(); + const char *env_encoding = Py_GETENV("PYTHONFSENCODING"); + if (env_encoding != NULL) { + codeset = get_codec_name(env_encoding); + if (!codeset) { + fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n"); + PyErr_Print(); + } + } + if (!codeset) { + /* On Unix, set the file system encoding according to the + user's preference, if the CODESET names a well-known + Python codec, and Py_FileSystemDefaultEncoding isn't + initialized by other means. Also set the encoding of + stdin and stdout if these are terminals. */ + codeset = get_codeset(); + } if (codeset != NULL) { Py_FileSystemDefaultEncoding = codeset; Py_HasFileSystemDefaultEncoding = 0; return; + } else { + fprintf(stderr, "Unable to get the locale encoding:\n"); + PyErr_Print(); } - PyErr_Clear(); - fprintf(stderr, - "Unable to get the locale encoding: " - "fallback to utf-8\n"); + fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n"); Py_FileSystemDefaultEncoding = "utf-8"; Py_HasFileSystemDefaultEncoding = 1; } Index: Doc/using/cmdline.rst =================================================================== --- Doc/using/cmdline.rst (révision 84162) +++ Doc/using/cmdline.rst (copie de travail) @@ -442,11 +442,20 @@ import of source modules. +.. envvar:: PYTHONFSENCODING + + If this is set before running the interpreter, it overrides the encoding used + for the filesystem (see :func:`sys.getfilesystemencoding`). + + .. versionadded:: 3.2 + + .. envvar:: PYTHONIOENCODING - Overrides the encoding used for stdin/stdout/stderr, in the syntax - ``encodingname:errorhandler``. The ``:errorhandler`` part is optional and - has the same meaning as in :func:`str.encode`. + If this is set before running the interpreter, it overrides the encoding used + for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The + ``:errorhandler`` part is optional and has the same meaning as in + :func:`str.encode`. 
For stderr, the ``:errorhandler`` part is ignored; the handler will always be ``'backslashreplace'``. Index: Doc/whatsnew/3.2.rst =================================================================== --- Doc/whatsnew/3.2.rst (révision 84162) +++ Doc/whatsnew/3.2.rst (copie de travail) @@ -232,6 +232,15 @@ * Stub + +Unicode +======= + +The filesystem encoding can be specified by setting the +:envvar:`PYTHONFSENCODING` environment variable before running the interpreter. +The value should be a string in the form ``<encoding>``, e.g. ``utf-8``. + + IDLE ==== Index: Lib/test/test_pep277.py =================================================================== --- Lib/test/test_pep277.py (révision 84162) +++ Lib/test/test_pep277.py (copie de travail) @@ -43,7 +43,7 @@ # Is it Unicode-friendly? if not os.path.supports_unicode_filenames: - fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + fsencoding = sys.getfilesystemencoding() try: for name in filenames: name.encode(fsencoding) Index: Lib/test/test_sys.py =================================================================== --- Lib/test/test_sys.py (révision 84162) +++ Lib/test/test_sys.py (copie de travail) @@ -863,16 +863,24 @@ def test_getfilesystemencoding(self): import codecs - def check_fsencoding(fs_encoding): + def check_fsencoding(fs_encoding, expected=None): self.assertIsNotNone(fs_encoding) if sys.platform == 'darwin': self.assertEqual(fs_encoding, 'utf-8') codecs.lookup(fs_encoding) + if expected: + self.assertEqual(fs_encoding, expected) fs_encoding = sys.getfilesystemencoding() check_fsencoding(fs_encoding) - # Even in C locale + def get_fsencoding(env): + output = 
subprocess.check_output( - [sys.executable, "-c", - "import sys; print(sys.getfilesystemencoding())"], - env=env) - fs_encoding = output.rstrip().decode('ascii') - check_fsencoding(fs_encoding) + try: + del env['PYTHONFSENCODING'] + except KeyError: + pass + check_fsencoding(get_fsencoding(env), 'ascii') + # Filesystem encoding is hardcoded on Windows and Mac OS X + if sys.platform not in ('win32', 'darwin'): + for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'): + env = os.environ.copy() + env['PYTHONFSENCODING'] = encoding + check_fsencoding(get_fsencoding(env), encoding) + + def test_setfilesystemencoding(self): old = sys.getfilesystemencoding() try: Index: Modules/main.c =================================================================== --- Modules/main.c (révision 84162) +++ Modules/main.c (copie de travail) @@ -99,6 +99,7 @@ The default module search path uses %s.\n\ PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\ PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\ +PYTHONFSENCODING: Encoding used for the filesystem.\n\ "; FILE *