diff -r 7444ac6d93c3 Doc/library/functions.rst --- a/Doc/library/functions.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/library/functions.rst Sat Apr 18 16:49:30 2015 +0200 @@ -1070,6 +1070,11 @@ are always available. They are listed h exception, the function now retries the system call instead of raising an :exc:`InterruptedError` exception (see :pep:`475` for the rationale). + .. versionchanged:: 3.5 + When the :py:data:`locale.LC_CTYPE` locale is the POSIX locale (``'C'`` + locale) at Python startup, the ``'surrogateescape'`` error handler is + used by default (if *errors* is ``None``), instead of ``'strict'``. + .. function:: ord(c) diff -r 7444ac6d93c3 Doc/library/io.rst --- a/Doc/library/io.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/library/io.rst Sat Apr 18 16:49:30 2015 +0200 @@ -872,6 +872,11 @@ Text I/O locale encoding using :func:`locale.setlocale`, use the current locale encoding instead of the user preferred encoding. + .. versionchanged:: 3.5 + When the :py:data:`locale.LC_CTYPE` locale is the POSIX locale (``'C'`` + locale) at Python startup, the ``'surrogateescape'`` error handler is + used by default (if *errors* is ``None``), instead of ``'strict'``. + :class:`TextIOWrapper` provides one attribute in addition to those of :class:`TextIOBase` and its parents: diff -r 7444ac6d93c3 Doc/library/sys.rst --- a/Doc/library/sys.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/library/sys.rst Sat Apr 18 16:49:30 2015 +0200 @@ -441,6 +441,14 @@ always available. .. versionchanged:: 3.2 :func:`getfilesystemencoding` result cannot be ``None`` anymore. +.. function:: getdefaulterrorhandler() + + Return the name of the default error handler: ``'surrogateescape'`` if the + :py:data:`locale.LC_CTYPE` locale is the POSIX locale (``'C'`` locale) at + Python startup, ``'strict'`` otherwise. + + .. versionadded:: 3.5 + .. 
function:: getrefcount(object) diff -r 7444ac6d93c3 Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/whatsnew/3.5.rst Sat Apr 18 16:49:30 2015 +0200 @@ -83,10 +83,10 @@ New built-in features: Implementation improvements: -* When the ``LC_TYPE`` locale is the POSIX locale (``C`` locale), - :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the - ``surrogateescape`` error handler, instead of the ``strict`` error handler - (:issue:`19977`). +* When the :py:data:`locale.LC_CTYPE` locale is the POSIX locale (``'C'`` + locale) at Python startup, :py:data:`sys.stdin`, :py:data:`sys.stdout` and + :func:`open` are now using the ``'surrogateescape'`` error handler by + default, instead of ``'strict'`` (:issue:`19977`, :issue:`23993`). Significantly Improved Library Modules: @@ -459,6 +459,13 @@ socket :meth:`socket.socket.send`. (Contributed by Giampaolo Rodola' in :issue:`17552`.) +sys +--- + +* New :func:`sys.getdefaulterrorhandler` function to get the default error + handler: ``'surrogateescape'`` if the :py:data:`locale.LC_CTYPE` locale is the + POSIX locale (``'C'`` locale) at Python startup, ``'strict'`` otherwise. + sysconfig --------- diff -r 7444ac6d93c3 Include/fileobject.h --- a/Include/fileobject.h Thu Apr 09 22:29:52 2015 +0200 +++ b/Include/fileobject.h Sat Apr 18 16:49:30 2015 +0200 @@ -24,6 +24,7 @@ PyAPI_FUNC(char *) Py_UniversalNewlineFg */ PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; PyAPI_DATA(int) Py_HasFileSystemDefaultEncoding; +PyAPI_DATA(const char*) Py_DefaultErrorHandler; /* Internal API diff -r 7444ac6d93c3 Lib/_pyio.py --- a/Lib/_pyio.py Thu Apr 09 22:29:52 2015 +0200 +++ b/Lib/_pyio.py Sat Apr 18 16:49:30 2015 +0200 @@ -2,11 +2,12 @@ Python implementation of the io module. 
""" -import os import abc +import array import codecs import errno -import array +import os +import sys # Import _thread instead of threading to reduce startup cost try: from _thread import allocate_lock as Lock @@ -1589,7 +1590,7 @@ class TextIOWrapper(TextIOBase): raise LookupError(msg % encoding) if errors is None: - errors = "strict" + errors = sys.getdefaulterrorhandler() else: if not isinstance(errors, str): raise ValueError("invalid errors: %r" % errors) diff -r 7444ac6d93c3 Lib/os.py --- a/Lib/os.py Thu Apr 09 22:29:52 2015 +0200 +++ b/Lib/os.py Sat Apr 18 16:49:30 2015 +0200 @@ -799,10 +799,7 @@ if supports_bytes_environ: def _fscodec(): encoding = sys.getfilesystemencoding() - if encoding == 'mbcs': - errors = 'strict' - else: - errors = 'surrogateescape' + errors = 'strict' if encoding == 'mbcs' else 'surrogateescape' def fsencode(filename): """ diff -r 7444ac6d93c3 Modules/_io/textio.c --- a/Modules/_io/textio.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Modules/_io/textio.c Sat Apr 18 16:49:30 2015 +0200 @@ -851,7 +851,7 @@ textiowrapper_init(textio *self, PyObjec NULL}; PyObject *buffer, *raw, *codec_info = NULL; char *encoding = NULL; - char *errors = NULL; + const char *errors = NULL; char *newline = NULL; int line_buffering = 0, write_through = 0; _PyIO_State *state = NULL; @@ -973,7 +973,7 @@ textiowrapper_init(textio *self, PyObjec */ if (errors == NULL) - errors = "strict"; + errors = Py_DefaultErrorHandler; self->errors = PyBytes_FromString(errors); if (self->errors == NULL) goto error; diff -r 7444ac6d93c3 Python/bltinmodule.c --- a/Python/bltinmodule.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/bltinmodule.c Sat Apr 18 16:49:30 2015 +0200 @@ -32,6 +32,12 @@ const char *Py_FileSystemDefaultEncoding int Py_HasFileSystemDefaultEncoding = 0; #endif +/* When the LC_CTYPE locale is the POSIX locale ("C locale"), stdin, stdout + and open() use the surrogateescape error handler by default, + instead of the strict error handler. 
This variable is set at startup by + initfsencoding(). */ +const char* Py_DefaultErrorHandler = "strict"; + _Py_IDENTIFIER(__builtins__); _Py_IDENTIFIER(__dict__); _Py_IDENTIFIER(__prepare__); diff -r 7444ac6d93c3 Python/pylifecycle.c --- a/Python/pylifecycle.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/pylifecycle.c Sat Apr 18 16:49:30 2015 +0200 @@ -921,8 +921,16 @@ initfsencoding(PyInterpreterState *inter { PyObject *codec; - if (Py_FileSystemDefaultEncoding == NULL) - { + if (!interp->fscodec_initialized) { + /* When the LC_CTYPE locale is the POSIX locale ("C locale"), stdin, + stdout and open() use the "surrogateescape" error handler by + default, instead of "strict". */ + char *loc = setlocale(LC_CTYPE, NULL); + if (loc != NULL && strcmp(loc, "C") == 0) + Py_DefaultErrorHandler = "surrogateescape"; + } + + if (Py_FileSystemDefaultEncoding == NULL) { Py_FileSystemDefaultEncoding = get_locale_encoding(); if (Py_FileSystemDefaultEncoding == NULL) Py_FatalError("Py_Initialize: Unable to get the locale encoding"); @@ -941,6 +949,7 @@ initfsencoding(PyInterpreterState *inter return -1; } Py_DECREF(codec); + interp->fscodec_initialized = 1; return 0; } @@ -966,7 +975,7 @@ initsite(void) static PyObject* create_stdio(PyObject* io, int fd, int write_mode, char* name, - char* encoding, char* errors) + const char* encoding, const char* errors) { PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res; const char* mode; @@ -1085,7 +1094,8 @@ initstdio(void) PyObject *std = NULL; int status = 0, fd; PyObject * encoding_attr; - char *pythonioencoding = NULL, *encoding, *errors; + char *pythonioencoding = NULL, *encoding; + const char *errors; /* Hack to avoid a nasty recursion issue when Python is invoked in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ @@ -1120,14 +1130,8 @@ initstdio(void) encoding = _Py_StandardStreamEncoding; errors = _Py_StandardStreamErrors; if (!encoding || !errors) { - if (!errors) { - /* When the LC_CTYPE locale is the 
POSIX locale ("C locale"), - stdin and stdout use the surrogateescape error handler by - default, instead of the strict error handler. */ - char *loc = setlocale(LC_CTYPE, NULL); - if (loc != NULL && strcmp(loc, "C") == 0) - errors = "surrogateescape"; - } + if (!errors) + errors = Py_DefaultErrorHandler; pythonioencoding = Py_GETENV("PYTHONIOENCODING"); if (pythonioencoding) { diff -r 7444ac6d93c3 Python/sysmodule.c --- a/Python/sysmodule.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/sysmodule.c Sat Apr 18 16:49:30 2015 +0200 @@ -311,6 +311,18 @@ operating system filenames." ); static PyObject * +sys_getdefaulterrorhandler(PyObject *self) +{ + return PyUnicode_FromString(Py_DefaultErrorHandler); +} + +PyDoc_STRVAR(getdefaulterrorhandler_doc, +"getdefaulterrorhandler() -> string\n\ +\n\ +Return the default error handler." +); + +static PyObject * sys_intern(PyObject *self, PyObject *args) { PyObject *s; @@ -1168,6 +1180,8 @@ static PyMethodDef sys_methods[] = { #endif {"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding, METH_NOARGS, getfilesystemencoding_doc}, + {"getdefaulterrorhandler", (PyCFunction)sys_getdefaulterrorhandler, + METH_NOARGS, getdefaulterrorhandler_doc}, #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, #endif