diff -r 7444ac6d93c3 Doc/library/sys.rst --- a/Doc/library/sys.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/library/sys.rst Sat Apr 18 11:24:32 2015 +0200 @@ -441,6 +441,13 @@ always available. .. versionchanged:: 3.2 :func:`getfilesystemencoding` result cannot be ``None`` anymore. +.. function:: getdefaulterrorhandler() + + Return the name of the default error handler. Return ``surrogateescape`` if + the locale is C, return ``strict`` otherwise. + + .. versionadded:: 3.5 + .. function:: getrefcount(object) diff -r 7444ac6d93c3 Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Thu Apr 09 22:29:52 2015 +0200 +++ b/Doc/whatsnew/3.5.rst Sat Apr 18 11:24:32 2015 +0200 @@ -459,6 +459,12 @@ socket :meth:`socket.socket.send`. (Contributed by Giampaolo Rodola' in :issue:`17552`.) +sys +--- + +* New :func:`sys.getdefaulterrorhandler` to get the default error handler: + ``strict`` or ``surrogateescape``. + sysconfig --------- diff -r 7444ac6d93c3 Include/ceval.h --- a/Include/ceval.h Thu Apr 09 22:29:52 2015 +0200 +++ b/Include/ceval.h Sat Apr 18 11:24:32 2015 +0200 @@ -23,6 +23,7 @@ PyAPI_FUNC(PyObject *) PyEval_CallMethod #ifndef Py_LIMITED_API PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); +PyAPI_FUNC(void) PyEval_SetGeneratorWrapper(PyObject *wrapper); #endif struct _frame; /* Avoid including frameobject.h */ diff -r 7444ac6d93c3 Include/fileobject.h --- a/Include/fileobject.h Thu Apr 09 22:29:52 2015 +0200 +++ b/Include/fileobject.h Sat Apr 18 11:24:32 2015 +0200 @@ -24,6 +24,7 @@ PyAPI_FUNC(char *) Py_UniversalNewlineFg */ PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; PyAPI_DATA(int) Py_HasFileSystemDefaultEncoding; +PyAPI_DATA(const char*) Py_DefaultErrorHandler; /* Internal API diff -r 7444ac6d93c3 Include/fileutils.h --- a/Include/fileutils.h Thu Apr 09 22:29:52 2015 +0200 +++ b/Include/fileutils.h Sat Apr 18 11:24:32 2015 +0200 @@ -5,6 +5,8 @@ extern "C" { #endif +PyAPI_FUNC(int) 
_Py_CheckForceASCII(void); + PyAPI_FUNC(PyObject *) _Py_device_encoding(int); PyAPI_FUNC(wchar_t *) Py_DecodeLocale( diff -r 7444ac6d93c3 Lib/_pyio.py --- a/Lib/_pyio.py Thu Apr 09 22:29:52 2015 +0200 +++ b/Lib/_pyio.py Sat Apr 18 11:24:32 2015 +0200 @@ -1589,7 +1589,7 @@ class TextIOWrapper(TextIOBase): raise LookupError(msg % encoding) if errors is None: - errors = "strict" + errors = sys.getdefaulterrorhandler() else: if not isinstance(errors, str): raise ValueError("invalid errors: %r" % errors) diff -r 7444ac6d93c3 Lib/os.py --- a/Lib/os.py Thu Apr 09 22:29:52 2015 +0200 +++ b/Lib/os.py Sat Apr 18 11:24:32 2015 +0200 @@ -799,10 +799,7 @@ if supports_bytes_environ: def _fscodec(): encoding = sys.getfilesystemencoding() - if encoding == 'mbcs': - errors = 'strict' - else: - errors = 'surrogateescape' + errors = 'strict' if encoding == 'mbcs' else 'surrogateescape' def fsencode(filename): """ diff -r 7444ac6d93c3 Modules/_io/textio.c --- a/Modules/_io/textio.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Modules/_io/textio.c Sat Apr 18 11:24:32 2015 +0200 @@ -851,7 +851,7 @@ textiowrapper_init(textio *self, PyObjec NULL}; PyObject *buffer, *raw, *codec_info = NULL; char *encoding = NULL; - char *errors = NULL; + const char *errors = NULL; char *newline = NULL; int line_buffering = 0, write_through = 0; _PyIO_State *state = NULL; @@ -973,7 +973,7 @@ textiowrapper_init(textio *self, PyObjec */ if (errors == NULL) - errors = "strict"; + errors = Py_DefaultErrorHandler; self->errors = PyBytes_FromString(errors); if (self->errors == NULL) goto error; diff -r 7444ac6d93c3 Python/bltinmodule.c --- a/Python/bltinmodule.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/bltinmodule.c Sat Apr 18 11:24:32 2015 +0200 @@ -32,6 +32,12 @@ const char *Py_FileSystemDefaultEncoding int Py_HasFileSystemDefaultEncoding = 0; #endif +/* When the LC_CTYPE locale is the POSIX locale ("C locale"), text streams + (including stdin and stdout) use the surrogateescape error handler by default, + instead 
of the strict error handler. This variable is set at startup by + initfsencoding(). */ +const char* Py_DefaultErrorHandler = "strict"; + _Py_IDENTIFIER(__builtins__); _Py_IDENTIFIER(__dict__); _Py_IDENTIFIER(__prepare__); diff -r 7444ac6d93c3 Python/ceval.c --- a/Python/ceval.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/ceval.c Sat Apr 18 11:24:32 2015 +0200 @@ -153,6 +153,8 @@ static PyObject * special_lookup(PyObjec "free variable '%.200s' referenced before assignment" \ " in enclosing scope" +static PyObject *generator_wrapper = NULL; + /* Dynamic execution profile */ #ifdef DYNAMIC_EXECUTION_PROFILE #ifdef DXPAIRS @@ -3598,6 +3600,8 @@ static PyObject * } if (co->co_flags & CO_GENERATOR) { + PyObject *gen; + /* Don't need to keep the reference to f_back, it will be set * when the generator is resumed. */ Py_CLEAR(f->f_back); @@ -3606,7 +3610,16 @@ static PyObject * /* Create a new generator that owns the ready to run frame * and return that as the value. */ - return PyGen_NewWithQualName(f, name, qualname); + gen = PyGen_NewWithQualName(f, name, qualname); + if (gen == NULL) + return NULL; + + if (generator_wrapper) { + PyObject *gen2 = PyObject_CallFunction(generator_wrapper, "O", gen); + Py_DECREF(gen); + gen = gen2; + } + return gen; } retval = PyEval_EvalFrameEx(f,0); @@ -4042,6 +4055,15 @@ PyEval_SetTrace(Py_tracefunc func, PyObj || (tstate->c_profilefunc != NULL)); } +void +PyEval_SetGeneratorWrapper(PyObject *wrapper) +{ + Py_CLEAR(generator_wrapper); + + Py_XINCREF(wrapper); + generator_wrapper = wrapper; +} + PyObject * PyEval_GetBuiltins(void) { diff -r 7444ac6d93c3 Python/fileutils.c --- a/Python/fileutils.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/fileutils.c Sat Apr 18 11:24:32 2015 +0200 @@ -89,12 +89,12 @@ extern int _Py_normalize_encoding(const decode_ascii_surrogateescape() 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and Py_DecodeLocale() uses mbstowcs() - -1: unknown, need to call check_force_ascii() to get the value + 
-1: unknown, need to call _Py_CheckForceASCII() to get the value */ static int force_ascii = -1; -static int -check_force_ascii(void) +int +_Py_CheckForceASCII(void) { char *loc; #if defined(HAVE_LANGINFO_H) && defined(CODESET) @@ -289,7 +289,7 @@ Py_DecodeLocale(const char* arg, size_t #ifndef MS_WINDOWS if (force_ascii == -1) - force_ascii = check_force_ascii(); + force_ascii = _Py_CheckForceASCII(); if (force_ascii) { /* force ASCII encoding to workaround mbstowcs() issue */ @@ -456,7 +456,7 @@ Py_EncodeLocale(const wchar_t *text, siz #ifndef MS_WINDOWS if (force_ascii == -1) - force_ascii = check_force_ascii(); + force_ascii = _Py_CheckForceASCII(); if (force_ascii) return encode_ascii_surrogateescape(text, error_pos); diff -r 7444ac6d93c3 Python/pylifecycle.c --- a/Python/pylifecycle.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/pylifecycle.c Sat Apr 18 11:24:32 2015 +0200 @@ -921,8 +921,23 @@ initfsencoding(PyInterpreterState *inter { PyObject *codec; - if (Py_FileSystemDefaultEncoding == NULL) - { + if (!interp->fscodec_initialized) { + if (!_Py_CheckForceASCII()) { + /* When the LC_CTYPE locale is the POSIX locale ("C locale"), + stdin and stdout use the surrogateescape error handler by + default, instead of the strict error handler. 
*/ + char *loc = setlocale(LC_CTYPE, NULL); + if (loc != NULL && strcmp(loc, "C") == 0) + Py_DefaultErrorHandler = "surrogateescape"; + } + else { + /* Py_EncodeLocale() and Py_DecodeLocale() force the ASCII + encoding, so use surrogateescape by default */ + Py_DefaultErrorHandler = "surrogateescape"; + } + } + + if (Py_FileSystemDefaultEncoding == NULL) { Py_FileSystemDefaultEncoding = get_locale_encoding(); if (Py_FileSystemDefaultEncoding == NULL) Py_FatalError("Py_Initialize: Unable to get the locale encoding"); @@ -941,6 +956,7 @@ initfsencoding(PyInterpreterState *inter return -1; } Py_DECREF(codec); + interp->fscodec_initialized = 1; return 0; } @@ -966,7 +982,7 @@ initsite(void) static PyObject* create_stdio(PyObject* io, int fd, int write_mode, char* name, - char* encoding, char* errors) + const char* encoding, const char* errors) { PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res; const char* mode; @@ -1085,7 +1101,8 @@ initstdio(void) PyObject *std = NULL; int status = 0, fd; PyObject * encoding_attr; - char *pythonioencoding = NULL, *encoding, *errors; + char *pythonioencoding = NULL, *encoding; + const char *errors; /* Hack to avoid a nasty recursion issue when Python is invoked in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ @@ -1120,14 +1137,8 @@ initstdio(void) encoding = _Py_StandardStreamEncoding; errors = _Py_StandardStreamErrors; if (!encoding || !errors) { - if (!errors) { - /* When the LC_CTYPE locale is the POSIX locale ("C locale"), - stdin and stdout use the surrogateescape error handler by - default, instead of the strict error handler. 
*/ - char *loc = setlocale(LC_CTYPE, NULL); - if (loc != NULL && strcmp(loc, "C") == 0) - errors = "surrogateescape"; - } + if (!errors) + errors = Py_DefaultErrorHandler; pythonioencoding = Py_GETENV("PYTHONIOENCODING"); if (pythonioencoding) { diff -r 7444ac6d93c3 Python/sysmodule.c --- a/Python/sysmodule.c Thu Apr 09 22:29:52 2015 +0200 +++ b/Python/sysmodule.c Sat Apr 18 11:24:32 2015 +0200 @@ -311,6 +311,18 @@ operating system filenames." ); static PyObject * +sys_getdefaulterrorhandler(PyObject *self) +{ + return PyUnicode_FromString(Py_DefaultErrorHandler); +} + +PyDoc_STRVAR(getdefaulterrorhandler_doc, +"getdefaulterrorhandler() -> string\n\ +\n\ +Return the default error handler." +); + +static PyObject * sys_intern(PyObject *self, PyObject *args) { PyObject *s; @@ -1168,6 +1180,8 @@ static PyMethodDef sys_methods[] = { #endif {"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding, METH_NOARGS, getfilesystemencoding_doc}, + {"getdefaulterrorhandler", (PyCFunction)sys_getdefaulterrorhandler, + METH_NOARGS, getdefaulterrorhandler_doc}, #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, #endif