diff -r d433d74e0377 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Thu Sep 10 16:00:06 2015 +0200
+++ b/Objects/unicodeobject.c	Thu Sep 10 21:15:23 2015 +0200
@@ -3929,6 +3929,27 @@ PyUnicode_GetDefaultEncoding(void)
     return "utf-8";
 }
 
+#define _Py_CODEC_ERROR_UNKNOWN 0
+#define _Py_CODEC_ERROR_SURROGATEPASS 1
+#define _Py_CODEC_ERROR_SURROGATEESCAPE 2
+#define _Py_CODEC_ERROR_OTHER (-1)
+
+/* Classify the error handler name `errors` for the decoder fast paths.
+   Returns _Py_CODEC_ERROR_SURROGATEPASS or _Py_CODEC_ERROR_SURROGATEESCAPE
+   for those two names and _Py_CODEC_ERROR_OTHER for anything else,
+   including NULL.  Never returns _Py_CODEC_ERROR_UNKNOWN. */
+static int
+detect_standard_errorhandler(const char *errors)
+{
+    if (errors == NULL)
+        return _Py_CODEC_ERROR_OTHER; /* strict */
+    if (strcmp(errors, "surrogatepass") == 0)
+        return _Py_CODEC_ERROR_SURROGATEPASS;
+    if (strcmp(errors, "surrogateescape") == 0)
+        return _Py_CODEC_ERROR_SURROGATEESCAPE;
+    return _Py_CODEC_ERROR_OTHER;
+}
+
 /* create or adjust a UnicodeDecodeError */
 static void
 make_decode_exception(PyObject **exceptionObject,
@@ -6659,6 +6680,7 @@ PyUnicode_DecodeASCII(const char *s,
     const char *e;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
@@ -6689,6 +6711,29 @@ PyUnicode_DecodeASCII(const char *s,
             ++s;
         }
         else {
+            if (errorType == _Py_CODEC_ERROR_UNKNOWN) {
+                /* Error path entered for the first time: classify once. */
+                errorType = detect_standard_errorhandler(errors);
+                if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE &&
+                    kind < PyUnicode_2BYTE_KIND) {
+                    /* c + 0xdc00 does not fit in a 1-byte buffer, so widen
+                       it.  On failure leave through the function's error
+                       exit (label outside this hunk), not a bare return. */
+                    if (_PyUnicodeWriter_Prepare(&writer, size - writer.pos, 0xffff) < 0)
+                        goto onError;
+                    kind = writer.kind;
+                    data = writer.data;
+                }
+            }
+            if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE) {
+                /* Fast path: write c + 0xdc00 directly and skip the
+                   generic error-handler call below. */
+                PyUnicode_WRITE(kind, data, writer.pos, c + 0xdc00);
+                writer.pos++;
+                ++s;
+                continue;
+            }
+
             startinpos = s-starts;
             endinpos = startinpos + 1;
             if (unicode_decode_call_errorhandler_writer(