diff -r e6c0c216bfde Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Sat Oct 03 01:55:51 2015 +0200 +++ b/Lib/test/test_codecs.py Sat Oct 03 01:56:29 2015 +0200 @@ -788,6 +788,18 @@ class UTF8Test(ReadTest, unittest.TestCa self.check_state_handling_decode(self.encoding, u, u.encode(self.encoding)) + def test_decode_error(self): + for data, error_handler, expected in ( + (b'[\x80\xff]', 'ignore', '[]'), + (b'[\x80\xff]', 'replace', '[\ufffd\ufffd]'), + (b'[\x80\xff]', 'surrogateescape', '[\udc80\udcff]'), + (b'[\x80\xff]', 'backslashreplace', '[\\x80\\xff]'), + ): + with self.subTest(data=data, error_handler=error_handler, + expected=expected): + self.assertEqual(data.decode(self.encoding, error_handler), + expected) + def test_lone_surrogates(self): super().test_lone_surrogates() # not sure if this is making sense for diff -r e6c0c216bfde Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Oct 03 01:55:51 2015 +0200 +++ b/Objects/unicodeobject.c Sat Oct 03 01:56:29 2015 +0200 @@ -4714,8 +4714,9 @@ PyUnicode_DecodeUTF8Stateful(const char Py_ssize_t startinpos; Py_ssize_t endinpos; const char *errmsg = ""; - PyObject *errorHandler = NULL; + PyObject *error_handler_obj = NULL; PyObject *exc = NULL; + _Py_error_handler error_handler = _Py_ERROR_UNKNOWN; if (size == 0) { if (consumed) @@ -4740,6 +4741,7 @@ PyUnicode_DecodeUTF8Stateful(const char while (s < end) { Py_UCS4 ch; int kind = writer.kind; + if (kind == PyUnicode_1BYTE_KIND) { if (PyUnicode_IS_ASCII(writer.buffer)) ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos); @@ -4778,24 +4780,52 @@ PyUnicode_DecodeUTF8Stateful(const char continue; } - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "utf-8", errmsg, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &writer)) - goto onError; + if (error_handler == _Py_ERROR_UNKNOWN) + error_handler = get_error_handler(errors); + + switch (error_handler) { + case _Py_ERROR_IGNORE: + s += (endinpos - startinpos); + break; + + case _Py_ERROR_REPLACE: + if (_PyUnicodeWriter_WriteCharInline(&writer, 0xfffd) < 0) + goto onError; + s += (endinpos - startinpos); + break; + + case _Py_ERROR_SURROGATEESCAPE: + if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0) + goto onError; + for (Py_ssize_t i=startinpos; i