# Codec error reporting for str.encode() and the PyUnicode_* codec helpers.
#
# * Output type errors now name the codec and the offending type, and point
#   at codecs.encode/codecs.decode for non-str/bytes results.
# * PyUnicode_AsEncodedString becomes a thin wrapper around a static helper
#   that can report, via an int out-parameter, that the failure was caused
#   by the codec's output type.
# * str.encode() re-raises exact TypeError/ValueError failures with the codec
#   name in the message and the original exception chained as the cause.
diff -r 494f736f5945 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Nov 04 11:28:26 2013 +0100
+++ b/Objects/unicodeobject.c	Mon Nov 04 23:18:00 2013 +1000
@@ -3051,8 +3051,10 @@
         goto onError;
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(unicode)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(unicode)->tp_name);
         Py_DECREF(unicode);
         goto onError;
     }
@@ -3110,8 +3112,10 @@
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name);
         Py_DECREF(v);
         goto onError;
     }
@@ -3371,10 +3375,11 @@
 #endif
 }
 
-PyObject *
-PyUnicode_AsEncodedString(PyObject *unicode,
-                          const char *encoding,
-                          const char *errors)
+static PyObject *
+_PyUnicode_AsEncodedStringInternal(PyObject *unicode,
+                                   const char *encoding,
+                                   const char *errors,
+                                   int *bad_output)
 {
     PyObject *v;
     char lower[11];  /* Enough for any encoding shortcut */
@@ -3434,14 +3439,29 @@
         return b;
     }
 
+    /* Let the method wrapper know the problem is with the output type */
+    if (bad_output != NULL) {
+        *bad_output = 1;
+    }
     PyErr_Format(PyExc_TypeError,
-                 "encoder did not return a bytes object (type=%.400s)",
-                 Py_TYPE(v)->tp_name);
+                 "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
+                 "use codecs.encode to encode to arbitrary types",
+                 encoding,
+                 Py_TYPE(v)->tp_name);
     Py_DECREF(v);
     return NULL;
 }
 
 PyObject *
+PyUnicode_AsEncodedString(PyObject *unicode,
+                          const char *encoding,
+                          const char *errors)
+{
+    return _PyUnicode_AsEncodedStringInternal(unicode,
+                                              encoding, errors, NULL);
+}
+
+PyObject *
 PyUnicode_AsEncodedUnicode(PyObject *unicode,
                            const char *encoding,
                            const char *errors)
@@ -3462,8 +3482,10 @@
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "encoder did not return an str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' encoder returned '%.400s' instead of 'str'; "
+                     "use codecs.encode to encode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name);
         Py_DECREF(v);
         goto onError;
     }
@@ -10981,11 +11003,68 @@
     static char *kwlist[] = {"encoding", "errors", 0};
     char *encoding = NULL;
     char *errors = NULL;
+    int bad_output = 0;
+    PyObject *result;
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
                                      kwlist, &encoding, &errors))
         return NULL;
-    return PyUnicode_AsEncodedString(self, encoding, errors);
+    result = _PyUnicode_AsEncodedStringInternal(self,
+                                                encoding, errors,
+                                                &bad_output);
+    if (result == NULL && !bad_output) {
+        /* Re-raise input-related failures with the codec name in the message.
+         *
+         * We limit this to *exact* matches on TypeError and ValueError
+         * since changing types to a broader exception type is backwards
+         * incompatible, and subclasses may not support instantiation with
+         * PyErr_Format.
+         *
+         * We also ignore any failures due to an invalid output type.
+         */
+        PyObject *exact = NULL;     /* borrowed: the only type we re-raise */
+        const char *kind = NULL;    /* "type" or "value", used in the message */
+
+        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+            exact = PyExc_TypeError;
+            kind = "type";
+        }
+        else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
+            exact = PyExc_ValueError;
+            kind = "value";
+        }
+        if (exact != NULL) {
+            PyObject *exc, *val, *tb;
+            PyObject *new_exc, *new_val, *new_tb;
+
+            PyErr_Fetch(&exc, &val, &tb);
+            if (exc != exact) {
+                /* Only a subclass matched: put the error back untouched */
+                PyErr_Restore(exc, val, tb);
+            }
+            else {
+                PyErr_NormalizeException(&exc, &val, &tb);
+                if (tb != NULL) {
+                    /* Keep the original traceback on the chained cause */
+                    PyException_SetTraceback(val, tb);
+                    Py_DECREF(tb);
+                }
+                /* encoding stays NULL when the caller did not pass one, and
+                 * %s requires a valid C string, so substitute the default */
+                PyErr_Format(exc,
+                    "invalid input %s for '%s' codec (%s: %S)",
+                    kind,
+                    encoding != NULL ? encoding : PyUnicode_GetDefaultEncoding(),
+                    Py_TYPE(val)->tp_name, val);
+                Py_DECREF(exc);
+                PyErr_Fetch(&new_exc, &new_val, &new_tb);
+                PyErr_NormalizeException(&new_exc, &new_val, &new_tb);
+                /* PyException_SetCause steals our reference to val */
+                PyException_SetCause(new_val, val);
+                PyErr_Restore(new_exc, new_val, new_tb);
+            }
+        }
+    }
+    return result;
 }
 
 PyDoc_STRVAR(expandtabs__doc__,