diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -25,10 +25,16 @@ def search_function(encoding): return (42, 42) # no unicode def decode2(input, errors="strict"): return (42, 42) # no unicode + def encode3(input, errors="strict"): + return (u'42', 42) # not bytes + def decode3(input, errors="strict"): + return ('42', 42) # not unicode if encoding=="test.unicode1": return (encode1, decode1, None, None) elif encoding=="test.unicode2": return (encode2, decode2, None, None) + elif encoding=="test.unicode3": + return (encode3, decode3, None, None) else: return None codecs.register(search_function) @@ -1851,6 +1857,27 @@ class UnicodeTest( unicode_encodedecimal(u"123" + s, "xmlcharrefreplace"), '123' + exp) + def test_py3k_warning_encode_not_bytes(self): + with test_support.check_py3k_warnings( + ('encoders must return bytestrings in 3.x', DeprecationWarning)): + u"abcd".encode('test.unicode3') + + def test_py3k_warning_decode_not_unicode(self): + with test_support.check_py3k_warnings( + ('decoders must return unicode in 3.x', DeprecationWarning)): + "abcd".decode('test.unicode3') + + def test_py3k_warning_str_encode(self): + with test_support.check_py3k_warnings( + (r'bytes.encode\(\) does not exist in 3.x', DeprecationWarning)): + "abcd".encode('ascii') + + def test_py3k_warning_unicode_decode(self): + with test_support.check_py3k_warnings( + (r'unicode.decode\(\) does not exist in 3.x', DeprecationWarning)): + u"abcd".decode('ascii') + + def test_main(): test_support.run_unittest(__name__) diff --git a/Objects/stringobject.c b/Objects/stringobject.c --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3010,6 +3010,9 @@ string_encode(PyStringObject *self, PyOb char *errors = NULL; PyObject *v; + if (PyErr_WarnPy3k("bytes.encode() does not exist in 3.x", 1) < 0) + return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", kwlist, &encoding, &errors)) return NULL; @@ -3063,6 +3066,13 @@ string_decode(PyStringObject *self, PyOb Py_DECREF(v); return NULL; } + if (!PyUnicode_Check(v)) { + if (PyErr_WarnPy3k("decoders must return unicode in 3.x; " + "use codecs.decode() to decode to arbitrary types", 1) < 0) { + Py_DECREF(v); + return NULL; + } + } return v; onError: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6451,6 +6451,13 @@ unicode_encode(PyUnicodeObject *self, Py Py_DECREF(v); return NULL; } + if (!PyBytes_Check(v)) { + if (PyErr_WarnPy3k("encoders must return bytestrings in 3.x; " + "use codecs.encode() to encode to arbitrary types", 1) < 0) { + Py_DECREF(v); + return NULL; + } + } return v; onError: @@ -6475,6 +6482,9 @@ unicode_decode(PyUnicodeObject *self, Py char *errors = NULL; PyObject *v; + if (PyErr_WarnPy3k("unicode.decode() does not exist in 3.x", 1) < 0) + return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors)) return NULL;