Deprecate invalid backslash escape sequences in bytes and str decoding.

A backslash followed by a character that does not form a recognised escape
now emits a DeprecationWarning naming the full sequence (for example '\d').
The decoded result is unchanged: the backslash and the character are still
copied through verbatim, and the tests assert both the warning and the value.

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1194,7 +1194,7 @@ class EscapeDecodeTest(unittest.TestCase):
         check(b"[\\\n]", b"[]")
         check(br'[\"]', b'["]')
         check(br"[\']", b"[']")
-        check(br"[\\]", br"[\]")
+        check(br"[\\]", b"[\\]")
         check(br"[\a]", b"[\x07]")
         check(br"[\b]", b"[\x08]")
         check(br"[\t]", b"[\x09]")
@@ -1203,7 +1203,8 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\f]", b"[\x0c]")
         check(br"[\r]", b"[\x0d]")
         check(br"[\7]", b"[\x07]")
-        check(br"[\8]", br"[\8]")
+        with self.assertWarns(DeprecationWarning):
+            check(br"[\8]", br"[\8]")
         check(br"[\78]", b"[\x078]")
         check(br"[\41]", b"[!]")
         check(br"[\418]", b"[!8]")
@@ -1211,12 +1212,14 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\1010]", b"[A0]")
         check(br"[\501]", b"[A]")
         check(br"[\x41]", b"[A]")
-        check(br"[\X41]", br"[\X41]")
+        with self.assertWarns(DeprecationWarning):
+            check(br"[\X41]", br"[\X41]")
         check(br"[\x410]", b"[A0]")
         for b in range(256):
             if b not in b'\n"\'\\abtnvfr01234567x':
                 b = bytes([b])
-                check(b'\\' + b, b'\\' + b)
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, b"\\" + b)
 
     def test_errors(self):
         decode = codecs.escape_decode
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2729,6 +2729,11 @@ class UnicodeTest(string_tests.CommonTest,
         support.check_free_after_iterating(self, iter, str)
         support.check_free_after_iterating(self, reversed, str)
 
+    def test_invalid_sequences(self):
+        for letter in "cdeghijklmopqswyzABCDEFGHIJKLMOPQRSTVWXYZ":
+            with self.assertWarns(DeprecationWarning):
+                self.assertEqual(eval(r"'\%s'" % letter), "\\" + letter)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1207,8 +1207,14 @@ PyObject *PyBytes_DecodeEscape(const char *s,
             break;
 
         default:
+            /* s is a plain char pointer, so cast to unsigned char: a byte
+               >= 0x80 must not reach the %c format as a negative int. */
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                 "invalid escape sequence '\\%c'",
+                                 (unsigned char)s[-1]) < 0)
+                goto failed;
             *p++ = '\\';
             s--;
             goto non_esc; /* an arbitrary number of
                              unescaped UTF-8 bytes may follow. */
         }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5848,6 +5848,7 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     _PyUnicodeWriter writer;
     const char *end;
     char* message;
+    int ret;
     Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
@@ -6022,6 +6023,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
                 goto error;
             }
             else {
+                /* Deprecate invalid escape sequences */
+                ret = PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                       "invalid escape sequence '\\%c'", c);
+                if (ret < 0)
+                    goto onError;
                 WRITECHAR('\\');
                 WRITECHAR((unsigned char)s[-1]);
             }