diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1194,7 +1194,7 @@ class EscapeDecodeTest(unittest.TestCase):
         check(b"[\\\n]", b"[]")
         check(br'[\"]', b'["]')
         check(br"[\']", b"[']")
-        check(br"[\\]", br"[\]")
+        check(br"[\\]", b"[\\]")
         check(br"[\a]", b"[\x07]")
         check(br"[\b]", b"[\x08]")
         check(br"[\t]", b"[\x09]")
@@ -1203,7 +1203,6 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\f]", b"[\x0c]")
         check(br"[\r]", b"[\x0d]")
         check(br"[\7]", b"[\x07]")
-        check(br"[\8]", br"[\8]")
         check(br"[\78]", b"[\x078]")
         check(br"[\41]", b"[!]")
         check(br"[\418]", b"[!8]")
@@ -1211,12 +1210,21 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\1010]", b"[A0]")
         check(br"[\501]", b"[A]")
         check(br"[\x41]", b"[A]")
-        check(br"[\X41]", br"[\X41]")
         check(br"[\x410]", b"[A0]")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567x':
-                b = bytes([b])
-                check(b'\\' + b, b'\\' + b)
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, b"\\" + b)
+            with self.assertWarns(DeprecationWarning):
+                check(b"\\" + b.upper(), b"\\" + b.upper())
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", b"\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", b"\\9")
+        # A non-ASCII byte after a backslash must warn and pass through too.
+        with self.assertWarns(DeprecationWarning):
+            check(b"\\\xfa", b"\\\xfa")
 
     def test_errors(self):
         decode = codecs.escape_decode
@@ -2465,7 +2473,6 @@ class UnicodeEscapeTest(unittest.TestCase):
         check(br"[\f]", "[\x0c]")
         check(br"[\r]", "[\x0d]")
         check(br"[\7]", "[\x07]")
-        check(br"[\8]", r"[\8]")
         check(br"[\78]", "[\x078]")
         check(br"[\41]", "[!]")
         check(br"[\418]", "[!8]")
@@ -2475,9 +2482,18 @@ class UnicodeEscapeTest(unittest.TestCase):
         check(br"[\x410]", "[A0]")
         check(br"\u20ac", "\u20ac")
         check(br"\U0001d120", "\U0001d120")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567xuUN':
-                check(b'\\' + bytes([b]), '\\' + chr(b))
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtuvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, "\\" + chr(i))
+            if b.upper() not in b'UN':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b.upper(), "\\" + chr(i-32))
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", "\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", "\\9")
 
     def test_decode_errors(self):
         decode = codecs.unicode_escape_decode
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -10,6 +10,7 @@ import codecs
 import itertools
 import operator
+import string
 import struct
 import sys
 import unittest
 import warnings
@@ -2729,6 +2730,13 @@ class UnicodeTest(string_tests.CommonTest,
         support.check_free_after_iterating(self, iter, str)
         support.check_free_after_iterating(self, reversed, str)
 
+    def test_invalid_sequences(self):
+        for letter in string.ascii_letters + "89":  # 0-7 are octal escapes
+            if letter in "abfnrtuvxNU":
+                continue
+            with self.assertWarns(DeprecationWarning):
+                eval(r"'\%s'" % letter)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1207,8 +1207,14 @@ PyObject *PyBytes_DecodeEscape(const char *s,
             break;
 
         default:
+            s--;
+            /* Cast: plain char may be signed and %c rejects negative
+               values, so a byte >= 0x80 would raise instead of warn. */
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                 "invalid escape sequence '\\%c'",
+                                 (unsigned char)*s) < 0)
+                goto failed;
             *p++ = '\\';
-            s--;
             goto non_esc; /* an arbitrary number of unescaped
                              UTF-8 bytes may follow. */
         }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5849,6 +5849,7 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     _PyUnicodeWriter writer;
     const char *end;
     char* message;
+    int ret;
     Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
@@ -6023,6 +6024,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
                 goto error;
             }
             else {
+                /* Deprecate invalid escape sequences */
+                ret = PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                       "invalid escape sequence '\\%c'", c);
+                if (ret < 0)
+                    goto onError;
                 WRITECHAR('\\');
                 WRITECHAR((unsigned char)s[-1]);
             }