diff -r b4cc62473c13 Doc/reference/lexical_analysis.rst --- a/Doc/reference/lexical_analysis.rst Thu Sep 08 13:59:53 2016 -0400 +++ b/Doc/reference/lexical_analysis.rst Thu Sep 08 14:43:06 2016 -0400 @@ -560,6 +560,10 @@ escape sequences only recognized in string literals fall into the category of unrecognized escapes for bytes literals. + .. versionchanged:: 3.6 + Unrecognized escape sequences produce a DeprecationWarning. In + some future version of Python they will be a SyntaxError. + Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, ``r"\""`` is a valid string literal consisting of two characters: a backslash and a double quote; ``r"\"`` diff -r b4cc62473c13 Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Thu Sep 08 13:59:53 2016 -0400 +++ b/Doc/whatsnew/3.6.rst Thu Sep 08 14:43:06 2016 -0400 @@ -952,6 +952,11 @@ parameter will be dropped in a future Python release and likely earlier through third party tools. See :issue:`27919` for details. +* A backslash-character pair that is not a valid escape sequence now generates + a DeprecationWarning. Although this will eventually become a SyntaxError, + that will not be for several Python releases. (Contributed by Emanuel Barry + in :issue:`27364`.) 
+ Deprecated Python behavior -------------------------- diff -r b4cc62473c13 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Thu Sep 08 13:59:53 2016 -0400 +++ b/Lib/test/test_codecs.py Thu Sep 08 14:43:06 2016 -0400 @@ -1175,7 +1175,7 @@ check(b"[\\\n]", b"[]") check(br'[\"]', b'["]') check(br"[\']", b"[']") - check(br"[\\]", br"[\]") + check(br"[\\]", b"[\\]") check(br"[\a]", b"[\x07]") check(br"[\b]", b"[\x08]") check(br"[\t]", b"[\x09]") @@ -1184,7 +1184,6 @@ check(br"[\f]", b"[\x0c]") check(br"[\r]", b"[\x0d]") check(br"[\7]", b"[\x07]") - check(br"[\8]", br"[\8]") check(br"[\78]", b"[\x078]") check(br"[\41]", b"[!]") check(br"[\418]", b"[!8]") @@ -1192,12 +1191,18 @@ check(br"[\1010]", b"[A0]") check(br"[\501]", b"[A]") check(br"[\x41]", b"[A]") - check(br"[\X41]", br"[\X41]") check(br"[\x410]", b"[A0]") - for b in range(256): - if b not in b'\n"\'\\abtnvfr01234567x': - b = bytes([b]) - check(b'\\' + b, b'\\' + b) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtvx': + with self.assertWarns(DeprecationWarning): + check(b"\\" + b, b"\\" + b) + with self.assertWarns(DeprecationWarning): + check(b"\\" + b.upper(), b"\\" + b.upper()) + with self.assertWarns(DeprecationWarning): + check(br"\8", b"\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", b"\\9") def test_errors(self): decode = codecs.escape_decode @@ -2448,7 +2453,6 @@ check(br"[\f]", "[\x0c]") check(br"[\r]", "[\x0d]") check(br"[\7]", "[\x07]") - check(br"[\8]", r"[\8]") check(br"[\78]", "[\x078]") check(br"[\41]", "[!]") check(br"[\418]", "[!8]") @@ -2458,9 +2462,18 @@ check(br"[\x410]", "[A0]") check(br"\u20ac", "\u20ac") check(br"\U0001d120", "\U0001d120") - for b in range(256): - if b not in b'\n"\'\\abtnvfr01234567xuUN': - check(b'\\' + bytes([b]), '\\' + chr(b)) + for i in range(97, 123): + b = bytes([i]) + if b not in b'abfnrtuvx': + with self.assertWarns(DeprecationWarning): + check(b"\\" + b, "\\" + chr(i)) + if b.upper() not in b'UN': + with 
self.assertWarns(DeprecationWarning): + check(b"\\" + b.upper(), "\\" + chr(i-32)) + with self.assertWarns(DeprecationWarning): + check(br"\8", "\\8") + with self.assertWarns(DeprecationWarning): + check(br"\9", "\\9") def test_decode_errors(self): decode = codecs.unicode_escape_decode diff -r b4cc62473c13 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Thu Sep 08 13:59:53 2016 -0400 +++ b/Lib/test/test_unicode.py Thu Sep 08 14:43:06 2016 -0400 @@ -10,6 +10,7 @@ import itertools import operator import struct +import string import sys import unittest import warnings @@ -2752,6 +2753,12 @@ support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) + def test_invalid_sequences(self): + for letter in string.ascii_letters + "89": # 0-7 are octal escapes + if letter in "abfnrtuvxNU": + continue + with self.assertWarns(DeprecationWarning): + eval(r"'\%s'" % letter) class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff -r b4cc62473c13 Misc/NEWS --- a/Misc/NEWS Thu Sep 08 13:59:53 2016 -0400 +++ b/Misc/NEWS Thu Sep 08 14:43:06 2016 -0400 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #27364: A backslash-character pair that is not a valid escape sequence + now generates a DeprecationWarning. + - Issue #27350: `dict` implementation is changed like PyPy. It is more compact and preserves insertion order. diff -r b4cc62473c13 Objects/bytesobject.c --- a/Objects/bytesobject.c Thu Sep 08 13:59:53 2016 -0400 +++ b/Objects/bytesobject.c Thu Sep 08 14:43:06 2016 -0400 @@ -1207,8 +1207,10 @@ break; default: + /* %c needs a non-negative int; plain char may be signed. */ + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", (unsigned char)*(--s)) < 0) + goto failed; *p++ = '\\'; - s--; goto non_esc; /* an arbitrary number of unescaped UTF-8 bytes may follow. 
*/ } diff -r b4cc62473c13 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Sep 08 13:59:53 2016 -0400 +++ b/Objects/unicodeobject.c Thu Sep 08 14:43:06 2016 -0400 @@ -6065,6 +6065,9 @@ goto error; default: + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "invalid escape sequence '\\%c'", c) < 0) + goto onError; WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); continue;