diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -401,7 +401,7 @@ .. productionlist:: stringliteral: [`stringprefix`](`shortstring` | `longstring`) - stringprefix: "r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR" + stringprefix: "r" | "u" | "R" | "U" shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""' shortstringitem: `shortstringchar` | `stringescapeseq` @@ -447,7 +447,9 @@ Both string and bytes literals may optionally be prefixed with a letter ``'r'`` or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as literal characters. As a result, in string literals, ``'\U'`` and ``'\u'`` -escapes in raw strings are not treated specially. +escapes in raw strings are not treated specially. Given that Python 2.x's raw +unicode literals behave differently than Python 3.x's the ``'ur'`` syntax +is not supported. .. versionadded:: 3.3 The ``'rb'`` prefix of raw bytes literals has been added as a synonym diff --git a/Lib/test/test_strlit.py b/Lib/test/test_strlit.py --- a/Lib/test/test_strlit.py +++ b/Lib/test/test_strlit.py @@ -123,6 +123,13 @@ self.assertRaises(SyntaxError, eval, """ rrb'' """) self.assertRaises(SyntaxError, eval, """ rbb'' """) + def test_eval_str_u(self): + self.assertEqual(eval(""" u'x' """), 'x') + self.assertEqual(eval(""" U'\u00e4' """), 'ä') + self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä') + self.assertRaises(SyntaxError, eval, """ ur'' """) + self.assertRaises(SyntaxError, eval, """ ru'' """) + def check_encoding(self, encoding, extra=""): modname = "xx_" + encoding.replace("-", "_") fn = os.path.join(self.tmpdir, modname + ".py") diff --git a/Misc/HISTORY b/Misc/HISTORY --- a/Misc/HISTORY +++ b/Misc/HISTORY @@ -18,6 +18,9 @@ Core and Builtins ----------------- +- Issue #15096: Removed support for ur'' as the raw notation isn't + compatible with Python 2.x's raw unicode strings. + - Issue #3996: On Windows, the PyOS_CheckStack function would cause the interpreter to abort ("Fatal Python error: Could not reset the stack!") instead of throwing a MemoryError. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1421,7 +1421,7 @@ want to support it in arbitrary order like byte literals. */ else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U')) saw_u = 1; - else if (!saw_r && (c == 'r' || c == 'R')) + else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) saw_r = 1; else break;