Index: Doc/library/re.rst =================================================================== --- Doc/library/re.rst (Revision 65995) +++ Doc/library/re.rst (Arbeitskopie) @@ -403,8 +403,8 @@ accepted by the regular expression parser:: \a \b \f \n - \r \t \v \x - \\ + \r \t \u \U + \v \x \\ Octal escapes are included in a limited form: If the first digit is a 0, or if there are three octal digits, it is considered an octal escape. Otherwise, it is Index: Lib/test/test_re.py =================================================================== --- Lib/test/test_re.py (Revision 65995) +++ Lib/test/test_re.py (Arbeitskopie) @@ -449,6 +449,8 @@ self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) + self.assertNotEqual(re.match(r"\u1234", "\u1234"), None) + self.assertNotEqual(re.match(r"\U00001234", "\u1234"), None) self.assertRaises(re.error, re.match, "\911", "") def test_sre_character_class_literals(self): @@ -459,6 +461,8 @@ self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) + self.assertNotEqual(re.match(r"[\u1234-\u1236]", "\u1235"), None) + self.assertNotEqual(re.match(r"[\U00001234]", "\u1234"), None) self.assertRaises(re.error, re.match, "[\911]", "") def test_bug_113254(self): Index: Lib/sre_parse.py =================================================================== --- Lib/sre_parse.py (Revision 65995) +++ Lib/sre_parse.py (Arbeitskopie) @@ -253,6 +253,20 @@ if len(escape) != 2: raise error("bogus escape: %s" % repr("\\" + escape)) return LITERAL, int(escape, 16) & 0xff + elif c == "u": + # unicode escape (exactly four digits) + while source.next in HEXDIGITS and len(escape) < 6: + escape = escape + source.get() + if len(escape) != 6: + raise error("bogus escape: %s" % repr(escape)) + return LITERAL, int(escape[2:], 16) + elif c == "U": + # unicode escape (exactly eight digits) + while source.next in HEXDIGITS and len(escape) < 10: + escape = escape + source.get() + if len(escape) != 10: + raise error("bogus escape: %s" % repr(escape)) + return LITERAL, int(escape[2:], 16) elif c in OCTDIGITS: # octal escape (up to three digits) while source.next in OCTDIGITS and len(escape) < 4: @@ -284,6 +298,20 @@ if len(escape) != 4: raise ValueError return LITERAL, int(escape[2:], 16) & 0xff + elif c == "u": + # unicode escape (exactly four digits) + while source.next in HEXDIGITS and len(escape) < 6: + escape = escape + source.get() + if len(escape) != 6: + raise ValueError + return LITERAL, int(escape[2:], 16) + elif c == "U": + # unicode escape (exactly eight digits) + while source.next in HEXDIGITS and len(escape) < 10: + escape = escape + source.get() + if len(escape) != 10: + raise ValueError + return LITERAL, int(escape[2:], 16) elif c == "0": # octal escape while source.next in OCTDIGITS and len(escape) < 4: