diff -r 8ddf40f68def Doc/library/re.rst
--- a/Doc/library/re.rst	Fri Jun 01 00:07:28 2012 -0500
+++ b/Doc/library/re.rst	Fri Jun 01 09:39:29 2012 +0300
@@ -414,8 +414,8 @@
 accepted by the regular expression parser::
 
    \a      \b      \f      \n
-   \r      \t      \v      \x
-   \\
+   \r      \t      \u      \U
+   \v      \x      \\
 
 (Note that ``\b`` is used to represent word boundaries, and means "backspace"
 only inside character classes.)
diff -r 8ddf40f68def Lib/sre_parse.py
--- a/Lib/sre_parse.py	Fri Jun 01 00:07:28 2012 -0500
+++ b/Lib/sre_parse.py	Fri Jun 01 09:39:29 2012 +0300
@@ -247,6 +247,20 @@
             if len(escape) != 2:
                 raise error("bogus escape: %s" % repr("\\" + escape))
             return LITERAL, int(escape, 16) & 0xff
+        elif c == "u":
+            # unicode escape (exactly four digits)
+            while source.next in HEXDIGITS and len(escape) < 6:
+                escape = escape + source.get()
+            if len(escape) != 6:
+                raise error("bogus escape: %s" % repr(escape))
+            return LITERAL, int(escape[2:], 16)
+        elif c == "U":
+            # unicode escape (exactly eight digits)
+            while source.next in HEXDIGITS and len(escape) < 10:
+                escape = escape + source.get()
+            if len(escape) != 10:
+                raise error("bogus escape: %s" % repr(escape))
+            return LITERAL, int(escape[2:], 16)
         elif c in OCTDIGITS:
             # octal escape (up to three digits)
             while source.next in OCTDIGITS and len(escape) < 4:
@@ -278,6 +292,20 @@
             if len(escape) != 4:
                 raise ValueError
             return LITERAL, int(escape[2:], 16) & 0xff
+        elif c == "u":
+            # unicode escape (exactly four digits)
+            while source.next in HEXDIGITS and len(escape) < 6:
+                escape = escape + source.get()
+            if len(escape) != 6:
+                raise ValueError
+            return LITERAL, int(escape[2:], 16)
+        elif c == "U":
+            # unicode escape (exactly eight digits)
+            while source.next in HEXDIGITS and len(escape) < 10:
+                escape = escape + source.get()
+            if len(escape) != 10:
+                raise ValueError
+            return LITERAL, int(escape[2:], 16)
         elif c == "0":
             # octal escape
             while source.next in OCTDIGITS and len(escape) < 4:
diff -r 8ddf40f68def Lib/test/test_re.py
--- a/Lib/test/test_re.py	Fri Jun 01 00:07:28 2012 -0500
+++ b/Lib/test/test_re.py	Fri Jun 01 09:39:29 2012 +0300
@@ -533,6 +533,8 @@
             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
+        self.assertNotEqual(re.match(r"\u1234", "\u1234"), None)
+        self.assertNotEqual(re.match(r"\U00001234", "\u1234"), None)
         self.assertRaises(re.error, re.match, "\911", "")
 
     def test_sre_character_class_literals(self):
@@ -543,6 +545,8 @@
             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
+        self.assertNotEqual(re.match(r"[\u1234-\u1236]", "\u1235"), None)
+        self.assertNotEqual(re.match(r"[\U00001234]", "\u1234"), None)
         self.assertRaises(re.error, re.match, "[\911]", "")
 
     def test_bug_113254(self):