Use str.isidentifier() to validate regular-expression group names.

Removes the ASCII-only helpers isident(), isdigit() and isname() from
Lib/sre_parse.py and replaces every isname(...) call with
str.isidentifier(), so a group name may be any valid Python identifier,
including non-ASCII ones.  Adds tests for Unicode group names in patterns
and in \g<name> replacement templates.

NOTE(review): this patch was recovered from a copy whose newlines and
leading whitespace had been collapsed and whose non-ASCII literals were
mis-decoded.  Line boundaries follow the hunk line counts; indentation of
context lines was reconstructed from the code's nesting -- verify against
revision 2c12a1236fdc before applying.

diff -r 2c12a1236fdc Lib/sre_parse.py
--- a/Lib/sre_parse.py	Thu Jan 24 13:44:18 2013 +0200
+++ b/Lib/sre_parse.py	Thu Jan 24 17:23:22 2013 +0200
@@ -224,21 +224,6 @@
     def seek(self, index):
         self.index, self.next = index
 
-def isident(char):
-    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
-
-def isdigit(char):
-    return "0" <= char <= "9"
-
-def isname(name):
-    # check that group name is a valid string
-    if not isident(name[0]):
-        return False
-    for char in name[1:]:
-        if not isident(char) and not isdigit(char):
-            return False
-    return True
-
 def _class_escape(source, escape):
     # handle escape code inside character class
     code = ESCAPES.get(escape)
@@ -582,7 +567,7 @@
                         group = 1
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                     elif sourcematch("="):
                         # named backreference
@@ -596,7 +581,7 @@
                             name = name + char
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                         gid = state.groupdict.get(name)
                         if gid is None:
@@ -650,7 +635,7 @@
                     group = 2
                     if not condname:
                         raise error("missing group name")
-                    if isname(condname):
+                    if condname.isidentifier():
                         condgroup = state.groupdict.get(condname)
                         if condgroup is None:
                             raise error("unknown group name")
@@ -787,7 +772,7 @@
                     if index < 0:
                         raise error("negative group number")
                 except ValueError:
-                    if not isname(name):
+                    if not name.isidentifier():
                         raise error("bad character in group name")
                     try:
                         index = pattern.groupindex[name]
diff -r 2c12a1236fdc Lib/test/test_re.py
--- a/Lib/test/test_re.py	Thu Jan 24 13:44:18 2013 +0200
+++ b/Lib/test/test_re.py	Thu Jan 24 17:23:22 2013 +0200
@@ -192,6 +192,16 @@
         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
 
+    def test_unicode_symbolic_groups(self):
+        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+        self.assertRaises(re.error, re.compile, '(?P<©>x)')
+
+    def test_unicode_symbolic_refs(self):
+        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
+
     def test_re_subn(self):
         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
         self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))