Index: Lib/test/test_re.py =================================================================== --- Lib/test/test_re.py (révision 76112) +++ Lib/test/test_re.py (copie de travail) @@ -716,6 +716,17 @@ self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII) self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE) self.assertRaises(ValueError, re.compile, '(?au)\w') + + def test_bug_6509(self): + # Replacement strings of both types must parse properly. + pat = re.compile('a(\w)') + self.assertEqual(pat.sub('b\\1', 'ac'), 'bc') + pat = re.compile('a(.)') + self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234') + pat = re.compile(b'a(\w)') + self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc') + pat = re.compile(b'a(.)') + self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD') def run_re_tests(): Index: Lib/sre_parse.py =================================================================== --- Lib/sre_parse.py (révision 76112) +++ Lib/sre_parse.py (copie de travail) @@ -786,12 +786,18 @@ groups = [] groupsappend = groups.append literals = [None] * len(p) + if isinstance(source, str): + encode = lambda x: x + else: + # The tokenizer implicitly decodes bytes objects as latin-1, we must + # therefore re-encode the final representation. + encode = lambda x: x.encode('latin1') for c, s in p: if c is MARK: groupsappend((i, s)) # literal[i] is already None else: - literals[i] = s + literals[i] = encode(s) i = i + 1 return groups, literals