diff -r c2f3b7c56dff Lib/sre_parse.py --- a/Lib/sre_parse.py Sat Oct 15 15:41:05 2016 +0900 +++ b/Lib/sre_parse.py Sun Oct 23 09:44:41 2016 +0200 @@ -725,8 +725,8 @@ raise source.error("bad group number", len(condname) + 1) if condgroup >= MAXGROUPS: - raise source.error("invalid group reference", - len(condname) + 1) + raise source.error("invalid group reference %r" % + condgroup, len(condname) + 1) state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags @@ -883,7 +883,9 @@ literals = [] literal = [] lappend = literal.append - def addgroup(index): + def addgroup(index, pos): + if index > pattern.groups: + raise s.error("invalid group reference %r" % index, pos) if literal: literals.append(''.join(literal)) del literal[:] @@ -916,9 +918,9 @@ raise s.error("bad character in group name %r" % name, len(name) + 1) from None if index >= MAXGROUPS: - raise s.error("invalid group reference", + raise s.error("invalid group reference %r" % index, len(name) + 1) - addgroup(index) + addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: this += sget() @@ -939,7 +941,7 @@ 'range 0-0o377' % this, len(this)) lappend(chr(c)) if not isoctal: - addgroup(int(this[1:])) + addgroup(int(this[1:]), len(this)-1) else: try: this = chr(ESCAPES[this][1]) @@ -966,5 +968,5 @@ for index, group in groups: literals[index] = g(group) or empty except IndexError: - raise error("invalid group reference") + raise error("invalid group reference %r" % index) return empty.join(literals) diff -r c2f3b7c56dff Lib/test/test_re.py --- a/Lib/test/test_re.py Sat Oct 15 15:41:05 2016 +0900 +++ b/Lib/test/test_re.py Sun Oct 23 09:44:41 2016 +0200 @@ -5,7 +5,6 @@ import re from re import Scanner import sre_compile -import sre_constants import sys import string import traceback @@ -186,18 +185,19 @@ r'octal escape value \777 outside of ' r'range 0-0o377', 0) - self.checkTemplateError('x', r'\1', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\8', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\9', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\11', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\18', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\90', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\99', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8' - self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1' - self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0' + self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1) + self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1) + self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1) + self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1) + self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1) + self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1) # in python2.3 (etc), these loop endlessly in sre_parser.py self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') @@ -271,9 +271,9 @@ self.checkTemplateError('(?Px)', r'\g<1a1>', 'xx', "bad character in group name '1a1'", 3) self.checkTemplateError('(?Px)', r'\g<2>', 'xx', - 'invalid group reference') + 'invalid group reference 2', 3) self.checkTemplateError('(?Px)', r'\2', 'xx', - 'invalid group reference') + 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?Px)', r'\g', 'xx') self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') @@ -558,10 +558,11 @@ 'two branches', 10) def test_re_groupref_overflow(self): - self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx', - 'invalid group reference', 3) - self.checkPatternError(r'(?P)(?(%d))' % sre_constants.MAXGROUPS, - 'invalid group reference', 10) + from sre_constants import MAXGROUPS + self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx', + 'invalid group reference %d' % MAXGROUPS, 3) + self.checkPatternError(r'(?P)(?(%d))' % MAXGROUPS, + 'invalid group reference %d' % MAXGROUPS, 10) def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),