diff -r 8a2755f6ae96 Doc/library/re.rst --- a/Doc/library/re.rst Thu Sep 18 19:45:04 2014 +0300 +++ b/Doc/library/re.rst Thu Sep 18 23:27:28 2014 +0300 @@ -255,6 +255,7 @@ The special characters are: | Context of reference to group "quote" | Ways to reference it | +=======================================+==================================+ | in the same pattern itself | * ``(?P=quote)`` (as shown) | + | | * ``(?P=1)`` | | | * ``\1`` | +---------------------------------------+----------------------------------+ | when processing match object ``m`` | * ``m.group('quote')`` | @@ -265,9 +266,12 @@ The special characters are: | | * ``\1`` | +---------------------------------------+----------------------------------+ -``(?P=name)`` - A backreference to a named group; it matches whatever text was matched by the - earlier group named *name*. +``(?P=name)``, ``(?P=number)`` + A backreference to a group; it matches whatever text was matched by the + earlier group named *name* or numbered *number*. + + .. versionchanged:: 3.5 + Added support of group numbers. ``(?#...)`` A comment; the contents of the parentheses are simply ignored. diff -r 8a2755f6ae96 Lib/sre_compile.py --- a/Lib/sre_compile.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_compile.py Thu Sep 18 23:27:28 2014 +0300 @@ -470,12 +470,6 @@ def compile(p, flags=0): # print code - # XXX: get rid of this limitation! - if p.pattern.groups > 100: - raise AssertionError( - "sorry, but this version only supports 100 named groups" - ) - # map in either direction groupindex = p.pattern.groupdict indexgroup = [None] * p.pattern.groups diff -r 8a2755f6ae96 Lib/sre_constants.py --- a/Lib/sre_constants.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_constants.py Thu Sep 18 23:27:28 2014 +0300 @@ -15,7 +15,7 @@ MAGIC = 20031017 -from _sre import MAXREPEAT +from _sre import MAXREPEAT, MAXGROUPS # SRE standard exception (access as sre.error) # should this really be here? 
diff -r 8a2755f6ae96 Lib/sre_parse.py --- a/Lib/sre_parse.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_parse.py Thu Sep 18 23:27:28 2014 +0300 @@ -72,6 +72,8 @@ class Pattern: def opengroup(self, name=None): gid = self.groups self.groups = gid + 1 + if self.groups > MAXGROUPS: + raise error("groups number is too large") if name is not None: ogid = self.groupdict.get(name, None) if ogid is not None: @@ -601,13 +603,23 @@ def _parse(source, state): name += char if not name: raise error("missing group name") - if not name.isidentifier(): - raise error("bad character in backref group name " - "%r" % name) - gid = state.groupdict.get(name) - if gid is None: - msg = "unknown group name: {0!r}".format(name) - raise error(msg) + if name.isidentifier(): + gid = state.groupdict.get(name) + if gid is None: + msg = "unknown group name: {0!r}".format(name) + raise error(msg) + else: + try: + gid = int(name) + if gid < 0: + raise ValueError + except ValueError: + raise error("bad character in backref group name " + "%r" % name) from None + if not gid: + raise error("bad group number") + if gid >= MAXGROUPS: + raise error("the group number is too large") subpatternappend((GROUPREF, gid)) continue else: @@ -663,8 +675,14 @@ def _parse(source, state): else: try: condgroup = int(condname) + if condgroup < 0: + raise ValueError except ValueError: raise error("bad character in group name") + if not condgroup: + raise error("bad group number") + if condgroup >= MAXGROUPS: + raise error("the group number is too large") elif char in FLAGS: # flags state.flags |= FLAGS[char] @@ -788,6 +806,8 @@ def parse_template(source, pattern): index = int(name) if index < 0: raise error("negative group number") + if index >= MAXGROUPS: + raise error("the group number is too large") except ValueError: if not name.isidentifier(): raise error("bad character in group name") diff -r 8a2755f6ae96 Lib/test/test_re.py --- a/Lib/test/test_re.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/test/test_re.py Thu 
Sep 18 23:27:28 2014 +0300 @@ -193,10 +193,12 @@ class ReTests(unittest.TestCase): def test_symbolic_groups(self): re.compile('(?P<a>x)(?P=a)(?(a)y)') re.compile('(?P<a1>x)(?P=a1)(?(a1)y)') + re.compile('(?P<a>x)(?P=1)(?(1)y)') self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)') self.assertRaises(re.error, re.compile, '(?Px)') self.assertRaises(re.error, re.compile, '(?P=)') - self.assertRaises(re.error, re.compile, '(?P=1)') + self.assertRaises(re.error, re.compile, '(?P=0)') + self.assertRaises(re.error, re.compile, '(?P=-1)') self.assertRaises(re.error, re.compile, '(?P=a)') self.assertRaises(re.error, re.compile, '(?P=a1)') self.assertRaises(re.error, re.compile, '(?P=a.)') @@ -212,6 +214,10 @@ class ReTests(unittest.TestCase): re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)') self.assertRaises(re.error, re.compile, '(?P<©>x)') + # Support > 100 groups. + pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?P=200)' % pat + self.assertEqual(re.match(pat, 'xc8yc8').span(), (0, 6)) def test_symbolic_refs(self): self.assertRaises(re.error, re.sub, '(?P<a>x)', '\gx)', r'\g<µ>', 'xx'), 'xx') self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx') + # Support > 100 groups. + pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) + self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8') def test_re_subn(self): self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) @@ -404,6 +413,10 @@ class ReTests(unittest.TestCase): self.assertIsNone(p.match('abd')) self.assertIsNone(p.match('ac')) + # Support > 100 groups. 
+ pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) + pat = '(?:%s)(?(200)z)' % pat + self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), diff -r 8a2755f6ae96 Modules/_sre.c --- a/Modules/_sre.c Thu Sep 18 19:45:04 2014 +0300 +++ b/Modules/_sre.c Thu Sep 18 23:27:28 2014 +0300 @@ -1933,10 +1933,11 @@ static int static int _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) { - if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS) + if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || + code >= end || end[-1] != SRE_OP_SUCCESS) FAIL; if (groups == 0) /* fix for simplejson */ - groups = 100; /* 100 groups should always be safe */ + groups = SRE_MAXGROUPS; /* should always be safe */ return _validate_inner(code, end-1, groups); } @@ -2747,6 +2748,12 @@ PyMODINIT_FUNC PyInit__sre(void) Py_DECREF(x); } + x = PyLong_FromUnsignedLong(SRE_MAXGROUPS); + if (x) { + PyDict_SetItemString(d, "MAXGROUPS", x); + Py_DECREF(x); + } + x = PyUnicode_FromString(copyright); if (x) { PyDict_SetItemString(d, "copyright", x); diff -r 8a2755f6ae96 Modules/sre.h --- a/Modules/sre.h Thu Sep 18 19:45:04 2014 +0300 +++ b/Modules/sre.h Thu Sep 18 23:27:28 2014 +0300 @@ -52,8 +52,7 @@ typedef struct { typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch); -/* FIXME: shouldn't be a constant, really... */ -#define SRE_MARK_SIZE 200 +#define SRE_MAXGROUPS 1000 typedef struct SRE_REPEAT_T { Py_ssize_t count; @@ -76,7 +75,8 @@ typedef struct { /* registers */ Py_ssize_t lastindex; Py_ssize_t lastmark; - void* mark[SRE_MARK_SIZE]; + /* FIXME: shouldn't be a constant, really... */ + void* mark[SRE_MAXGROUPS*2]; /* dynamically allocated stuff */ char* data_stack; size_t data_stack_size;