Issue #13169: replace the hard-coded repeat limit 65535 with a MAXREPEAT
constant exported by the _sre module (SRE_MAXREPEAT in the C sources),
reject or clamp oversized repetition counts in sre_parse, and add tests.

diff -r e6cc582cafce Lib/sre_compile.py
--- a/Lib/sre_compile.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_compile.py	Thu Jan 31 17:21:55 2013 +0200
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
diff -r e6cc582cafce Lib/sre_constants.py
--- a/Lib/sre_constants.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_constants.py	Thu Jan 31 17:21:55 2013 +0200
@@ -15,10 +15,6 @@
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
diff -r e6cc582cafce Lib/sre_parse.py
--- a/Lib/sre_parse.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_parse.py	Thu Jan 31 17:21:55 2013 +0200
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -537,10 +538,18 @@
                     continue
                 if lo:
                     min = int(lo)
+                    if MAXREPEAT <= min <= sys.maxsize:
+                        raise error("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error("bad repeat interval")
+                    if max < min:
+                        raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        if max <= sys.maxsize:
+                            raise error("the repetition number is too large")
+                        max = MAXREPEAT
+                if min > MAXREPEAT:
+                    min = MAXREPEAT
             else:
                 raise error("not supported")
             # figure out which item to repeat
diff -r e6cc582cafce Lib/test/test_re.py
--- a/Lib/test/test_re.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/test/test_re.py	Thu Jan 31 17:21:55 2013 +0200
@@ -1,4 +1,5 @@
-from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
+from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
+        cpython_only
 import io
 import re
 from re import Scanner
@@ -980,6 +981,41 @@
         self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
         self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertIsNone(re.match(r".{%d}" % 2**128, string))
+        self.assertEqual(re.match(r".{,%d}" % 2**128, string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % 2**128, string))
+        self.assertRaises(re.error, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        if MAXREPEAT > sys.maxsize:
+            self.skipTest('requires _sre.MAXREPEAT <= sys.maxsize')
+        # The subject text is local to each test, so it must be rebuilt here.
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(re.error, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff -r e6cc582cafce Modules/_sre.c
--- a/Modules/_sre.c	Thu Jan 31 16:11:47 2013 +0200
+++ b/Modules/_sre.c	Thu Jan 31 17:21:55 2013 +0200
@@ -492,7 +492,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
@@ -1109,7 +1109,7 @@
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1195,7 +1195,7 @@
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1273,7 +1273,7 @@
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3037,7 +3037,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3056,7 +3056,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3942,6 +3942,12 @@
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyUnicode_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
diff -r e6cc582cafce Modules/sre.h
--- a/Modules/sre.h	Thu Jan 31 16:11:47 2013 +0200
+++ b/Modules/sre.h	Thu Jan 31 17:21:55 2013 +0200
@@ -16,6 +16,11 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+#else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+#endif
 
 typedef struct {
     PyObject_VAR_HEAD