diff -r 30a6c74ad87f Doc/library/re.rst --- a/Doc/library/re.rst Tue Nov 11 21:13:28 2014 +0200 +++ b/Doc/library/re.rst Tue Nov 11 22:20:28 2014 +0200 @@ -119,6 +119,21 @@ The special characters are: characters as possible will be matched. Using ``.*?`` in the previous expression will match only ``'

'``. +``*+``, ``++``, ``?+`` + Like the ``'*'``, ``'+'``, and ``'?'`` qualifiers, those where ``'+'`` is + appended also match as many times as possible. However, unlike the true greedy + qualifiers, these do not allow back-tracking when the expression following them + fails to match. These are known as :dfn:`Possessive` qualifiers. For example, + ``a*a`` will match ``'aaaa'`` because the ``a*`` will match all 4 ``'a'``\ s, but, + when the final ``'a'`` is encountered, the expression is backtracked so that in the + end the ``a*`` ends up matching 3 ``'a'``\ s total, and the fourth ``'a'`` is matched + by the final ``'a'``. However, when ``a*+a`` is used to match ``'aaaa'``, the + ``a*+`` will match all 4 ``'a'``, but when the final ``'a'`` fails to find any more + characters to match, the expression cannot be backtracked and will thus fail to + match. + + .. versionadded:: 3.5 + ``{m}`` Specifies that exactly *m* copies of the previous RE should be matched; fewer matches cause the entire RE not to match. For example, ``a{6}`` will match @@ -140,6 +155,18 @@ The special characters are: 6-character string ``'aaaaaa'``, ``a{3,5}`` will match 5 ``'a'`` characters, while ``a{3,5}?`` will only match 3 characters. +``{m,n}+`` + Causes the resulting RE to match from *m* to *n* repetitions of the preceding + RE, attempting to match as many repetitions as possible *without* establishing any + backtracking points. This is the possessive version of the qualifier above. For + example, on the 6-character string ``'aaaaaa'``, ``a{3,5}+aa`` will attempt to match 5 + ``'a'`` characters, then, requiring 2 more ``'a'``\ s, will need more characters than + available and thus fail, while ``a{3,5}aa`` will match with ``a{3,5}`` capturing + 5, then 4 ``'a'``\ s by backtracking and then the final 2 ``'a'``\ s are matched by the + final ``aa`` in the pattern. + + .. 
versionadded:: 3.5 + ``'\'`` Either escapes special characters (permitting you to match characters like ``'*'``, ``'?'``, and so forth), or signals a special sequence; special @@ -304,6 +331,20 @@ The special characters are: some fixed length. Patterns which start with negative lookbehind assertions may match at the beginning of the string being searched. +``(?>...)`` + Attempts to match ``...`` as if it was a separate Regular Expression, and if + successful, continues to match the rest of the pattern following it. If the + subsequent pattern fails to match, the stack can only be unwound to a point + *before* the ``(?>...)`` because once exited, the expression, known as an + :dfn:`Atomic Group`, has thrown away all stack points within itself. Thus, + ``(?>.*).`` would never match anything because first the ``.*`` would match all + characters possible, then, having nothing left to match, the final ``.`` would + fail to match. Since there are no stack points saved in the Atomic Group, and + there is no stack point before it, the entire expression would thus fail to + match. + + .. versionadded:: 3.5 + ``(?(id/name)yes-pattern|no-pattern)`` Will try to match with ``yes-pattern`` if the group with given *id* or *name* exists, and with ``no-pattern`` if it doesn't. 
``no-pattern`` is diff -r 30a6c74ad87f Lib/sre_compile.py --- a/Lib/sre_compile.py Tue Nov 11 21:13:28 2014 +0200 +++ b/Lib/sre_compile.py Tue Nov 11 22:20:28 2014 +0200 @@ -17,7 +17,7 @@ from sre_constants import * assert _sre.MAGIC == MAGIC, "SRE module mismatch" _LITERAL_CODES = {LITERAL, NOT_LITERAL} -_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT} +_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT} _SUCCESS_CODES = {SUCCESS, FAILURE} _ASSERT_CODES = {ASSERT, ASSERT_NOT} @@ -117,6 +117,8 @@ def _compile(code, pattern, flags): elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: emit(REPEAT_ONE) + elif op is POSSESSIVE_REPEAT: + emit(POSSESSIVE_ONE) else: emit(MIN_REPEAT_ONE) skip = _len(code); emit(0) @@ -125,6 +127,14 @@ def _compile(code, pattern, flags): _compile(code, av[2], flags) emit(SUCCESS) code[skip] = _len(code) - skip + elif op is POSSESSIVE_REPEAT: + emit(POSSESSIVE_REPEAT) + skip = _len(code); emit(0) + emit(av[0]) + emit(av[1]) + _compile(code, av[2], flags) + code[skip] = _len(code) - skip + emit(SUCCESS) else: emit(REPEAT) skip = _len(code); emit(0) @@ -132,6 +142,8 @@ def _compile(code, pattern, flags): emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip + # TODO: What if op is REPEAT, not MIN_REPEAT; + # Default of MIN_UNTIL may be wrong if op is MAX_REPEAT: emit(MAX_UNTIL) else: @@ -145,6 +157,17 @@ def _compile(code, pattern, flags): if av[0]: emit(MARK) emit((av[0]-1)*2+1) + elif op is ATOMIC_GROUP: + # Atomic Groups are handled by starting with an Atomic + # Group op code, then putting in the atomic group pattern + # and finally a success op code to tell any repeat + # operations within the Atomic Group to stop eating and + # pop their stack if they reach it + emit(ATOMIC_GROUP) + skip = _len(code); emit(0) + _compile(code, av, flags) + emit(SUCCESS) + code[skip] = _len(code) - skip elif op in SUCCESS_CODES: emit(op) elif op in ASSERT_CODES: diff -r 30a6c74ad87f Lib/sre_constants.py --- 
a/Lib/sre_constants.py Tue Nov 11 21:13:28 2014 +0200 +++ b/Lib/sre_constants.py Tue Nov 11 22:20:28 2014 +0200 @@ -13,7 +13,7 @@ # update when constants are added or removed -MAGIC = 20140917 +MAGIC = 20141113 from _sre import MAXREPEAT, MAXGROUPS @@ -99,6 +99,9 @@ OPCODES = _makecodes(""" SUBPATTERN MIN_REPEAT_ONE RANGE_IGNORE + ATOMIC_GROUP + POSSESSIVE_REPEAT + POSSESSIVE_ONE MIN_REPEAT MAX_REPEAT """) @@ -126,6 +129,10 @@ CHCODES = _makecodes(""" CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK """) +# group types +SRE_GROUP_IGNORE = "ignore_this_group" +SRE_GROUP_CAPTURE = "capture_this_group" +SRE_GROUP_NON_CAPTURE = "non_capturing_group" # replacement operations for "ignore case" mode OP_IGNORE = { diff -r 30a6c74ad87f Lib/sre_parse.py --- a/Lib/sre_parse.py Tue Nov 11 21:13:28 2014 +0200 +++ b/Lib/sre_parse.py Tue Nov 11 22:20:28 2014 +0200 @@ -24,7 +24,7 @@ HEXDIGITS = frozenset("0123456789abcdefA WHITESPACE = frozenset(" \t\n\r\v\f") -_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT}) +_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT}) _UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY}) ESCAPES = { @@ -173,6 +173,10 @@ class SubPattern: i, j = av.getwidth() lo = lo + i hi = hi + j + elif op is ATOMIC_GROUP: + i, j = av.getwidth() + lo = lo + i + hi = hi + j elif op is SUBPATTERN: i, j = av[1].getwidth() lo = lo + i @@ -614,19 +618,25 @@ def _parse(source, state): raise source.error("multiple repeat", source.tell() - here + len(this)) if sourcematch("?"): + # Non-Greedy Match subpattern[-1] = (MIN_REPEAT, (min, max, item)) + elif sourcematch("+"): + # Possessive Match (Always Greedy) + subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item)) else: + # Greedy Match subpattern[-1] = (MAX_REPEAT, (min, max, item)) elif this == ".": subpatternappend((ANY, None)) elif this == "(": - group = 1 + grouptype = SRE_GROUP_CAPTURE name = None condgroup = None + atomic = False if sourcematch("?"): - group = 0 + grouptype = 
SRE_GROUP_IGNORE # options char = sourceget() if char is None: @@ -636,7 +646,7 @@ def _parse(source, state): if sourcematch("<"): # named group: skip forward to end of name name = source.getuntil(">") - group = 1 + grouptype = SRE_GROUP_CAPTURE if not name: raise source.error("missing group name", 1) if not name.isidentifier(): @@ -666,7 +676,7 @@ def _parse(source, state): len(char)) elif char == ":": # non-capturing group - group = 2 + grouptype = SRE_GROUP_NON_CAPTURE elif char == "#": # comment while True: @@ -694,7 +704,7 @@ def _parse(source, state): elif char == "(": # conditional backreference group condname = source.getuntil(")") - group = 2 + grouptype = SRE_GROUP_NON_CAPTURE if not condname: raise source.error("missing group name", 1) if condname.isidentifier(): @@ -716,6 +726,10 @@ def _parse(source, state): if condgroup >= MAXGROUPS: raise source.error("the group number is too large", len(condname) + 1) + elif char == ">": + # non-capturing, atomic group + grouptype = SRE_GROUP_NON_CAPTURE + atomic = True elif char in FLAGS: # flags state.flags |= FLAGS[char] @@ -724,9 +738,9 @@ def _parse(source, state): verbose = state.flags & SRE_FLAG_VERBOSE else: raise source.error("unexpected end of pattern") - if group: + if grouptype != SRE_GROUP_IGNORE: # parse group contents - if group == 2: + if grouptype == SRE_GROUP_NON_CAPTURE: # anonymous group group = None else: @@ -742,7 +756,12 @@ def _parse(source, state): raise source.error("unbalanced parenthesis") if group is not None: state.closegroup(group, p) - subpatternappend((SUBPATTERN, (group, p))) + if atomic: + # TODO: Assert that group is always None in this + # case + subpatternappend((ATOMIC_GROUP, p)) + else: + subpatternappend((SUBPATTERN, (group, p))) else: while True: char = sourceget() diff -r 30a6c74ad87f Lib/test/test_re.py --- a/Lib/test/test_re.py Tue Nov 11 21:13:28 2014 +0200 +++ b/Lib/test/test_re.py Tue Nov 11 22:20:28 2014 +0200 @@ -67,6 +67,23 @@ class ReTests(unittest.TestCase): 
self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) self.assertIsNone(re.match('a+', 'xxx')) + def test_branching(self): + """Test Branching + Test expressions using the OR ('|') operator.""" + self.assertEqual(re.match('(ab|ba)', 'ab').span(), (0, 2)) + self.assertEqual(re.match('(ab|ba)', 'ba').span(), (0, 2)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'abc').span(), + (0, 3)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'bac').span(), + (0, 3)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'ca').span(), + (0, 2)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'cb').span(), + (0, 2)) + self.assertEqual(re.match('((a)|(b)|(c))', 'a').span(), (0, 1)) + self.assertEqual(re.match('((a)|(b)|(c))', 'b').span(), (0, 1)) + self.assertEqual(re.match('((a)|(b)|(c))', 'c').span(), (0, 1)) + def bump_num(self, matchobj): int_value = int(matchobj.group(0)) return str(int_value + 1) @@ -1129,7 +1146,8 @@ class ReTests(unittest.TestCase): self.assertTrue(q) def test_dollar_matches_twice(self): - "$ matches the end of string, and just before the terminating \n" + """Test that $ does not include \\n + $ matches the end of string, and just before the terminating \n""" pattern = re.compile('$') self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') @@ -1469,6 +1487,63 @@ SUBPATTERN None self.assertIn(' at position 77', str(err)) self.assertIn('(line 5, column 17)', str(err)) + def test_possessive_qualifiers(self): + """Test Possessive Qualifiers + test qualifiers of the form @+ for some repetition operator @, + e.g. 
x{3,5}+ meaning match from 3 to 5 greedily and proceed + without creating a stack frame for rolling the stack back and + trying 1 or more fewer matches.""" + self.assertIsNone(re.match('e*+e', 'eeee')) + self.assertEqual(re.match('e++a', 'eeea').group(0), 'eeea') + self.assertEqual(re.match('e?+a', 'ea').group(0), 'ea') + self.assertEqual(re.match('e{2,4}+a', 'eeea').group(0), 'eeea') + self.assertIsNone(re.match('(.)++.', 'ee')) + self.assertEqual(re.match('(ae)*+a', 'aea').groups(), ('ae',)) + self.assertEqual(re.match('([ae][ae])?+a', 'aea').groups(), + ('ae',)) + self.assertEqual(re.match('(e?){2,4}+a', 'eeea').groups(), + ('',)) + self.assertEqual(re.match('()*+a', 'a').groups(), ('',)) + self.assertEqual(re.search('x*+', 'axx').span(0), (0, 0)) + self.assertEqual(re.search('x*+', 'axx').span(), (0, 0)) + self.assertEqual(re.search('x++', 'axx').span(0), (1, 3)) + self.assertEqual(re.search('x++', 'axx').span(), (1, 3)) + self.assertEqual(re.match('a*+', 'xxx').span(0), (0, 0)) + self.assertEqual(re.match('a*+', 'xxx').span(), (0, 0)) + self.assertEqual(re.match('x*+', 'xxxa').span(0), (0, 3)) + self.assertEqual(re.match('x*+', 'xxxa').span(), (0, 3)) + self.assertIsNone(re.match('a++', 'xxx')) + self.assertIsNone(re.match("^(\w){1}+$", "abc")) + self.assertIsNone(re.match("^(\w){1,2}+$", "abc")) + + self.assertEqual(re.match("^(\w){3}+$", "abc").group(1), "c") + self.assertEqual(re.match("^(\w){1,3}+$", "abc").group(1), "c") + self.assertEqual(re.match("^(\w){1,4}+$", "abc").group(1), "c") + + self.assertIsNone(re.match("^x{1}+$", "xxx")) + self.assertIsNone(re.match("^x{1,2}+$", "xxx")) + + self.assertTrue(re.match("^x{3}+$", "xxx")) + self.assertTrue(re.match("^x{1,3}+$", "xxx")) + self.assertTrue(re.match("^x{1,4}+$", "xxx")) + + self.assertIsNone(re.match("^x{}+$", "xxx")) + self.assertTrue(re.match("^x{}+$", "x{}")) + + def test_atomic_grouping(self): + """Test Atomic Grouping + test non-capturing groups of the form (?>...), which does + not 
maintain any stack point created within the group once the + group is finished being evaluated.""" + pattern1 = re.compile(r'a(?>bc|b)c') + self.assertIsNone(pattern1.match('abc')) + self.assertTrue(pattern1.match('abcc')) + self.assertIsNone(re.match(r'(?>.*).', 'abc')) + self.assertTrue(re.match(r'(?>x)++', 'xxx')) + self.assertTrue(re.match(r'(?>x++)', 'xxx')) + self.assertIsNone(re.match(r'(?>x)++x', 'xxx')) + self.assertIsNone(re.match(r'(?>x++)x', 'xxx')) + class PatternReprTests(unittest.TestCase): def check(self, pattern, expected): diff -r 30a6c74ad87f Modules/_sre.c --- a/Modules/_sre.c Tue Nov 11 21:13:28 2014 +0200 +++ b/Modules/_sre.c Tue Nov 11 22:20:28 2014 +0200 @@ -56,8 +56,8 @@ static char copyright[] = #define SRE_PY_MODULE "re" -/* defining this one enables tracing */ -#undef VERBOSE +/* uncomment this define to enable tracing */ +/* #define VERBOSE_SRE_ENGINE */ /* -------------------------------------------------------------------- */ /* optional features */ @@ -88,7 +88,7 @@ static char copyright[] = #define SRE_ERROR_MEMORY -9 /* out of memory */ #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ -#if defined(VERBOSE) +#if defined(VERBOSE_SRE_ENGINE) #define TRACE(v) printf v #else #define TRACE(v) @@ -1818,6 +1818,7 @@ static int case SRE_OP_REPEAT_ONE: case SRE_OP_MIN_REPEAT_ONE: + case SRE_OP_POSSESSIVE_ONE: { SRE_CODE min, max; GET_SKIP; @@ -1855,6 +1856,37 @@ static int } break; + case SRE_OP_POSSESSIVE_REPEAT: + { + SRE_CODE min, max; + GET_SKIP; + GET_ARG; min = arg; + GET_ARG; max = arg; + if (min > max) + FAIL; + if (max > SRE_MAXREPEAT) + FAIL; + if (!_validate_inner(code, code+skip-3, groups)) + FAIL; + code += skip-3; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + } + break; + + case SRE_OP_ATOMIC_GROUP: + { + GET_SKIP; + if (!_validate_inner(code, code+skip-2, groups)) + FAIL; + code += skip-2; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + } + break; + case SRE_OP_GROUPREF: case SRE_OP_GROUPREF_IGNORE: 
GET_ARG; diff -r 30a6c74ad87f Modules/sre_constants.h --- a/Modules/sre_constants.h Tue Nov 11 21:13:28 2014 +0200 +++ b/Modules/sre_constants.h Tue Nov 11 22:20:28 2014 +0200 @@ -11,7 +11,7 @@ * See the _sre.c file for information on usage and redistribution. */ -#define SRE_MAGIC 20140917 +#define SRE_MAGIC 20141113 #define SRE_OP_FAILURE 0 #define SRE_OP_SUCCESS 1 #define SRE_OP_ANY 2 @@ -45,6 +45,9 @@ #define SRE_OP_SUBPATTERN 30 #define SRE_OP_MIN_REPEAT_ONE 31 #define SRE_OP_RANGE_IGNORE 32 +#define SRE_OP_ATOMIC_GROUP 33 +#define SRE_OP_POSSESSIVE_REPEAT 34 +#define SRE_OP_POSSESSIVE_ONE 35 #define SRE_AT_BEGINNING 0 #define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BEGINNING_STRING 2 diff -r 30a6c74ad87f Modules/sre_lib.h --- a/Modules/sre_lib.h Tue Nov 11 21:13:28 2014 +0200 +++ b/Modules/sre_lib.h Tue Nov 11 22:20:28 2014 +0200 @@ -467,6 +467,9 @@ do { \ #define JUMP_BRANCH 11 #define JUMP_ASSERT 12 #define JUMP_ASSERT_NOT 13 +#define JUMP_POSS_REPEAT_1 14 +#define JUMP_POSS_REPEAT_2 15 +#define JUMP_ATOMIC_GROUP 16 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \ DATA_ALLOC(SRE(match_context), nextctx); \ @@ -871,6 +874,57 @@ entrance: } RETURN_FAILURE; + case SRE_OP_POSSESSIVE_ONE: + /* match repeated sequence (maximizing regexp) without + backtracking */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. 
for other cases, + use the MAX_REPEAT operator */ + + /* <1=min> <2=max> item + tail */ + + TRACE(("|%p|%p|POSSESSIVE_ONE %d %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1], ctx->pattern[2])); + + if (ctx->ptr + ctx->pattern[1] > end) { + RETURN_FAILURE; /* cannot match */ + } + + state->ptr = ctx->ptr; + + ret = SRE(count)(state, ctx->pattern + 3, ctx->pattern[2]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + ctx->count = ret; + ctx->ptr += ctx->count; + + /* when we arrive here, count contains the number of + matches, and ctx->ptr points to the tail of the target + string. check if the rest of the pattern matches, + and fail if not. */ + + /* Test for not enough repetitions in match */ + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) { + RETURN_FAILURE; + } + + /* Update the pattern to point to the next op code */ + ctx->pattern += ctx->pattern[0]; + + /* Let the tail be evaluated separately and consider this + match successful. */ + if (*ctx->pattern == SRE_OP_SUCCESS) { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + } + + /* Attempt to match the rest of the string */ + break; + case SRE_OP_REPEAT: /* create repeat context. all the hard work is done by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ @@ -1031,6 +1085,138 @@ entrance: state->ptr = ctx->ptr; RETURN_FAILURE; + case SRE_OP_POSSESSIVE_REPEAT: + /* create possessive repeat contexts. */ + /* <1=min> <2=max> pattern + tail */ + TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1], ctx->pattern[2])); + + /* Set the global Input pointer to this context's Input + pointer */ + state->ptr = ctx->ptr; + + /* Initialize Count to 0 */ + ctx->count = 0; + + /* Check for minimum required matches. 
*/ + while (ctx->count < (Py_ssize_t)ctx->pattern[1]) { + /* not enough matches */ + DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, + &ctx->pattern[3]); + if (ret) { + RETURN_ON_ERROR(ret); + ctx->count++; + } + else { + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + } + + /* Clear the context's Input stream pointer so that it + doesn't match the global state so that the while loop can + be entered. */ + ctx->ptr = NULL; + + /* Keep trying to parse the sub-pattern until the + end is reached, creating a new context each time. */ + while ((ctx->count < (Py_ssize_t)ctx->pattern[2] || + (Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT) && + state->ptr != ctx->ptr) { + /* Save the Capture Group Marker state into the current + Context and back up the current highest number + Capture Group marker. */ + LASTMARK_SAVE(); + MARK_PUSH(ctx->lastmark); + + /* zero-width match protection */ + /* Set the context's Input Stream pointer to be the + current Input Stream pointer from the global + state. When the loop reaches the next iteration, + the context will then store the last known good + position with the global state holding the Input + Stream position that has been updated with + the most recent match. Thus, if state's Input + stream remains the same as the one stored in the + current Context, we know we have successfully + matched an empty string and that all subsequent + matches will also be the empty string until the + maximum number of matches are counted, and because + of this, we could immediately stop at that point and + consider this match successful. */ + ctx->ptr = state->ptr; + + /* We have not reached the maximum matches, so try to + match once more. */ + DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, + &ctx->pattern[3]); + + /* Check to see if the last attempted match + succeeded. */ + if (ret) { + /* Drop the saved highest number Capture Group + marker saved above and use the newly updated + value. 
*/ + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + + /* Success, increment the count. */ + ctx->count++; + } + /* Last attempted match failed. */ + else { + /* Restore the previously saved highest number + Capture Group marker since the last iteration + did not match, then restore that to the global + state. */ + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + + /* We have sufficient matches, so exit loop. */ + break; + } + } + + /* Evaluate Tail */ + /* Jump to end of pattern indicated by skip, and then skip + the SUCCESS op code that follows it. */ + ctx->pattern += ctx->pattern[0] + 1; + ctx->ptr = state->ptr; + break; + + case SRE_OP_ATOMIC_GROUP: + /* Atomic Group Sub Pattern */ + /* pattern tail */ + TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); + + /* Set the global Input pointer to this context's Input + pointer */ + state->ptr = ctx->ptr; + + /* Evaluate the Atomic Group in a new context, terminating + when the end of the group, represented by a SUCCESS op + code, is reached. */ + /* Group Pattern begins at an offset of 1 code. */ + DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group, + &ctx->pattern[1]); + + /* Test Exit Condition */ + RETURN_ON_ERROR(ret); + + if (ret == 0) { + /* Atomic Group failed to Match. */ + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + /* Evaluate Tail */ + /* Jump to end of pattern indicated by skip, and then skip + the SUCCESS op code that follows it. 
*/ + ctx->pattern += ctx->pattern[0]; + ctx->ptr = state->ptr; + break; + case SRE_OP_GROUPREF: /* match backreference */ TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, @@ -1175,6 +1361,12 @@ exit: case JUMP_MIN_UNTIL_1: TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); goto jump_min_until_1; + case JUMP_POSS_REPEAT_1: + TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr)); + goto jump_poss_repeat_1; + case JUMP_POSS_REPEAT_2: + TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr)); + goto jump_poss_repeat_2; case JUMP_REPEAT: TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); goto jump_repeat; @@ -1187,6 +1379,9 @@ exit: case JUMP_MIN_REPEAT_ONE: TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); goto jump_min_repeat_one; + case JUMP_ATOMIC_GROUP: + TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); + goto jump_atomic_group; case JUMP_ASSERT: TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); goto jump_assert;