diff -r 30a6c74ad87f Doc/library/re.rst
--- a/Doc/library/re.rst Tue Nov 11 21:13:28 2014 +0200
+++ b/Doc/library/re.rst Tue Nov 11 22:20:28 2014 +0200
@@ -119,6 +119,21 @@ The special characters are:
characters as possible will be matched. Using ``.*?`` in the previous
expression will match only ``'
'``.
+``*+``, ``++``, ``?+``
+ Like the ``'*'``, ``'+'``, and ``'?'`` qualifiers, those where ``'+'`` is
+ appended also match as many times as possible. However, unlike the true greedy
+ qualifiers, these do not allow back-tracking when the expression following it
+ fails to match. These are known as :dfn:`Possessive` qualifiers. For example,
+ ``a*a`` will match ``'aaaa'`` because the ``a*`` will match all 4 ``'a'``\ s, but,
+ when the final ``'a'`` is encountered, the expression is backtracked so that in the
+ end the ``a*`` ends up matching 3 ``'a'``\ s total, and the fourth ``'a'`` is matched
+ by the final ``'a'``. However, when ``a*+a`` is used to match ``'aaaa'``, the
+ ``a*+`` will match all 4 ``'a'``\ s, but when the final ``'a'`` fails to find any more
+ characters to match, the expression cannot be backtracked and will thus fail to
+ match.
+
+ .. versionadded:: 3.5
+
``{m}``
Specifies that exactly *m* copies of the previous RE should be matched; fewer
matches cause the entire RE not to match. For example, ``a{6}`` will match
@@ -140,6 +155,18 @@ The special characters are:
6-character string ``'aaaaaa'``, ``a{3,5}`` will match 5 ``'a'`` characters,
while ``a{3,5}?`` will only match 3 characters.
+``{m,n}+``
+ Causes the resulting RE to match from *m* to *n* repetitions of the preceding
+ RE, attempting to match as many repetitions as possible *without* establishing any
+ backtracking points. This is the possessive version of the qualifier above. For
+ example, on the 6-character string ``'aaaaaa'``, ``a{3,5}+aa`` will attempt to match 5
+ ``'a'`` characters, then, requiring 2 more ``'a'``\ s, will need more characters than
+ available and thus fail, while ``a{3,5}aa`` will match with ``a{3,5}`` capturing
+ 5, then 4 ``'a'``\ s by backtracking and then the final 2 ``'a'``\ s are matched by the
+ final ``aa`` in the pattern.
+
+ .. versionadded:: 3.5
+
``'\'``
Either escapes special characters (permitting you to match characters like
``'*'``, ``'?'``, and so forth), or signals a special sequence; special
@@ -304,6 +331,20 @@ The special characters are:
some fixed length. Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.
+``(?>...)``
+ Attempts to match ``...`` as if it was a separate Regular Expression, and if
+ successful, continues to match the rest of the pattern following it. If the
+ subsequent pattern fails to match, the stack can only be unwound to a point
+ *before* the ``(?>...)`` because once exited, the expression, known as an
+ :dfn:`Atomic Group`, has thrown away all stack points within itself. Thus,
+ ``(?>.*).`` would never match anything because first the ``.*`` would match all
+ characters possible, then, having nothing left to match, the final ``.`` would
+ fail to match. Since there are no stack points saved in the Atomic Group, and
+ there is no stack point before it, the entire expression would thus fail to
+ match.
+
+ .. versionadded:: 3.5
+
``(?(id/name)yes-pattern|no-pattern)``
Will try to match with ``yes-pattern`` if the group with given *id* or
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is
diff -r 30a6c74ad87f Lib/sre_compile.py
--- a/Lib/sre_compile.py Tue Nov 11 21:13:28 2014 +0200
+++ b/Lib/sre_compile.py Tue Nov 11 22:20:28 2014 +0200
@@ -17,7 +17,7 @@ from sre_constants import *
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
-_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
+_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT}
_SUCCESS_CODES = {SUCCESS, FAILURE}
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
@@ -117,6 +117,8 @@ def _compile(code, pattern, flags):
elif _simple(av) and op is not REPEAT:
if op is MAX_REPEAT:
emit(REPEAT_ONE)
+ elif op is POSSESSIVE_REPEAT:
+ emit(POSSESSIVE_ONE)
else:
emit(MIN_REPEAT_ONE)
skip = _len(code); emit(0)
@@ -125,6 +127,14 @@ def _compile(code, pattern, flags):
_compile(code, av[2], flags)
emit(SUCCESS)
code[skip] = _len(code) - skip
+ elif op is POSSESSIVE_REPEAT:
+ emit(POSSESSIVE_REPEAT)
+ skip = _len(code); emit(0)
+ emit(av[0])
+ emit(av[1])
+ _compile(code, av[2], flags)
+ code[skip] = _len(code) - skip
+ emit(SUCCESS)
else:
emit(REPEAT)
skip = _len(code); emit(0)
@@ -132,6 +142,8 @@ def _compile(code, pattern, flags):
emit(av[1])
_compile(code, av[2], flags)
code[skip] = _len(code) - skip
+ # TODO: What if op is REPEAT, not MIN_REPEAT;
+ # Default of MIN_UNTIL may be wrong
if op is MAX_REPEAT:
emit(MAX_UNTIL)
else:
@@ -145,6 +157,17 @@ def _compile(code, pattern, flags):
if av[0]:
emit(MARK)
emit((av[0]-1)*2+1)
+ elif op is ATOMIC_GROUP:
+ # Atomic Groups are handled by starting with an Atomic
+ # Group op code, then putting in the atomic group pattern
+ # and finally a success op code to tell any repeat
+ # operations within the Atomic Group to stop eating and
+ # pop their stack if they reach it
+ emit(ATOMIC_GROUP)
+ skip = _len(code); emit(0)
+ _compile(code, av, flags)
+ emit(SUCCESS)
+ code[skip] = _len(code) - skip
elif op in SUCCESS_CODES:
emit(op)
elif op in ASSERT_CODES:
diff -r 30a6c74ad87f Lib/sre_constants.py
--- a/Lib/sre_constants.py Tue Nov 11 21:13:28 2014 +0200
+++ b/Lib/sre_constants.py Tue Nov 11 22:20:28 2014 +0200
@@ -13,7 +13,7 @@
# update when constants are added or removed
-MAGIC = 20140917
+MAGIC = 20141113
from _sre import MAXREPEAT, MAXGROUPS
@@ -99,6 +99,9 @@ OPCODES = _makecodes("""
SUBPATTERN
MIN_REPEAT_ONE
RANGE_IGNORE
+ ATOMIC_GROUP
+ POSSESSIVE_REPEAT
+ POSSESSIVE_ONE
MIN_REPEAT MAX_REPEAT
""")
@@ -126,6 +129,10 @@ CHCODES = _makecodes("""
CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
""")
+# group types
+SRE_GROUP_IGNORE = "ignore_this_group"
+SRE_GROUP_CAPTURE = "capture_this_group"
+SRE_GROUP_NON_CAPTURE = "non_capturing_group"
# replacement operations for "ignore case" mode
OP_IGNORE = {
diff -r 30a6c74ad87f Lib/sre_parse.py
--- a/Lib/sre_parse.py Tue Nov 11 21:13:28 2014 +0200
+++ b/Lib/sre_parse.py Tue Nov 11 22:20:28 2014 +0200
@@ -24,7 +24,7 @@ HEXDIGITS = frozenset("0123456789abcdefA
WHITESPACE = frozenset(" \t\n\r\v\f")
-_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT})
+_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT})
_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})
ESCAPES = {
@@ -173,6 +173,10 @@ class SubPattern:
i, j = av.getwidth()
lo = lo + i
hi = hi + j
+ elif op is ATOMIC_GROUP:
+ i, j = av.getwidth()
+ lo = lo + i
+ hi = hi + j
elif op is SUBPATTERN:
i, j = av[1].getwidth()
lo = lo + i
@@ -614,19 +618,25 @@ def _parse(source, state):
raise source.error("multiple repeat",
source.tell() - here + len(this))
if sourcematch("?"):
+ # Non-Greedy Match
subpattern[-1] = (MIN_REPEAT, (min, max, item))
+ elif sourcematch("+"):
+ # Possessive Match (Always Greedy)
+ subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item))
else:
+ # Greedy Match
subpattern[-1] = (MAX_REPEAT, (min, max, item))
elif this == ".":
subpatternappend((ANY, None))
elif this == "(":
- group = 1
+ grouptype = SRE_GROUP_CAPTURE
name = None
condgroup = None
+ atomic = False
if sourcematch("?"):
- group = 0
+ grouptype = SRE_GROUP_IGNORE
# options
char = sourceget()
if char is None:
@@ -636,7 +646,7 @@ def _parse(source, state):
if sourcematch("<"):
# named group: skip forward to end of name
name = source.getuntil(">")
- group = 1
+ grouptype = SRE_GROUP_CAPTURE
if not name:
raise source.error("missing group name", 1)
if not name.isidentifier():
@@ -666,7 +676,7 @@ def _parse(source, state):
len(char))
elif char == ":":
# non-capturing group
- group = 2
+ grouptype = SRE_GROUP_NON_CAPTURE
elif char == "#":
# comment
while True:
@@ -694,7 +704,7 @@ def _parse(source, state):
elif char == "(":
# conditional backreference group
condname = source.getuntil(")")
- group = 2
+ grouptype = SRE_GROUP_NON_CAPTURE
if not condname:
raise source.error("missing group name", 1)
if condname.isidentifier():
@@ -716,6 +726,10 @@ def _parse(source, state):
if condgroup >= MAXGROUPS:
raise source.error("the group number is too large",
len(condname) + 1)
+ elif char == ">":
+ # non-capturing, atomic group
+ grouptype = SRE_GROUP_NON_CAPTURE
+ atomic = True
elif char in FLAGS:
# flags
state.flags |= FLAGS[char]
@@ -724,9 +738,9 @@ def _parse(source, state):
verbose = state.flags & SRE_FLAG_VERBOSE
else:
raise source.error("unexpected end of pattern")
- if group:
+ if grouptype != SRE_GROUP_IGNORE:
# parse group contents
- if group == 2:
+ if grouptype == SRE_GROUP_NON_CAPTURE:
# anonymous group
group = None
else:
@@ -742,7 +756,12 @@ def _parse(source, state):
raise source.error("unbalanced parenthesis")
if group is not None:
state.closegroup(group, p)
- subpatternappend((SUBPATTERN, (group, p)))
+ if atomic:
+ # TODO: Assert that group is always None in this
+ # case
+ subpatternappend((ATOMIC_GROUP, p))
+ else:
+ subpatternappend((SUBPATTERN, (group, p)))
else:
while True:
char = sourceget()
diff -r 30a6c74ad87f Lib/test/test_re.py
--- a/Lib/test/test_re.py Tue Nov 11 21:13:28 2014 +0200
+++ b/Lib/test/test_re.py Tue Nov 11 22:20:28 2014 +0200
@@ -67,6 +67,23 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
self.assertIsNone(re.match('a+', 'xxx'))
+ def test_branching(self):
+ """Test Branching
+ Test expressions using the OR ('|') operator."""
+ self.assertEqual(re.match('(ab|ba)', 'ab').span(), (0, 2))
+ self.assertEqual(re.match('(ab|ba)', 'ba').span(), (0, 2))
+ self.assertEqual(re.match('(abc|bac|ca|cb)', 'abc').span(),
+ (0, 3))
+ self.assertEqual(re.match('(abc|bac|ca|cb)', 'bac').span(),
+ (0, 3))
+ self.assertEqual(re.match('(abc|bac|ca|cb)', 'ca').span(),
+ (0, 2))
+ self.assertEqual(re.match('(abc|bac|ca|cb)', 'cb').span(),
+ (0, 2))
+ self.assertEqual(re.match('((a)|(b)|(c))', 'a').span(), (0, 1))
+ self.assertEqual(re.match('((a)|(b)|(c))', 'b').span(), (0, 1))
+ self.assertEqual(re.match('((a)|(b)|(c))', 'c').span(), (0, 1))
+
def bump_num(self, matchobj):
int_value = int(matchobj.group(0))
return str(int_value + 1)
@@ -1129,7 +1146,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(q)
def test_dollar_matches_twice(self):
- "$ matches the end of string, and just before the terminating \n"
+ """Test that $ does not include \\n
+ $ matches the end of string, and just before the terminating \n"""
pattern = re.compile('$')
self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
@@ -1469,6 +1487,63 @@ SUBPATTERN None
self.assertIn(' at position 77', str(err))
self.assertIn('(line 5, column 17)', str(err))
+ def test_possessive_qualifiers(self):
+ """Test Possessive Qualifiers
+ test qualifiers of the form @+ for some repetition operator @,
+ e.g. x{3,5}+ meaning match from 3 to 5 greedily and proceed
+ without creating a stack frame for rolling the stack back and
+ trying 1 or more fewer matches."""
+ self.assertIsNone(re.match('e*+e', 'eeee'))
+ self.assertEqual(re.match('e++a', 'eeea').group(0), 'eeea')
+ self.assertEqual(re.match('e?+a', 'ea').group(0), 'ea')
+ self.assertEqual(re.match('e{2,4}+a', 'eeea').group(0), 'eeea')
+ self.assertIsNone(re.match('(.)++.', 'ee'))
+ self.assertEqual(re.match('(ae)*+a', 'aea').groups(), ('ae',))
+ self.assertEqual(re.match('([ae][ae])?+a', 'aea').groups(),
+ ('ae',))
+ self.assertEqual(re.match('(e?){2,4}+a', 'eeea').groups(),
+ ('',))
+ self.assertEqual(re.match('()*+a', 'a').groups(), ('',))
+ self.assertEqual(re.search('x*+', 'axx').span(0), (0, 0))
+ self.assertEqual(re.search('x*+', 'axx').span(), (0, 0))
+ self.assertEqual(re.search('x++', 'axx').span(0), (1, 3))
+ self.assertEqual(re.search('x++', 'axx').span(), (1, 3))
+ self.assertEqual(re.match('a*+', 'xxx').span(0), (0, 0))
+ self.assertEqual(re.match('a*+', 'xxx').span(), (0, 0))
+ self.assertEqual(re.match('x*+', 'xxxa').span(0), (0, 3))
+ self.assertEqual(re.match('x*+', 'xxxa').span(), (0, 3))
+ self.assertIsNone(re.match('a++', 'xxx'))
+ self.assertIsNone(re.match("^(\w){1}+$", "abc"))
+ self.assertIsNone(re.match("^(\w){1,2}+$", "abc"))
+
+ self.assertEqual(re.match("^(\w){3}+$", "abc").group(1), "c")
+ self.assertEqual(re.match("^(\w){1,3}+$", "abc").group(1), "c")
+ self.assertEqual(re.match("^(\w){1,4}+$", "abc").group(1), "c")
+
+ self.assertIsNone(re.match("^x{1}+$", "xxx"))
+ self.assertIsNone(re.match("^x{1,2}+$", "xxx"))
+
+ self.assertTrue(re.match("^x{3}+$", "xxx"))
+ self.assertTrue(re.match("^x{1,3}+$", "xxx"))
+ self.assertTrue(re.match("^x{1,4}+$", "xxx"))
+
+ self.assertIsNone(re.match("^x{}+$", "xxx"))
+ self.assertTrue(re.match("^x{}+$", "x{}"))
+
+ def test_atomic_grouping(self):
+ """Test Atomic Grouping
+ test non-capturing groups of the form (?>...), which does
+ not maintain any stack point created within the group once the
+ group is finished being evaluated."""
+ pattern1 = re.compile(r'a(?>bc|b)c')
+ self.assertIsNone(pattern1.match('abc'))
+ self.assertTrue(pattern1.match('abcc'))
+ self.assertIsNone(re.match(r'(?>.*).', 'abc'))
+ self.assertTrue(re.match(r'(?>x)++', 'xxx'))
+ self.assertTrue(re.match(r'(?>x++)', 'xxx'))
+ self.assertIsNone(re.match(r'(?>x)++x', 'xxx'))
+ self.assertIsNone(re.match(r'(?>x++)x', 'xxx'))
+
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
diff -r 30a6c74ad87f Modules/_sre.c
--- a/Modules/_sre.c Tue Nov 11 21:13:28 2014 +0200
+++ b/Modules/_sre.c Tue Nov 11 22:20:28 2014 +0200
@@ -56,8 +56,8 @@ static char copyright[] =
#define SRE_PY_MODULE "re"
-/* defining this one enables tracing */
-#undef VERBOSE
+/* uncomment this define to enable tracing */
+/* #define VERBOSE_SRE_ENGINE */
/* -------------------------------------------------------------------- */
/* optional features */
@@ -88,7 +88,7 @@ static char copyright[] =
#define SRE_ERROR_MEMORY -9 /* out of memory */
#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
-#if defined(VERBOSE)
+#if defined(VERBOSE_SRE_ENGINE)
#define TRACE(v) printf v
#else
#define TRACE(v)
@@ -1818,6 +1818,7 @@ static int
case SRE_OP_REPEAT_ONE:
case SRE_OP_MIN_REPEAT_ONE:
+ case SRE_OP_POSSESSIVE_ONE:
{
SRE_CODE min, max;
GET_SKIP;
@@ -1855,6 +1856,37 @@ static int
}
break;
+ case SRE_OP_POSSESSIVE_REPEAT:
+ {
+ SRE_CODE min, max;
+ GET_SKIP;
+ GET_ARG; min = arg;
+ GET_ARG; max = arg;
+ if (min > max)
+ FAIL;
+ if (max > SRE_MAXREPEAT)
+ FAIL;
+ if (!_validate_inner(code, code+skip-3, groups))
+ FAIL;
+ code += skip-3;
+ GET_OP;
+ if (op != SRE_OP_SUCCESS)
+ FAIL;
+ }
+ break;
+
+ case SRE_OP_ATOMIC_GROUP:
+ {
+ GET_SKIP;
+ if (!_validate_inner(code, code+skip-2, groups))
+ FAIL;
+ code += skip-2;
+ GET_OP;
+ if (op != SRE_OP_SUCCESS)
+ FAIL;
+ }
+ break;
+
case SRE_OP_GROUPREF:
case SRE_OP_GROUPREF_IGNORE:
GET_ARG;
diff -r 30a6c74ad87f Modules/sre_constants.h
--- a/Modules/sre_constants.h Tue Nov 11 21:13:28 2014 +0200
+++ b/Modules/sre_constants.h Tue Nov 11 22:20:28 2014 +0200
@@ -11,7 +11,7 @@
* See the _sre.c file for information on usage and redistribution.
*/
-#define SRE_MAGIC 20140917
+#define SRE_MAGIC 20141113
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
@@ -45,6 +45,9 @@
#define SRE_OP_SUBPATTERN 30
#define SRE_OP_MIN_REPEAT_ONE 31
#define SRE_OP_RANGE_IGNORE 32
+#define SRE_OP_ATOMIC_GROUP 33
+#define SRE_OP_POSSESSIVE_REPEAT 34
+#define SRE_OP_POSSESSIVE_ONE 35
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2
diff -r 30a6c74ad87f Modules/sre_lib.h
--- a/Modules/sre_lib.h Tue Nov 11 21:13:28 2014 +0200
+++ b/Modules/sre_lib.h Tue Nov 11 22:20:28 2014 +0200
@@ -467,6 +467,9 @@ do { \
#define JUMP_BRANCH 11
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
+#define JUMP_POSS_REPEAT_1 14
+#define JUMP_POSS_REPEAT_2 15
+#define JUMP_ATOMIC_GROUP 16
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
DATA_ALLOC(SRE(match_context), nextctx); \
@@ -871,6 +874,57 @@ entrance:
}
RETURN_FAILURE;
+ case SRE_OP_POSSESSIVE_ONE:
+ /* match repeated sequence (maximizing regexp) without
+ backtracking */
+
+ /* this operator only works if the repeated item is
+ exactly one character wide, and we're not already
+ collecting backtracking points. for other cases,
+ use the POSSESSIVE_REPEAT operator */
+
+ /* <1=min> <2=max> item
+ tail */
+
+ TRACE(("|%p|%p|POSSESSIVE_ONE %d %d\n", ctx->pattern,
+ ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
+
+ if (ctx->ptr + ctx->pattern[1] > end) {
+ RETURN_FAILURE; /* cannot match */
+ }
+
+ state->ptr = ctx->ptr;
+
+ ret = SRE(count)(state, ctx->pattern + 3, ctx->pattern[2]);
+ RETURN_ON_ERROR(ret);
+ DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
+ ctx->count = ret;
+ ctx->ptr += ctx->count;
+
+ /* when we arrive here, count contains the number of
+ matches, and ctx->ptr points to the tail of the target
+ string. check if the rest of the pattern matches,
+ and fail if not. */
+
+ /* Test for not enough repetitions in match */
+ if (ctx->count < (Py_ssize_t) ctx->pattern[1]) {
+ RETURN_FAILURE;
+ }
+
+ /* Update the pattern to point to the next op code */
+ ctx->pattern += ctx->pattern[0];
+
+ /* Let the tail be evaluated separately and consider this
+ match successful. */
+ if (*ctx->pattern == SRE_OP_SUCCESS) {
+ /* tail is empty. we're finished */
+ state->ptr = ctx->ptr;
+ RETURN_SUCCESS;
+ }
+
+ /* Attempt to match the rest of the string */
+ break;
+
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
@@ -1031,6 +1085,138 @@ entrance:
state->ptr = ctx->ptr;
RETURN_FAILURE;
+ case SRE_OP_POSSESSIVE_REPEAT:
+ /* create possessive repeat contexts. */
+ /* <1=min> <2=max> pattern
+ tail */
+ TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern,
+ ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
+
+ /* Set the global Input pointer to this context's Input
+ pointer */
+ state->ptr = ctx->ptr;
+
+ /* Initialize Count to 0 */
+ ctx->count = 0;
+
+ /* Check for minimum required matches. */
+ while (ctx->count < (Py_ssize_t)ctx->pattern[1]) {
+ /* not enough matches */
+ DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
+ &ctx->pattern[3]);
+ if (ret) {
+ RETURN_ON_ERROR(ret);
+ ctx->count++;
+ }
+ else {
+ state->ptr = ctx->ptr;
+ RETURN_FAILURE;
+ }
+ }
+
+ /* Clear the context's Input stream pointer so that it
+ doesn't match the global state so that the while loop can
+ be entered. */
+ ctx->ptr = NULL;
+
+ /* Keep trying to parse the sub-pattern until the
+ end is reached, creating a new context each time. */
+ while ((ctx->count < (Py_ssize_t)ctx->pattern[2] ||
+ (Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT) &&
+ state->ptr != ctx->ptr) {
+ /* Save the Capture Group Marker state into the current
+ Context and back up the current highest number
+ Capture Group marker. */
+ LASTMARK_SAVE();
+ MARK_PUSH(ctx->lastmark);
+
+ /* zero-width match protection */
+ /* Set the context's Input Stream pointer to be the
+ current Input Stream pointer from the global
+ state. When the loop reaches the next iteration,
+ the context will then store the last known good
+ position with the global state holding the Input
+ Stream position that has been updated with
+ the most recent match. Thus, if state's Input
+ stream remains the same as the one stored in the
+ current Context, we know we have successfully
+ matched an empty string and that all subsequent
+ matches will also be the empty string until the
+ maximum number of matches are counted, and because
+ of this, we could immediately stop at that point and
+ consider this match successful. */
+ ctx->ptr = state->ptr;
+
+ /* We have not reached the maximum number of matches, so try to
+ match once more. */
+ DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
+ &ctx->pattern[3]);
+
+ /* Check to see if the last attempted match
+ succeeded. */
+ if (ret) {
+ /* Drop the saved highest number Capture Group
+ marker saved above and use the newly updated
+ value. */
+ MARK_POP_DISCARD(ctx->lastmark);
+ RETURN_ON_ERROR(ret);
+
+ /* Success, increment the count. */
+ ctx->count++;
+ }
+ /* Last attempted match failed. */
+ else {
+ /* Restore the previously saved highest number
+ Capture Group marker since the last iteration
+ did not match, then restore that to the global
+ state. */
+ MARK_POP(ctx->lastmark);
+ LASTMARK_RESTORE();
+
+ /* We have sufficient matches, so exit loop. */
+ break;
+ }
+ }
+
+ /* Evaluate Tail */
+ /* Jump to end of pattern indicated by skip, and then skip
+ the SUCCESS op code that follows it. */
+ ctx->pattern += ctx->pattern[0] + 1;
+ ctx->ptr = state->ptr;
+ break;
+
+ case SRE_OP_ATOMIC_GROUP:
+ /* Atomic Group Sub Pattern */
+ /* pattern tail */
+ TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
+
+ /* Set the global Input pointer to this context's Input
+ pointer */
+ state->ptr = ctx->ptr;
+
+ /* Evaluate the Atomic Group in a new context, terminating
+ when the end of the group, represented by a SUCCESS op
+ code, is reached. */
+ /* Group Pattern begins at an offset of 1 code. */
+ DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
+ &ctx->pattern[1]);
+
+ /* Test Exit Condition */
+ RETURN_ON_ERROR(ret);
+
+ if (ret == 0) {
+ /* Atomic Group failed to Match. */
+ state->ptr = ctx->ptr;
+ RETURN_FAILURE;
+ }
+
+ /* Evaluate Tail */
+ /* Jump to the end of the group as indicated by skip; the skip
+ already covers the SUCCESS op code that terminates the group. */
+ ctx->pattern += ctx->pattern[0];
+ ctx->ptr = state->ptr;
+ break;
+
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
@@ -1175,6 +1361,12 @@ exit:
case JUMP_MIN_UNTIL_1:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_min_until_1;
+ case JUMP_POSS_REPEAT_1:
+ TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr));
+ goto jump_poss_repeat_1;
+ case JUMP_POSS_REPEAT_2:
+ TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr));
+ goto jump_poss_repeat_2;
case JUMP_REPEAT:
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
goto jump_repeat;
@@ -1187,6 +1379,9 @@ exit:
case JUMP_MIN_REPEAT_ONE:
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
goto jump_min_repeat_one;
+ case JUMP_ATOMIC_GROUP:
+ TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
+ goto jump_atomic_group;
case JUMP_ASSERT:
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
goto jump_assert;