=== modified file Lib/re.py
--- Lib/re.py	2009-01-01 15:46:10 +0000
+++ Lib/re.py	2009-02-03 21:49:47 +0000
@@ -27,52 +27,81 @@
 concatenate ordinary characters, so last matches the string 'last'.
 
 The special characters are:
-    "."      Matches any character except a newline.
-    "^"      Matches the start of the string.
-    "$"      Matches the end of the string or just before the newline at
-             the end of the string.
-    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
-             Greedy means that it will match as many repetitions as possible.
-    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
-    "?"      Matches 0 or 1 (greedy) of the preceding RE.
-    *?,+?,?? Non-greedy versions of the previous three special characters.
-    {m,n}    Matches from m to n repetitions of the preceding RE.
-    {m,n}?   Non-greedy version of the above.
-    "\\"     Either escapes special characters or signals a special sequence.
-    []       Indicates a set of characters.
-             A "^" as the first character indicates a complementing set.
-    "|"      A|B, creates an RE that will match either A or B.
-    (...)    Matches the RE inside the parentheses.
-             The contents can be retrieved or matched later in the string.
-    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
-    (?:...)  Non-grouping version of regular parentheses.
-    (?P<name>...) The substring matched by the group is accessible by name.
-    (?P=name)     Matches the text matched earlier by the group named name.
-    (?#...)  A comment; ignored.
-    (?=...)  Matches if ... matches next, but doesn't consume the string.
-    (?!...)  Matches if ... doesn't match next.
-    (?<=...) Matches if preceded by ... (must be fixed length).
-    (?<!...) Matches if not preceded by ... (must be fixed length).
-    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
-                       the (optional) no pattern otherwise.
+    "."                Matches any character except a newline.
+    "^"                Matches the start of the string.
+    "$"                Matches the end of the string or just before the
+                       newline at the end of the string.
+    "*"                Matches 0 or more (greedy) repetitions of the
+                       preceding RE. Greedy means that it will match as
+                       many repetitions as possible.
+    "+"                Matches 1 or more (greedy) repetitions of the
+                       preceding RE.
+    "?"                Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,??           Non-greedy versions of the previous three special
+                       characters.
+    *+,++,?+           Possessive versions of the previous three special
+                       characters.
+    {m,n}              Matches from m to n repetitions of the preceding
+                       RE.
+    {m,n}?             Non-greedy version of the above.
+    {m,n}+             Possessive version of the above.
+    "\\"               Either escapes special characters or signals a
+                       special sequence.
+    []                 Indicates a set of characters. A "^" as the first
+                       character indicates a complementing set.
+    "|"                A|B, creates an RE that will match either A or B.
+    (...)              Matches the RE inside the parentheses. The contents
+                       can be retrieved or matched later in the string.
+    (?iLmrsuxz)        Set the I, L, M, R, S, U, X, or Z flag for the
+                       following RE (see below).
+    (?:...)            Non-capturing version of regular parentheses.
+    (?P<name>...)      The substring matched by the group is accessible by
+                       name.
+    (?<name>...)       The substring matched by the group is accessible by
+                       name.
+    (?#...)            A comment; ignored.
+    (?>...)            Atomic group. Like (?:...) but won't retry the RE
+                       within the parentheses.
+    (?=...)            Matches if ... matches next, but doesn't consume
+                       the string.
+    (?!...)            Matches if ... doesn't match next.
+    (?<=...)           Matches if preceded by ... (must be fixed length).
+    (?<!...)           Matches if not preceded by ... (must be fixed
+                       length).
+    (?(id/name)yes|no) Matches yes pattern if the group with id/name
+                       matched, the (optional) no pattern otherwise.
 
 The special sequences consist of "\\" and a character from the list
 below.  If the ordinary character is not on the list, then the
 resulting RE will match the second character.
-    \number  Matches the contents of the group of the same number.
-    \A       Matches only at the start of the string.
-    \Z       Matches only at the end of the string.
-    \b       Matches the empty string, but only at the start or end of a word.
-    \B       Matches the empty string, but not at the start or end of a word.
-    \d       Matches any decimal digit; equivalent to the set [0-9].
-    \D       Matches any non-digit character; equivalent to the set [^0-9].
-    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
-    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
-    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
-             With LOCALE, it will match the set [0-9_] plus characters defined
-             as letters for the current locale.
-    \W       Matches the complement of \w.
-    \\       Matches a literal backslash.
+    \number     Matches the contents of the group of the same number.
+    \A          Matches only at the start of the string.
+    \b          Matches the empty string, but only at the start or end of
+                a word.
+    \B          Matches the empty string, but not at the start or end of a
+                word.
+    \d          Matches any decimal digit; equivalent to the set [0-9].
+    \D          Matches any non-digit character; equivalent to the set
+                [^0-9].
+    \g<name>    Matches the text matched by the group named name.
+    \g<number>  Matches the contents of the group of the same number.
+    \g<+number> Matches the contents of the group of the relative number.
+    \g<-number> Matches the contents of the group of the relative number.
+    \k<name>    Matches the text matched earlier by the group named name.
+    \N{name}    Matches the named Unicode character.
+    \p{name}    Matches any character having the named property.
+    \P{name}    Matches any character not having the named property.
+    \s          Matches any whitespace character; equivalent to
+                [ \t\n\r\f\v].
+    \S          Matches any non-whitespace character; equiv. to
+                [^ \t\n\r\f\v].
+    \w          Matches any alphanumeric character; equivalent to
+                [a-zA-Z0-9_]. With LOCALE, it will match the set
+                [0-9_] plus characters defined as letters for the current
+                locale.
+    \W          Matches the complement of \w.
+    \Z          Matches only at the end of the string.
+    \\          Matches a literal backslash.
 
 This module exports the following functions:
     match    Match a regular expression pattern to the beginning of a string.
@@ -87,15 +116,17 @@
     escape   Backslash all non-alphanumerics in a string.
 
 Some of the functions in this module takes flags as optional parameters:
-    I  IGNORECASE  Perform case-insensitive matching.
-    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
-    M  MULTILINE   "^" matches the beginning of lines (after a newline)
-                   as well as the string.
-                   "$" matches the end of lines (before a newline) as well
-                   as the end of the string.
-    S  DOTALL      "." matches any character at all, including the newline.
-    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
-    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
+    I  IGNORECASE Perform case-insensitive matching.
+    L  LOCALE     Make \w, \W, \b, \B, dependent on the current locale.
+    M  MULTILINE  "^" matches the beginning of lines (after a newline) as
+                  well as the string.
+                  "$" matches the end of lines (before a newline) as well
+                  as the end of the string.
+    R  REVERSE    Search backwards, from the end to the start.
+    S  DOTALL     "." matches any character at all, including the newline.
+    X  VERBOSE    Ignore whitespace and comments for nicer looking RE's.
+    U  UNICODE    Make \w, \W, \b, \B, dependent on the Unicode locale.
+    Z  ZEROWIDTH  Permit splitting on zero-width separators.
 
 This module also defines an exception 'error'.
 
@@ -109,18 +140,19 @@
 __all__ = [ "match", "search", "sub", "subn", "split", "findall",
     "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
     "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
-    "UNICODE", "error" ]
+    "UNICODE", "R", "Z", "REVERSE", "ZEROWIDTH", "error" ]
 
-__version__ = "2.2.1"
+__version__ = "2.2.2"
 
 # flags
 I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
 L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
 M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+R = REVERSE = sre_compile.SRE_FLAG_REVERSE # search backwards
 S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
 X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
-
+Z = ZEROWIDTH = sre_compile.SRE_FLAG_ZEROWIDTH # split on zero-width separators
 # sre extensions (experimental, don't rely on these)
 T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
 DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
@@ -237,12 +269,12 @@
         if flags:
             raise ValueError('Cannot process flags argument with a compiled pattern')
         return pattern
-    if not sre_compile.isstring(pattern):
-        raise TypeError, "first argument must be string or compiled pattern"
+    if not isinstance(pattern, (str, unicode)):
+        raise TypeError("First argument must be string or compiled pattern")
     try:
         p = sre_compile.compile(pattern, flags)
     except error, v:
-        raise error, v # invalid expression
+        raise error(v) # invalid expression
     if len(_cache) >= _MAXCACHE:
         _cache.clear()
     _cache[cachekey] = p
@@ -257,7 +289,7 @@
     try:
         p = sre_parse.parse_template(repl, pattern)
     except error, v:
-        raise error, v # invalid expression
+        raise error(v) # invalid expression
     if len(_cache_repl) >= _MAXCACHE:
         _cache_repl.clear()
     _cache_repl[key] = p
@@ -266,7 +298,7 @@
 def _expand(pattern, match, template):
     # internal: match.expand implementation hook
     template = sre_parse.parse_template(template, pattern)
-    return sre_parse.expand_template(template, match)
+    return sre_parse.expand_template(template, match, True)
 
 def _subx(pattern, template):
     # internal: pattern.sub/subn implementation helper
@@ -275,7 +307,7 @@
         # literal replacement
         return template[1][0]
     def filter(match, template=template):
-        return sre_parse.expand_template(template, match)
+        return sre_parse.expand_template(template, match, True)
     return filter
 
 # register myself for pickling
@@ -292,36 +324,31 @@
 
 class Scanner:
     def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
         self.lexicon = lexicon
         # combine phrases into a compound pattern
-        p = []
-        s = sre_parse.Pattern()
-        s.flags = flags
-        for phrase, action in lexicon:
-            p.append(sre_parse.SubPattern(s, [
-                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
-                ]))
-        s.groups = len(p)+1
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        self.scanner = sre_compile.compile(p)
+        # Each phrase is wrapped in its own capturing group so that
+        # m.lastindex in scan() selects the matching lexicon entry.
+        sep, template = map(type(lexicon[0][0]), ("|", "(%s)"))
+        regex = sep.join(template % phrase for phrase, action in lexicon)
+        # Pass flags through; they were applied to every phrase before.
+        self.scanner = sre_compile.compile(regex, flags)
     def scan(self, string):
         result = []
         append = result.append
         match = self.scanner.scanner(string).match
         i = 0
-        while 1:
+        while True:
             m = match()
             if not m:
                 break
             j = m.end()
             if i == j:
                 break
-            action = self.lexicon[m.lastindex-1][1]
+            action = self.lexicon[m.lastindex - 1][1]
             if hasattr(action, '__call__'):
                 self.match = m
                 action = action(self, m.group())
             if action is not None:
                 append(action)
             i = j
         return result, string[i:]
=== modified file Lib/test/re_tests.py
--- Lib/test/re_tests.py	2003-04-20 07:35:44 +0000
+++ Lib/test/re_tests.py	2009-02-03 18:18:47 +0000
@@ -1,674 +1,674 @@
-#!/usr/bin/env python
-# -*- mode: python -*-
-
-# Re test suite and benchmark suite v1.5
-
-# The 3 possible outcomes for each pattern
-[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
-
-# Benchmark suite (needs expansion)
-#
-# The benchmark suite does not test correctness, just speed.  The
-# first element of each tuple is the regex pattern; the second is a
-# string to match it against.  The benchmarking code will embed the
-# second string inside several sizes of padding, to test how regex
-# matching performs on large strings.
-
-benchmarks = [
-
-    # test common prefix
-    ('Python|Perl', 'Perl'),    # Alternation
-    ('(Python|Perl)', 'Perl'),  # Grouped alternation
-
-    ('Python|Perl|Tcl', 'Perl'),        # Alternation
-    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation
-
-    ('(Python)\\1', 'PythonPython'),    # Backreference
-    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
-    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
-
-    ('Python', 'Python'),               # Simple text literal
-    ('.*Python', 'Python'),             # Bad text literal
-    ('.*Python.*', 'Python'),           # Worse text literal
-    ('.*(Python)', 'Python'),           # Bad text literal with grouping
-
-]
-
-# Test suite (for verifying correctness)
-#
-# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
-# complete tuple are:
-# element 0: a string containing the pattern
-#         1: the string to match against the pattern
-#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
-#         3: a string that will be eval()'ed to produce a test string.
-#            This is an arbitrary Python expression; the available
-#            variables are "found" (the whole match), and "g1", "g2", ...
-#            up to "g99" contain the contents of each group, or the
-#            string 'None' if the group wasn't given a value, or the
-#            string 'Error' if the group index was out of range;
-#            also "groups", the return value of m.group() (a tuple).
-#         4: The expected result of evaluating the expression.
-#            If the two don't match, an error is reported.
-#
-# If the regex isn't expected to work, the latter two elements can be omitted.
-
-tests = [
-    # Test ?P< and ?P= extensions
-    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
-    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
-    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
-    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Begins with an illegal char
-
-    # Same tests, for the ?P= form
-    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
-    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
-    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
-    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
-
-    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
-    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
-
-    # Test octal escapes
-    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
-    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
-    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
-    ('\\141', 'a', SUCCEED, 'found', 'a'),
-    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
-
-    # Test \0 is handled everywhere
-    (r'\0', '\0', SUCCEED, 'found', '\0'),
-    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
-    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
-    (r'[^a\0]', '\0', FAIL),
-
-    # Test various letter escapes
-    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
-    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
-    # NOTE: not an error under PCRE/PRE:
-    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
-    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
-    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
-    # new \x semantics
-    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
-    (r'\x00f', '\017', FAIL, 'found', chr(15)),
-    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
-    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
-    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
-    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
-
-    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
-     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
-
-    # Test that . only matches \n in DOTALL mode
-    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
-    ('a.b', 'a\nb', FAIL),
-    ('a.*b', 'acc\nccb', FAIL),
-    ('a.{4,5}b', 'acc\nccb', FAIL),
-    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
-    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
-    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
-    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
-    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
-
-    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
-    ('', '', SUCCEED, 'found', ''),    # Empty pattern
-    ('abc', 'abc', SUCCEED, 'found', 'abc'),
-    ('abc', 'xbc', FAIL),
-    ('abc', 'axc', FAIL),
-    ('abc', 'abx', FAIL),
-    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
-    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
-    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab+bc', 'abc', FAIL),
-    ('ab+bc', 'abq', FAIL),
-    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab?bc', 'abbbbc', FAIL),
-    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'abcc', FAIL),
-    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'aabc', FAIL),
-    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
-    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
-    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
-    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
-    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
-    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
-    ('a.*c', 'axyzd', FAIL),
-    ('a[bc]d', 'abc', FAIL),
-    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
-    ('a[b-d]e', 'abd', FAIL),
-    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
-    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
-    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
-    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
-    # NOTE: not an error under PCRE/PRE:
-    # ('a[b-]', 'a-', SYNTAX_ERROR),
-    ('a[]b', '-', SYNTAX_ERROR),
-    ('a[', '-', SYNTAX_ERROR),
-    ('a\\', '-', SYNTAX_ERROR),
-    ('abc)', '-', SYNTAX_ERROR),
-    ('(abc', '-', SYNTAX_ERROR),
-    ('a]', 'a]', SUCCEED, 'found', 'a]'),
-    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
-    ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
-    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
-    ('a[^bc]d', 'abd', FAIL),
-    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
-    ('a[^-b]c', 'a-c', FAIL),
-    ('a[^]b]c', 'a]c', FAIL),
-    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
-    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
-    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
-    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
-    ('\\by\\b', 'xy', FAIL),
-    ('\\by\\b', 'yz', FAIL),
-    ('\\by\\b', 'xyz', FAIL),
-    ('x\\b', 'xyz', FAIL),
-    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
-    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
-    ('z\\B', 'xyz', FAIL),
-    ('\\Bx', 'xyz', FAIL),
-    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
-    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
-    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
-    ('\\By\\B', 'xy', FAIL),
-    ('\\By\\B', 'yz', FAIL),
-    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
-    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
-    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
-    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
-    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
-    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
-    ('$b', 'b', FAIL),
-    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
-    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
-    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
-    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
-    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
-    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
-    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
-    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
-    (')(', '-', SYNTAX_ERROR),
-    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
-    ('abc', '', FAIL),
-    ('a*', '', SUCCEED, 'found', ''),
-    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
-    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
-    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
-    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
-    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
-    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
-    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
-    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
-    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
-    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
-    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
-    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
-    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
-    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
-    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
-    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
-    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
-    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
-    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
-    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
-    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
-    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
-    ('multiple words of text', 'uh-uh', FAIL),
-    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
-    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
-    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
-    ('[k]', 'ab', FAIL),
-    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
-    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
-    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
-    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
-    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
-    ('^(a+).\\1$', 'aaaa', FAIL),
-    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
-    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
-    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
-    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
-    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
-    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
-    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
-    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
-    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
-    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
-    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
-    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
-    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
-    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
-    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
-    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
-    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
-    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
-    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
-    ('([abc]*)x', 'abc', FAIL),
-    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
-    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
-
-    # Test symbolic groups
-
-    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
-    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
-    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
-    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
-
-    # Test octal escapes/memory references
-
-    ('\\1', 'a', SYNTAX_ERROR),
-    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
-    ('\\141', 'a', SUCCEED, 'found', 'a'),
-    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
-
-    # All tests from Perl
-
-    ('abc', 'abc', SUCCEED, 'found', 'abc'),
-    ('abc', 'xbc', FAIL),
-    ('abc', 'axc', FAIL),
-    ('abc', 'abx', FAIL),
-    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
-    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
-    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab+bc', 'abc', FAIL),
-    ('ab+bc', 'abq', FAIL),
-    ('ab{1,}bc', 'abq', FAIL),
-    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
-    ('ab{4,5}bc', 'abbbbc', FAIL),
-    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
-    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab?bc', 'abbbbc', FAIL),
-    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
-    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'abcc', FAIL),
-    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
-    ('^abc$', 'aabc', FAIL),
-    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
-    ('^', 'abc', SUCCEED, 'found', ''),
-    ('$', 'abc', SUCCEED, 'found', ''),
-    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
-    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
-    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
-    ('a.*c', 'axyzd', FAIL),
-    ('a[bc]d', 'abc', FAIL),
-    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
-    ('a[b-d]e', 'abd', FAIL),
-    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
-    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
-    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
-    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
-    ('a[b-a]', '-', SYNTAX_ERROR),
-    ('a[]b', '-', SYNTAX_ERROR),
-    ('a[', '-', SYNTAX_ERROR),
-    ('a]', 'a]', SUCCEED, 'found', 'a]'),
-    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
-    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
-    ('a[^bc]d', 'abd', FAIL),
-    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
-    ('a[^-b]c', 'a-c', FAIL),
-    ('a[^]b]c', 'a]c', FAIL),
-    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
-    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
-    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
-    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
-    ('*a', '-', SYNTAX_ERROR),
-    ('(*)b', '-', SYNTAX_ERROR),
-    ('$b', 'b', FAIL),
-    ('a\\', '-', SYNTAX_ERROR),
-    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
-    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
-    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
-    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
-    ('abc)', '-', SYNTAX_ERROR),
-    ('(abc', '-', SYNTAX_ERROR),
-    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
-    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
-    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
-    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
-    ('a**', '-', SYNTAX_ERROR),
-    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
-    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
-    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
-    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
-    (')(', '-', SYNTAX_ERROR),
-    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
-    ('abc', '', FAIL),
-    ('a*', '', SUCCEED, 'found', ''),
-    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
-    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
-    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
-    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
-    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
-    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
-    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
-    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
-    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
-    ('^(ab|cd)e', 'abcde', FAIL),
-    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
-    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
-    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
-    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
-    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
-    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
-    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
-    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
-    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
-    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
-    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
-    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
-    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
-    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
-    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
-    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
-# Python does not have the same rules for \\41 so this is a syntax error
-#    ('((((((((((a))))))))))\\41', 'aa', FAIL),
-#    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
-    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
-    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
-    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
-    ('multiple words of text', 'uh-uh', FAIL),
-    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
-    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
-    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
-    ('[k]', 'ab', FAIL),
-    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
-    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
-    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
-    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)abc', 'XBC', FAIL),
-    ('(?i)abc', 'AXC', FAIL),
-    ('(?i)abc', 'ABX', FAIL),
-    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
-    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
-    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
-    ('(?i)ab+bc', 'ABC', FAIL),
-    ('(?i)ab+bc', 'ABQ', FAIL),
-    ('(?i)ab{1,}bc', 'ABQ', FAIL),
-    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
-    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
-    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
-    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab??bc', 'ABBBBC', FAIL),
-    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)^abc$', 'ABCC', FAIL),
-    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
-    ('(?i)^abc$', 'AABC', FAIL),
-    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
-    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
-    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
-    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
-    ('(?i)a.*c', 'AXYZD', FAIL),
-    ('(?i)a[bc]d', 'ABC', FAIL),
-    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
-    ('(?i)a[b-d]e', 'ABD', FAIL),
-    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
-    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
-    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
-    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
-    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
-    ('(?i)a[]b', '-', SYNTAX_ERROR),
-    ('(?i)a[', '-', SYNTAX_ERROR),
-    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
-    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
-    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
-    ('(?i)a[^bc]d', 'ABD', FAIL),
-    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
-    ('(?i)a[^-b]c', 'A-C', FAIL),
-    ('(?i)a[^]b]c', 'A]C', FAIL),
-    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
-    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
-    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
-    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
-    ('(?i)*a', '-', SYNTAX_ERROR),
-    ('(?i)(*)b', '-', SYNTAX_ERROR),
-    ('(?i)$b', 'B', FAIL),
-    ('(?i)a\\', '-', SYNTAX_ERROR),
-    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
-    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
-    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
-    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
-    ('(?i)abc)', '-', SYNTAX_ERROR),
-    ('(?i)(abc', '-', SYNTAX_ERROR),
-    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
-    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
-    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)a**', '-', SYNTAX_ERROR),
-    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
-    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
-    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
-    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
-    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
-    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
-    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
-    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
-    ('(?i))(', '-', SYNTAX_ERROR),
-    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
-    ('(?i)abc', '', FAIL),
-    ('(?i)a*', '', SUCCEED, 'found', ''),
-    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
-    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
-    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
-    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
-    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
-    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
-    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
-    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
-    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
-    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
-    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
-    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
-    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
-    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
-    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
-    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
-    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
-    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
-    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
-    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
-    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
-    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
-    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
-    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
-    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
-    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
-    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
-    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
-    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
-    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
-    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
-    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
-    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
-    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
-    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
-    ('(?i)multiple words of text', 'UH-UH', FAIL),
-    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
-    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
-    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
-    ('(?i)[k]', 'AB', FAIL),
-#    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
-#    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
-    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
-    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
-    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
-    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
-    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
-    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
-    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
-    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
-    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
-    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
-    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
-
-    # lookbehind: split by : but not if it is escaped by -.
-    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
-    # escaping with \ as we know it
-    ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
-    # terminating with ' and escaping with ? as in edifact
-    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
-
-    # Comments using the (?#...) syntax
-
-    ('w(?# comment', 'w', SYNTAX_ERROR),
-    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
-
-    # Check odd placement of embedded pattern modifiers
-
-    # not an error under PCRE/PRE:
-    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
-    # ('w(?i)', 'W', SYNTAX_ERROR),
-
-    # Comments using the x embedded pattern modifier
-
-    ("""(?x)w# comment 1
-        x y
-        # comment 2
-        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
-
-    # using the m embedded pattern modifier
-
-    ('^abc', """jkl
-abc
-xyz""", FAIL),
-    ('(?m)^abc', """jkl
-abc
-xyz""", SUCCEED, 'found', 'abc'),
-
-    ('(?m)abc$', """jkl
-xyzabc
-123""", SUCCEED, 'found', 'abc'),
-
-    # using the s embedded pattern modifier
-
-    ('a.b', 'a\nb', FAIL),
-    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
-
-    # test \w, etc. both inside and outside character classes
-
-    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
-    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
-    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
-    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
-    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
-    # not an error under PCRE/PRE:
-    # ('[\\d-x]', '-', SYNTAX_ERROR),
-    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
-    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
-
-    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
-    # new \x semantics
-    (r'\x00ff', '\377', FAIL),
-    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
-    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
-    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
-    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
-    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
-
-    #
-    # post-1.5.2 additions
-
-    # xmllib problem
-    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
-    # bug 110866: reference to undefined group
-    (r'((.)\1+)', '', SYNTAX_ERROR),
-    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
-    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
-    # bug 112468: various expected syntax errors
-    (r'(', '', SYNTAX_ERROR),
-    (r'[\41]', '!', SUCCEED, 'found', '!'),
-    # bug 114033: nothing to repeat
-    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
-    # bug 115040: rescan if flags are modified inside pattern
-    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
-    # bug 115618: negative lookahead
-    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
-    # bug 116251: character class bug
-    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
-    # bug 123769+127259: non-greedy backtracking bug
-    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
-    (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
-    (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
-    # bug 127259: \Z shouldn't depend on multiline mode
-    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
-    # bug 128899: uppercase literals under the ignorecase flag
-    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
-    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
-    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
-    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
-    # bug 130748: ^* should be an error (nothing to repeat)
-    (r'^*', '', SYNTAX_ERROR),
-    # bug 133283: minimizing repeat problem
-    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
-    # bug 477728: minimizing repeat problem
-    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
-    # bug 483789: minimizing repeat problem
-    (r'a[^>]*?b', 'a>b', FAIL),
-    # bug 490573: minimizing repeat problem
-    (r'^a*?$', 'foo', FAIL),
-    # bug 470582: nested groups problem
-    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
-    # another minimizing repeat problem (capturing groups in assertions)
-    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
-    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
-    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
-]
-
-try:
-    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
-    pass
-else:
-    tests.extend([
-    # bug 410271: \b broken under locales
-    (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
-    (r'(?u)\b.\b', u, SUCCEED, 'found', u),
-    (r'(?u)\w', u, SUCCEED, 'found', u),
-    ])
+#!/usr/bin/env python
+# -*- mode: python -*-
+
+# Re test suite and benchmark suite v1.5
+
+# The three possible outcomes for each pattern (bound to 0, 1 and 2)
+[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)
+
+# Benchmark suite (needs expansion)
+#
+# The benchmark suite does not test correctness, just speed.  The
+# first element of each tuple is the regex pattern; the second is a
+# string to match it against.  The benchmarking code will embed the
+# second string inside several sizes of padding, to test how regex
+# matching performs on large strings.
+
+benchmarks = [
+
+    # Alternation where the branches share a common prefix ('P')
+    ('Python|Perl', 'Perl'),    # Alternation
+    ('(Python|Perl)', 'Perl'),  # Grouped alternation
+
+    ('Python|Perl|Tcl', 'Perl'),        # Three-way alternation
+    ('(Python|Perl|Tcl)', 'Perl'),      # Grouped three-way alternation
+
+    ('(Python)\\1', 'PythonPython'),    # Backreference
+    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
+    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few character sets
+
+    ('Python', 'Python'),               # Simple text literal
+    ('.*Python', 'Python'),             # Text literal after a leading .*
+    ('.*Python.*', 'Python'),           # Text literal with .* on both sides
+    ('.*(Python)', 'Python'),           # Grouped text literal after a leading .*
+
+]
+
+# Test suite (for verifying correctness)
+#
+# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
+# complete tuple are:
+# element 0: a string containing the pattern
+#         1: the string to match against the pattern
+#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
+#         3: a string that will be eval()'ed to produce a test string.
+#            This is an arbitrary Python expression; the available
+#            variables are "found" (the whole match) and "g1", "g2", ...
+#            up to "g99", which hold the contents of each group, or the
+#            string 'None' if the group wasn't given a value, or the
+#            string 'Error' if the group index was out of range;
+#            also "groups", the return value of m.group() (a tuple).
+#         4: The expected result of evaluating the expression.
+#            If the two don't match, an error is reported.
+#
+# If the regex isn't expected to work, the latter two elements can be omitted.
+
+tests = [
+    # Test ?P< and ?P= extensions
+    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
+    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
+    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
+    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char
+
+    # Same tests, for the ?P= form
+    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
+    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
+
+    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
+    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
+
+    # Test octal escapes
+    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
+    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
+    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
+    ('\\141', 'a', SUCCEED, 'found', 'a'),
+    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
+
+    # Test \0 is handled everywhere
+    (r'\0', '\0', SUCCEED, 'found', '\0'),
+    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
+    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
+    (r'[^a\0]', '\0', FAIL),
+
+    # Test various letter escapes
+    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
+    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
+    # NOTE: not an error under PCRE/PRE:
+    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
+    (r'\c\e\h\i\j\m\q\y\z', 'cehijmqyz', SUCCEED, 'found', 'cehijmqyz'),
+    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+    # new \x semantics
+    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
+    (r'\x00f', '\017', FAIL, 'found', chr(15)),
+    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
+    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
+    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
+    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
+
+    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
+     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
+
+    # Test that . only matches \n in DOTALL mode
+    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
+    ('a.b', 'a\nb', FAIL),
+    ('a.*b', 'acc\nccb', FAIL),
+    ('a.{4,5}b', 'acc\nccb', FAIL),
+    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+
+    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
+    ('', '', SUCCEED, 'found', ''),    # Empty pattern
+    ('abc', 'abc', SUCCEED, 'found', 'abc'),
+    ('abc', 'xbc', FAIL),
+    ('abc', 'axc', FAIL),
+    ('abc', 'abx', FAIL),
+    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
+    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
+    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab+bc', 'abc', FAIL),
+    ('ab+bc', 'abq', FAIL),
+    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab?bc', 'abbbbc', FAIL),
+    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abcc', FAIL),
+    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'aabc', FAIL),
+    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
+    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
+    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
+    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
+    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
+    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
+    ('a.*c', 'axyzd', FAIL),
+    ('a[bc]d', 'abc', FAIL),
+    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
+    ('a[b-d]e', 'abd', FAIL),
+    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
+    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
+    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
+    # NOTE: not an error under PCRE/PRE:
+    # ('a[b-]', 'a-', SYNTAX_ERROR),
+    ('a[]b', '-', SYNTAX_ERROR),
+    ('a[', '-', SYNTAX_ERROR),
+    ('a\\', '-', SYNTAX_ERROR),
+    ('abc)', '-', SYNTAX_ERROR),
+    ('(abc', '-', SYNTAX_ERROR),
+    ('a]', 'a]', SUCCEED, 'found', 'a]'),
+    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
+    ('a[^bc]d', 'abd', FAIL),
+    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('a[^-b]c', 'a-c', FAIL),
+    ('a[^]b]c', 'a]c', FAIL),
+    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
+    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
+    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
+    ('\\by\\b', 'xy', FAIL),
+    ('\\by\\b', 'yz', FAIL),
+    ('\\by\\b', 'xyz', FAIL),
+    ('x\\b', 'xyz', FAIL),
+    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
+    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
+    ('z\\B', 'xyz', FAIL),
+    ('\\Bx', 'xyz', FAIL),
+    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
+    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
+    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
+    ('\\By\\B', 'xy', FAIL),
+    ('\\By\\B', 'yz', FAIL),
+    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
+    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
+    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
+    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
+    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
+    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('$b', 'b', FAIL),
+    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
+    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
+    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
+    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
+    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
+    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
+    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    (')(', '-', SYNTAX_ERROR),
+    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
+    ('abc', '', FAIL),
+    ('a*', '', SUCCEED, 'found', ''),
+    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
+    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
+    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
+    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
+    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
+    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
+    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
+    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
+    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
+    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
+    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
+    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
+    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
+    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
+    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
+    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
+    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
+    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
+    ('multiple words of text', 'uh-uh', FAIL),
+    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
+    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
+    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
+    ('[k]', 'ab', FAIL),
+    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
+    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
+    ('^(a+).\\1$', 'aaaa', FAIL),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
+    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
+    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
+    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
+    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
+    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
+    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
+    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
+    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
+    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
+    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
+    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
+    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
+    ('([abc]*)x', 'abc', FAIL),
+    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
+    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
+
+    # Test symbolic groups
+
+    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
+    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
+    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
+    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
+
+    # Test octal escapes/memory references
+
+    ('\\1', 'a', SYNTAX_ERROR),
+    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
+    ('\\141', 'a', SUCCEED, 'found', 'a'),
+    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
+
+    # All tests from Perl
+
+    ('abc', 'abc', SUCCEED, 'found', 'abc'),
+    ('abc', 'xbc', FAIL),
+    ('abc', 'axc', FAIL),
+    ('abc', 'abx', FAIL),
+    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
+    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
+    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab+bc', 'abc', FAIL),
+    ('ab+bc', 'abq', FAIL),
+    ('ab{1,}bc', 'abq', FAIL),
+    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
+    ('ab{4,5}bc', 'abbbbc', FAIL),
+    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
+    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab?bc', 'abbbbc', FAIL),
+    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
+    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'abcc', FAIL),
+    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
+    ('^abc$', 'aabc', FAIL),
+    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
+    ('^', 'abc', SUCCEED, 'found', ''),
+    ('$', 'abc', SUCCEED, 'found', ''),
+    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
+    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
+    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
+    ('a.*c', 'axyzd', FAIL),
+    ('a[bc]d', 'abc', FAIL),
+    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
+    ('a[b-d]e', 'abd', FAIL),
+    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
+    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
+    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
+    ('a[b-a]', '-', SYNTAX_ERROR),
+    ('a[]b', '-', SYNTAX_ERROR),
+    ('a[', '-', SYNTAX_ERROR),
+    ('a]', 'a]', SUCCEED, 'found', 'a]'),
+    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
+    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
+    ('a[^bc]d', 'abd', FAIL),
+    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('a[^-b]c', 'a-c', FAIL),
+    ('a[^]b]c', 'a]c', FAIL),
+    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
+    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
+    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
+    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('*a', '-', SYNTAX_ERROR),
+    ('(*)b', '-', SYNTAX_ERROR),
+    ('$b', 'b', FAIL),
+    ('a\\', '-', SYNTAX_ERROR),
+    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
+    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
+    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
+    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
+    ('abc)', '-', SYNTAX_ERROR),
+    ('(abc', '-', SYNTAX_ERROR),
+    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
+    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
+    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
+    ('a**', '-', SYNTAX_ERROR),
+    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
+    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
+    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
+    (')(', '-', SYNTAX_ERROR),
+    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
+    ('abc', '', FAIL),
+    ('a*', '', SUCCEED, 'found', ''),
+    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
+    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
+    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
+    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
+    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
+    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
+    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
+    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
+    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
+    ('^(ab|cd)e', 'abcde', FAIL),
+    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
+    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
+    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
+    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
+    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
+    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
+    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
+    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
+    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
+    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
+    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
+    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
+    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
+    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
+    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
+# Python does not have the same rules for \\41 so this is a syntax error
+#    ('((((((((((a))))))))))\\41', 'aa', FAIL),
+#    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
+    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
+    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
+    ('multiple words of text', 'uh-uh', FAIL),
+    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
+    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
+    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
+    ('[k]', 'ab', FAIL),
+    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
+    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
+    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)abc', 'XBC', FAIL),
+    ('(?i)abc', 'AXC', FAIL),
+    ('(?i)abc', 'ABX', FAIL),
+    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
+    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab+bc', 'ABC', FAIL),
+    ('(?i)ab+bc', 'ABQ', FAIL),
+    ('(?i)ab{1,}bc', 'ABQ', FAIL),
+    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
+    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
+    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
+    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab??bc', 'ABBBBC', FAIL),
+    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'ABCC', FAIL),
+    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^abc$', 'AABC', FAIL),
+    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
+    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
+    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
+    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
+    ('(?i)a.*c', 'AXYZD', FAIL),
+    ('(?i)a[bc]d', 'ABC', FAIL),
+    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
+    ('(?i)a[b-d]e', 'ABD', FAIL),
+    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
+    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
+    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
+    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
+    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
+    ('(?i)a[]b', '-', SYNTAX_ERROR),
+    ('(?i)a[', '-', SYNTAX_ERROR),
+    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
+    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
+    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
+    ('(?i)a[^bc]d', 'ABD', FAIL),
+    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
+    ('(?i)a[^-b]c', 'A-C', FAIL),
+    ('(?i)a[^]b]c', 'A]C', FAIL),
+    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
+    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
+    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
+    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
+    ('(?i)*a', '-', SYNTAX_ERROR),
+    ('(?i)(*)b', '-', SYNTAX_ERROR),
+    ('(?i)$b', 'B', FAIL),
+    ('(?i)a\\', '-', SYNTAX_ERROR),
+    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
+    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
+    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
+    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
+    ('(?i)abc)', '-', SYNTAX_ERROR),
+    ('(?i)(abc', '-', SYNTAX_ERROR),
+    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
+    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
+    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a**', '-', SYNTAX_ERROR),
+    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
+    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
+    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
+    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
+    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
+    ('(?i))(', '-', SYNTAX_ERROR),
+    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
+    ('(?i)abc', '', FAIL),
+    ('(?i)a*', '', SUCCEED, 'found', ''),
+    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
+    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
+    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
+    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
+    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
+    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
+    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
+    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
+    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
+    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
+    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
+    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
+    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
+    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
+    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
+    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
+    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
+    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
+    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
+    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
+    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
+    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
+    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
+    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
+    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
+    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
+    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
+    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
+    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
+    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
+    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
+    ('(?i)multiple words of text', 'UH-UH', FAIL),
+    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
+    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
+    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
+    ('(?i)[k]', 'AB', FAIL),
+#    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
+#    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
+    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
+    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
+    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
+    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
+    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
+    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
+    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
+
+    # lookbehind: split by : but not if it is escaped by -.
+    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
+    # escaping with \ as we know it
+    ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
+    # terminating with ' and escaping with ? as in edifact
+    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
+
+    # Comments using the (?#...) syntax
+
+    ('w(?# comment', 'w', SYNTAX_ERROR),
+    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
+
+    # Check odd placement of embedded pattern modifiers
+
+    # not an error under PCRE/PRE:
+    ('(?i)w', 'W', SUCCEED, 'found', 'W'),
+    # ('w(?i)', 'W', SYNTAX_ERROR),
+
+    # Comments using the x embedded pattern modifier
+
+    ("""(?x)w# comment 1
+        x y
+        # comment 2
+        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
+
+    # using the m embedded pattern modifier
+
+    ('^abc', """jkl
+abc
+xyz""", FAIL),
+    ('(?m)^abc', """jkl
+abc
+xyz""", SUCCEED, 'found', 'abc'),
+
+    ('(?m)abc$', """jkl
+xyzabc
+123""", SUCCEED, 'found', 'abc'),
+
+    # using the s embedded pattern modifier
+
+    ('a.b', 'a\nb', FAIL),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+
+    # test \w, etc. both inside and outside character classes
+
+    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
+    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
+    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
+    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
+    # not an error under PCRE/PRE:
+    # ('[\\d-x]', '-', SYNTAX_ERROR),
+    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
+
+    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
+    # new \x semantics
+    (r'\x00ff', '\377', FAIL),
+    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
+    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
+    ('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
+    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
+    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
+
+    #
+    # post-1.5.2 additions
+
+    # xmllib problem
+    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
+    # bug 110866: reference to undefined group
+    (r'((.)\1+)', '', SYNTAX_ERROR),
+    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
+    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
+    # bug 112468: various expected syntax errors
+    (r'(', '', SYNTAX_ERROR),
+    (r'[\41]', '!', SUCCEED, 'found', '!'),
+    # bug 114033: nothing to repeat
+    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
+    # bug 115040: rescan if flags are modified inside pattern
+    (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
+    # bug 115618: negative lookbehind
+    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
+    # bug 116251: character class bug
+    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
+    # bug 123769+127259: non-greedy backtracking bug
+    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
+    (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
+    (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
+    # bug 127259: \Z shouldn't depend on multiline mode
+    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
+    # bug 128899: uppercase literals under the ignorecase flag
+    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
+    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
+    # bug 130748: ^* should be an error (nothing to repeat)
+    (r'^*', '', SYNTAX_ERROR),
+    # bug 133283: minimizing repeat problem
+    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
+    # bug 477728: minimizing repeat problem
+    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
+    # bug 483789: minimizing repeat problem
+    (r'a[^>]*?b', 'a>b', FAIL),
+    # bug 490573: minimizing repeat problem
+    (r'^a*?$', 'foo', FAIL),
+    # bug 470582: nested groups problem
+    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
+    # another minimizing repeat problem (capturing groups in assertions)
+    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
+]
+
+try:
+    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")  # literal is parsed at run time, so a SyntaxError can be caught here
+except SyntaxError:
+    pass  # the u'' literal could not be parsed (presumably no unicode support): leave tests unchanged
+else:
+    tests.extend([  # the last two cases use the character u as both the input and the expected result
+    # bug 410271: \b broken under locales
+    (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
+    (r'(?u)\b.\b', u, SUCCEED, 'found', u),
+    (r'(?u)\w', u, SUCCEED, 'found', u),
+    ])
=== modified file Lib/test/test_re.py
--- Lib/test/test_re.py	2008-09-10 14:27:00 +0000
+++ Lib/test/test_re.py	2009-02-03 18:32:06 +0000
@@ -1,818 +1,897 @@
-import sys
-sys.path = ['.'] + sys.path
-
-from test.test_support import verbose, run_unittest
-import re
-from re import Scanner
-import sys, os, traceback
-from weakref import proxy
-
-# Misc tests from Tim Peters' re.doc
-
-# WARNING: Don't change details in these tests if you don't know
-# what you're doing. Some of these tests were carefuly modeled to
-# cover most of the code.
-
-import unittest
-
-class ReTests(unittest.TestCase):
-
-    def test_weakref(self):
-        s = 'QabbbcR'
-        x = re.compile('ab+c')
-        y = proxy(x)
-        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
-
-    def test_search_star_plus(self):
-        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
-        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
-        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
-        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
-        self.assertEqual(re.search('x', 'aaa'), None)
-        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
-        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
-        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
-        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
-        self.assertEqual(re.match('a+', 'xxx'), None)
-
-    def bump_num(self, matchobj):
-        int_value = int(matchobj.group(0))
-        return str(int_value + 1)
-
-    def test_basic_re_sub(self):
-        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
-        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
-                         '9.3 -3 24x100y')
-        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
-                         '9.3 -3 23x99y')
-
-        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
-        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
-
-        s = r"\1\1"
-        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
-        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
-        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
-
-        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
-        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
-        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
-        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
-
-        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
-                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
-        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
-        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
-                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
-
-        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
-
-    def test_bug_449964(self):
-        # fails for group followed by other escape
-        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
-                         'xx\bxx\b')
-
-    def test_bug_449000(self):
-        # Test for sub() on escaped characters
-        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
-                         'abc\ndef\n')
-        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
-                         'abc\ndef\n')
-        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
-                         'abc\ndef\n')
-        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
-                         'abc\ndef\n')
-
-    def test_bug_1140(self):
-        # re.sub(x, y, u'') should return u'', not '', and
-        # re.sub(x, y, '') should return '', not u''.
-        # Also:
-        # re.sub(x, y, unicode(x)) should return unicode(y), and
-        # re.sub(x, y, str(x)) should return
-        #     str(y) if isinstance(y, str) else unicode(y).
-        for x in 'x', u'x':
-            for y in 'y', u'y':
-                z = re.sub(x, y, u'')
-                self.assertEqual(z, u'')
-                self.assertEqual(type(z), unicode)
-                #
-                z = re.sub(x, y, '')
-                self.assertEqual(z, '')
-                self.assertEqual(type(z), str)
-                #
-                z = re.sub(x, y, unicode(x))
-                self.assertEqual(z, y)
-                self.assertEqual(type(z), unicode)
-                #
-                z = re.sub(x, y, str(x))
-                self.assertEqual(z, y)
-                self.assertEqual(type(z), type(y))
-
-    def test_bug_1661(self):
-        # Verify that flags do not get silently ignored with compiled patterns
-        pattern = re.compile('.')
-        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
-        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
-        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
-        self.assertRaises(ValueError, re.compile, pattern, re.I)
-
-    def test_bug_3629(self):
-        # A regex that triggered a bug in the sre-code validator
-        re.compile("(?P<quote>)(?(quote))")
-
-    def test_sub_template_numeric_escape(self):
-        # bug 776311 and friends
-        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
-        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
-        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
-        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
-        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
-        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
-        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
-
-        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
-        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
-
-        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
-        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
-        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
-        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
-        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
-
-        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
-        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
-
-        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
-        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
-        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
-        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
-
-        # in python2.3 (etc), these loop endlessly in sre_parser.py
-        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
-        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
-                         'xz8')
-        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
-                         'xza')
-
-    def test_qualified_re_sub(self):
-        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
-        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
-
-    def test_bug_114660(self):
-        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
-                         'hello there')
-
-    def test_bug_462270(self):
-        # Test for empty sub() behaviour, see SF bug #462270
-        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
-        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
-
-    def test_symbolic_refs(self):
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
-        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
-
-    def test_re_subn(self):
-        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
-        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
-        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
-        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
-        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
-
-    def test_re_split(self):
-        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
-        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
-        self.assertEqual(re.split("(:*)", ":a:b::c"),
-                         ['', ':', 'a', ':', 'b', '::', 'c'])
-        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
-        self.assertEqual(re.split("(:)*", ":a:b::c"),
-                         ['', ':', 'a', ':', 'b', ':', 'c'])
-        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
-                         ['', ':', 'a', ':b::', 'c'])
-        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
-                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
-                          None, '::', 'c'])
-        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
-                         ['', 'a', '', '', 'c'])
-
-    def test_qualified_re_split(self):
-        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
-        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
-        self.assertEqual(re.split("(:)", ":a:b::c", 2),
-                         ['', ':', 'a', ':', 'b::c'])
-        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
-                         ['', ':', 'a', ':', 'b::c'])
-
-    def test_re_findall(self):
-        self.assertEqual(re.findall(":+", "abc"), [])
-        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
-        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
-        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
-                                                               (":", ":"),
-                                                               (":", "::")])
-
-    def test_bug_117612(self):
-        self.assertEqual(re.findall(r"(a|(b))", "aba"),
-                         [("a", ""),("b", "b"),("a", "")])
-
-    def test_re_match(self):
-        self.assertEqual(re.match('a', 'a').groups(), ())
-        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
-        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
-        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
-        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
-
-        pat = re.compile('((a)|(b))(c)?')
-        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
-        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
-        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
-        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
-        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
-
-        # A single group
-        m = re.match('(a)', 'a')
-        self.assertEqual(m.group(0), 'a')
-        self.assertEqual(m.group(0), 'a')
-        self.assertEqual(m.group(1), 'a')
-        self.assertEqual(m.group(1, 1), ('a', 'a'))
-
-        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
-        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
-        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
-                         (None, 'b', None))
-        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
-
-    def test_re_groupref_exists(self):
-        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
-                         ('(', 'a'))
-        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
-                         (None, 'a'))
-        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
-        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
-        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
-                         ('a', 'b'))
-        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
-                         (None, 'd'))
-        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
-                         (None, 'd'))
-        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
-                         ('a', ''))
-
-        # Tests for bug #1177831: exercise groups other than the first group
-        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
-        self.assertEqual(p.match('abc').groups(),
-                         ('a', 'b', 'c'))
-        self.assertEqual(p.match('ad').groups(),
-                         ('a', None, 'd'))
-        self.assertEqual(p.match('abd'), None)
-        self.assertEqual(p.match('ac'), None)
-
-
-    def test_re_groupref(self):
-        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
-                         ('|', 'a'))
-        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
-                         (None, 'a'))
-        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
-        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
-        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
-                         ('a', 'a'))
-        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
-                         (None, None))
-
-    def test_groupdict(self):
-        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
-                                  'first second').groupdict(),
-                         {'first':'first', 'second':'second'})
-
-    def test_expand(self):
-        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
-                                  "first second")
-                                  .expand(r"\2 \1 \g<second> \g<first>"),
-                         "second first second first")
-
-    def test_repeat_minmax(self):
-        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
-        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
-        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
-        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
-
-        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
-        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
-
-        self.assertEqual(re.match("^x{1}$", "xxx"), None)
-        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
-        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
-        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
-
-        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
-
-        self.assertEqual(re.match("^x{}$", "xxx"), None)
-        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
-
-    def test_getattr(self):
-        self.assertEqual(re.match("(a)", "a").pos, 0)
-        self.assertEqual(re.match("(a)", "a").endpos, 1)
-        self.assertEqual(re.match("(a)", "a").string, "a")
-        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
-        self.assertNotEqual(re.match("(a)", "a").re, None)
-
-    def test_special_escapes(self):
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx").group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd").group(1), "bx")
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
-        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   u"abcd abc bcd bx").group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   u"abc bcd bc abxd").group(1), "bx")
-        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a").group(0), "1aa! a")
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
-
-    def test_bigcharset(self):
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222").group(1), u"\u2222")
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
-
-    def test_anyall(self):
-        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
-                         "a\nb")
-        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
-                         "a\n\nb")
-
-    def test_non_consuming(self):
-        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
-        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
-        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
-        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
-        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
-        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
-        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
-
-        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
-        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
-        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
-        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
-
-    def test_ignore_case(self):
-        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
-        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
-        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
-        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
-        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
-        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
-        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
-        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
-        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
-        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
-
-    def test_category(self):
-        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
-
-    def test_getlower(self):
-        import _sre
-        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
-        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
-        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
-
-        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
-        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
-
-    def test_not_literal(self):
-        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
-        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
-
-    def test_search_coverage(self):
-        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
-        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
-
-    def test_re_escape(self):
-        p=""
-        for i in range(0, 256):
-            p = p + chr(i)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
-                             True)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
-
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
-
-    def test_pickling(self):
-        import pickle
-        self.pickle_test(pickle)
-        import cPickle
-        self.pickle_test(cPickle)
-        # old pickles expect the _compile() reconstructor in sre module
-        import warnings
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", "The sre module is deprecated",
-                                    DeprecationWarning)
-            from sre import _compile
-
-    def pickle_test(self, pickle):
-        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
-        s = pickle.dumps(oldpat)
-        newpat = pickle.loads(s)
-        self.assertEqual(oldpat, newpat)
-
-    def test_constants(self):
-        self.assertEqual(re.I, re.IGNORECASE)
-        self.assertEqual(re.L, re.LOCALE)
-        self.assertEqual(re.M, re.MULTILINE)
-        self.assertEqual(re.S, re.DOTALL)
-        self.assertEqual(re.X, re.VERBOSE)
-
-    def test_flags(self):
-        for flag in [re.I, re.M, re.X, re.S, re.L]:
-            self.assertNotEqual(re.compile('^pattern$', flag), None)
-
-    def test_sre_character_literals(self):
-        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
-            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
-            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
-            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
-            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
-        self.assertRaises(re.error, re.match, "\911", "")
-
-    def test_sre_character_class_literals(self):
-        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
-            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
-            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
-        self.assertRaises(re.error, re.match, "[\911]", "")
-
-    def test_bug_113254(self):
-        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
-        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
-        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
-
-    def test_bug_527371(self):
-        # bug described in patches 527371/672491
-        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
-        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
-        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
-        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
-        self.assertEqual(re.match("((a))", "a").lastindex, 1)
-
-    def test_bug_545855(self):
-        # bug 545855 -- This pattern failed to cause a compile error as it
-        # should, instead provoking a TypeError.
-        self.assertRaises(re.error, re.compile, 'foo[a-')
-
-    def test_bug_418626(self):
-        # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
-        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
-        # pattern '*?' on a long string.
-        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
-        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
-                         20003)
-        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
-        # non-simple '*?' still used to hit the recursion limit, before the
-        # non-recursive scheme was implemented.
-        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
-
-    def test_bug_612074(self):
-        pat=u"["+re.escape(u"\u2039")+u"]"
-        self.assertEqual(re.compile(pat) and 1, 1)
-
-    def test_stack_overflow(self):
-        # nasty cases that used to overflow the straightforward recursive
-        # implementation of repeated groups.
-        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
-        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
-        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
-
-    def test_scanner(self):
-        def s_ident(scanner, token): return token
-        def s_operator(scanner, token): return "op%s" % token
-        def s_float(scanner, token): return float(token)
-        def s_int(scanner, token): return int(token)
-
-        scanner = Scanner([
-            (r"[a-zA-Z_]\w*", s_ident),
-            (r"\d+\.\d*", s_float),
-            (r"\d+", s_int),
-            (r"=|\+|-|\*|/", s_operator),
-            (r"\s+", None),
-            ])
-
-        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
-
-        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
-                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
-                           'op+', 'bar'], ''))
-
-    def test_bug_448951(self):
-        # bug 448951 (similar to 429357, but with single char match)
-        # (Also test greedy matches.)
-        for op in '','?','*':
-            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
-                             (None, None))
-            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
-                             ('a:', 'a'))
-
-    def test_bug_725106(self):
-        # capturing groups in alternatives in repeats
-        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
-                         ('b', 'a'))
-        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
-                         ('c', 'b'))
-        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
-                         ('b', None))
-        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
-                         ('b', None))
-        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
-                         ('b', 'a'))
-        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
-                         ('c', 'b'))
-        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
-                         ('b', None))
-        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
-                         ('b', None))
-
-    def test_bug_725149(self):
-        # mark_stack_base restoring before restoring marks
-        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
-                         ('a', None))
-        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
-                         ('a', None, None))
-
-    def test_bug_764548(self):
-        # bug 764548, re.compile() barfs on str/unicode subclasses
-        try:
-            unicode
-        except NameError:
-            return  # no problem if we have no unicode
-        class my_unicode(unicode): pass
-        pat = re.compile(my_unicode("abc"))
-        self.assertEqual(pat.match("xyz"), None)
-
-    def test_finditer(self):
-        iter = re.finditer(r":+", "a:b::c:::d")
-        self.assertEqual([item.group(0) for item in iter],
-                         [":", "::", ":::"])
-
-    def test_bug_926075(self):
-        try:
-            unicode
-        except NameError:
-            return # no problem if we have no unicode
-        self.assert_(re.compile('bug_926075') is not
-                     re.compile(eval("u'bug_926075'")))
-
-    def test_bug_931848(self):
-        try:
-            unicode
-        except NameError:
-            pass
-        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
-        self.assertEqual(re.compile(pattern).split("a.b.c"),
-                         ['a','b','c'])
-
-    def test_bug_581080(self):
-        iter = re.finditer(r"\s", "a b")
-        self.assertEqual(iter.next().span(), (1,2))
-        self.assertRaises(StopIteration, iter.next)
-
-        scanner = re.compile(r"\s").scanner("a b")
-        self.assertEqual(scanner.search().span(), (1, 2))
-        self.assertEqual(scanner.search(), None)
-
-    def test_bug_817234(self):
-        iter = re.finditer(r".*", "asdf")
-        self.assertEqual(iter.next().span(), (0, 4))
-        self.assertEqual(iter.next().span(), (4, 4))
-        self.assertRaises(StopIteration, iter.next)
-
-    def test_empty_array(self):
-        # SF buf 1647541
-        import array
-        for typecode in 'cbBuhHiIlLfd':
-            a = array.array(typecode)
-            self.assertEqual(re.compile("bla").match(a), None)
-            self.assertEqual(re.compile("").match(a).groups(), ())
-
-    def test_inline_flags(self):
-        # Bug #1700
-        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
-        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
-
-        p = re.compile(upper_char, re.I | re.U)
-        q = p.match(lower_char)
-        self.assertNotEqual(q, None)
-
-        p = re.compile(lower_char, re.I | re.U)
-        q = p.match(upper_char)
-        self.assertNotEqual(q, None)
-
-        p = re.compile('(?i)' + upper_char, re.U)
-        q = p.match(lower_char)
-        self.assertNotEqual(q, None)
-
-        p = re.compile('(?i)' + lower_char, re.U)
-        q = p.match(upper_char)
-        self.assertNotEqual(q, None)
-
-        p = re.compile('(?iu)' + upper_char)
-        q = p.match(lower_char)
-        self.assertNotEqual(q, None)
-
-        p = re.compile('(?iu)' + lower_char)
-        q = p.match(upper_char)
-        self.assertNotEqual(q, None)
-
-    def test_dollar_matches_twice(self):
-        "$ matches the end of string, and just before the terminating \n"
-        pattern = re.compile('$')
-        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
-        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
-        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
-
-        pattern = re.compile('$', re.MULTILINE)
-        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
-        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
-        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
-
-
-def run_re_tests():
-    from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
-    if verbose:
-        print 'Running re_tests test suite'
-    else:
-        # To save time, only run the first and last 10 tests
-        #tests = tests[:10] + tests[-10:]
-        pass
-
-    for t in tests:
-        sys.stdout.flush()
-        pattern = s = outcome = repl = expected = None
-        if len(t) == 5:
-            pattern, s, outcome, repl, expected = t
-        elif len(t) == 3:
-            pattern, s, outcome = t
-        else:
-            raise ValueError, ('Test tuples should have 3 or 5 fields', t)
-
-        try:
-            obj = re.compile(pattern)
-        except re.error:
-            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
-            else:
-                print '=== Syntax error:', t
-        except KeyboardInterrupt: raise KeyboardInterrupt
-        except:
-            print '*** Unexpected error ***', t
-            if verbose:
-                traceback.print_exc(file=sys.stdout)
-        else:
-            try:
-                result = obj.search(s)
-            except re.error, msg:
-                print '=== Unexpected exception', t, repr(msg)
-            if outcome == SYNTAX_ERROR:
-                # This should have been a syntax error; forget it.
-                pass
-            elif outcome == FAIL:
-                if result is None: pass   # No match, as expected
-                else: print '=== Succeeded incorrectly', t
-            elif outcome == SUCCEED:
-                if result is not None:
-                    # Matched, as expected, so now we compute the
-                    # result string and compare it to our expected result.
-                    start, end = result.span(0)
-                    vardict={'found': result.group(0),
-                             'groups': result.group(),
-                             'flags': result.re.flags}
-                    for i in range(1, 100):
-                        try:
-                            gi = result.group(i)
-                            # Special hack because else the string concat fails:
-                            if gi is None:
-                                gi = "None"
-                        except IndexError:
-                            gi = "Error"
-                        vardict['g%d' % i] = gi
-                    for i in result.re.groupindex.keys():
-                        try:
-                            gi = result.group(i)
-                            if gi is None:
-                                gi = "None"
-                        except IndexError:
-                            gi = "Error"
-                        vardict[i] = gi
-                    repl = eval(repl, vardict)
-                    if repl != expected:
-                        print '=== grouping error', t,
-                        print repr(repl) + ' should be ' + repr(expected)
-                else:
-                    print '=== Failed incorrectly', t
-
-                # Try the match on a unicode string, and check that it
-                # still succeeds.
-                try:
-                    result = obj.search(unicode(s, "latin-1"))
-                    if result is None:
-                        print '=== Fails on unicode match', t
-                except NameError:
-                    continue # 1.5.2
-                except TypeError:
-                    continue # unicode test case
-
-                # Try the match on a unicode pattern, and check that it
-                # still succeeds.
-                obj=re.compile(unicode(pattern, "latin-1"))
-                result = obj.search(s)
-                if result is None:
-                    print '=== Fails on unicode pattern match', t
-
-                # Try the match with the search area limited to the extent
-                # of the match and see if it still succeeds.  \B will
-                # break (because it won't match at the end or start of a
-                # string), so we'll ignore patterns that feature it.
-
-                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
-                               and result is not None:
-                    obj = re.compile(pattern)
-                    result = obj.search(s, result.start(0), result.end(0) + 1)
-                    if result is None:
-                        print '=== Failed on range-limited match', t
-
-                # Try the match with IGNORECASE enabled, and check that it
-                # still succeeds.
-                obj = re.compile(pattern, re.IGNORECASE)
-                result = obj.search(s)
-                if result is None:
-                    print '=== Fails on case-insensitive match', t
-
-                # Try the match with LOCALE enabled, and check that it
-                # still succeeds.
-                obj = re.compile(pattern, re.LOCALE)
-                result = obj.search(s)
-                if result is None:
-                    print '=== Fails on locale-sensitive match', t
-
-                # Try the match with UNICODE locale enabled, and check
-                # that it still succeeds.
-                obj = re.compile(pattern, re.UNICODE)
-                result = obj.search(s)
-                if result is None:
-                    print '=== Fails on unicode-sensitive match', t
-
-def test_main():
-    run_unittest(ReTests)
-    run_re_tests()
-
-if __name__ == "__main__":
-    test_main()
+import sys
+sys.path = ['.'] + sys.path
+
+from test.test_support import verbose, run_unittest
+import re
+from re import Scanner
+import sys, os, traceback
+from weakref import proxy
+import unicodedata
+
+# Misc tests from Tim Peters' re.doc
+
+# WARNING: Don't change details in these tests if you don't know
+# what you're doing. Some of these tests were carefully modeled to
+# cover most of the code.
+
+import unittest
+
+class ReTests(unittest.TestCase):
+
+    def test_weakref(self):
+        s = 'QabbbcR'
+        x = re.compile('ab+c')
+        y = proxy(x)
+        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
+
+    def test_search_star_plus(self):
+        self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
+        self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
+        self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
+        self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
+        self.assertEqual(re.search('x', 'aaa'), None)
+        self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
+        self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
+        self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
+        self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
+        self.assertEqual(re.match('a+', 'xxx'), None)
+
+    def bump_num(self, matchobj):
+        int_value = int(matchobj.group(0))
+        return str(int_value + 1)
+
+    def test_basic_re_sub(self):
+        self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
+        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
+                         '9.3 -3 24x100y')
+        self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
+                         '9.3 -3 23x99y')
+
+        self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
+        self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
+
+        s = r"\1\1"
+        self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
+        self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
+        self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
+
+        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
+        self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
+
+        self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
+                         '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
+        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
+        self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
+                         (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
+
+        self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
+
+    def test_bug_449964(self):
+        # fails for group followed by other escape
+        self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
+                         'xx\bxx\b')
+
+    def test_bug_449000(self):
+        # Test for sub() on escaped characters
+        self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
+                         'abc\ndef\n')
+
+    def test_bug_1140(self):
+        # re.sub(x, y, u'') should return u'', not '', and
+        # re.sub(x, y, '') should return '', not u''.
+        # Also:
+        # re.sub(x, y, unicode(x)) should return unicode(y), and
+        # re.sub(x, y, str(x)) should return
+        #     str(y) if isinstance(y, str) else unicode(y).
+        for x in 'x', u'x':
+            for y in 'y', u'y':
+                z = re.sub(x, y, u'')
+                self.assertEqual(z, u'')
+                self.assertEqual(type(z), unicode)
+                #
+                z = re.sub(x, y, '')
+                self.assertEqual(z, '')
+                self.assertEqual(type(z), str)
+                #
+                z = re.sub(x, y, unicode(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), unicode)
+                #
+                z = re.sub(x, y, str(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), type(y))
+
+    def test_bug_1661(self):
+        # Verify that flags do not get silently ignored with compiled patterns
+        pattern = re.compile('.')
+        self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
+        self.assertRaises(ValueError, re.compile, pattern, re.I)
+
+    def test_bug_3629(self):
+        # A regex that triggered a bug in the sre-code validator
+        re.compile("(?P<quote>)(?(quote))")
+
+    def test_sub_template_numeric_escape(self):
+        # bug 776311 and friends
+        self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
+        self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
+        self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
+        self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
+        self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
+        self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
+        self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
+
+        self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
+        self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
+
+        self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
+        self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
+        self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
+        self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
+        self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
+
+        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
+        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+
+        self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
+        self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
+        self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
+        self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
+
+        # in python2.3 (etc), these loop endlessly in sre_parse.py
+        self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
+        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
+                         'xz8')
+        self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
+                         'xza')
+
+    def test_qualified_re_sub(self):
+        self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
+        self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
+
+    def test_bug_114660(self):
+        self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
+                         'hello there')
+
+    def test_bug_462270(self):
+        # Test for empty sub() behaviour, see SF bug #462270
+        self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
+        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
+
+    def test_symbolic_refs(self):
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx'), '')
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx'), '')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
+
+    def test_re_subn(self):
+        self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
+        self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
+        self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
+        self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
+        self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
+
+    def test_re_split(self):
+        self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
+        self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
+        self.assertEqual(re.split("(:*)", ":a:b::c"),
+                         ['', ':', 'a', ':', 'b', '::', 'c'])
+        self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
+        self.assertEqual(re.split("(:)*", ":a:b::c"),
+                         ['', ':', 'a', ':', 'b', ':', 'c'])
+        self.assertEqual(re.split("([b:]+)", ":a:b::c"),
+                         ['', ':', 'a', ':b::', 'c'])
+        self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
+                         ['', None, ':', 'a', None, ':', '', 'b', None, '',
+                          None, '::', 'c'])
+        self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
+                         ['', 'a', '', '', 'c'])
+        self.assertEqual(re.split("(?z):*", ":a:b::c"), ['', 'a', 'b', 'c', ''])
+
+    def test_qualified_re_split(self):
+        self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
+        self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
+        self.assertEqual(re.split("(:)", ":a:b::c", 2),
+                         ['', ':', 'a', ':', 'b::c'])
+        self.assertEqual(re.split("(:*)", ":a:b::c", 2),
+                         ['', ':', 'a', ':', 'b::c'])
+
+    def test_re_findall(self):
+        self.assertEqual(re.findall(":+", "abc"), [])
+        self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
+        self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
+        self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
+                                                               (":", ":"),
+                                                               (":", "::")])
+
+    def test_bug_117612(self):
+        self.assertEqual(re.findall(r"(a|(b))", "aba"),
+                         [("a", ""),("b", "b"),("a", "")])
+
+    def test_re_match(self):
+        self.assertEqual(re.match('a', 'a').groups(), ())
+        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
+        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
+        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
+        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
+
+        pat = re.compile('((a)|(b))(c)?')
+        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
+        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
+        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
+        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
+        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
+
+        # A single group; group() with no argument must behave like group(0)
+        m = re.match('(a)', 'a')
+        self.assertEqual(m.group(), 'a')
+        self.assertEqual(m.group(0), 'a')
+        self.assertEqual(m.group(1), 'a')
+        self.assertEqual(m.group(1, 1), ('a', 'a'))
+
+        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
+        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
+        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
+                         (None, 'b', None))
+        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
+
+    def test_re_groupref_exists(self):
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
+                         ('(', 'a'))
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
+                         (None, 'a'))
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
+        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
+                         ('a', 'b'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
+                         (None, 'd'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
+                         (None, 'd'))
+        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
+                         ('a', ''))
+
+        # Tests for bug #1177831: exercise groups other than the first group
+        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
+        self.assertEqual(p.match('abc').groups(),
+                         ('a', 'b', 'c'))
+        self.assertEqual(p.match('ad').groups(),
+                         ('a', None, 'd'))
+        self.assertEqual(p.match('abd'), None)
+        self.assertEqual(p.match('ac'), None)
+
+
+    def test_re_groupref(self):
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
+                         ('|', 'a'))
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
+                         (None, 'a'))
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
+        self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
+        self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
+                         ('a', 'a'))
+        self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
+                         (None, None))
+
+    def test_groupdict(self):
+        self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
+                                  'first second').groupdict(),
+                         {'first':'first', 'second':'second'})
+
+    def test_expand(self):
+        self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
+                                  "first second")
+                                  .expand(r"\2 \1 \g<second> \g<first>"),
+                         "second first second first")
+
+    def test_repeat_minmax(self):
+        self.assertEqual(re.match("^(\w){1}$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
+        self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
+        # Greedy forms first, then the corresponding lazy ({m,n}?) forms.
+        self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3,4}$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
+        self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
+
+        self.assertEqual(re.match("^x{1}$", "xxx"), None)
+        self.assertEqual(re.match("^x{1}?$", "xxx"), None)
+        self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
+        self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
+        # Greedy forms first, then the corresponding lazy ({m,n}?) forms.
+        self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3,4}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
+
+        self.assertEqual(re.match("^x{}$", "xxx"), None)
+        self.assertNotEqual(re.match("^x{}$", "x{}"), None)
+
+    def test_getattr(self):
+        self.assertEqual(re.match("(a)", "a").pos, 0)
+        self.assertEqual(re.match("(a)", "a").endpos, 1)
+        self.assertEqual(re.match("(a)", "a").string, "a")
+        self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
+        self.assertNotEqual(re.match("(a)", "a").re, None)
+
+    def test_special_escapes(self):
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx").group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd").group(1), "bx")
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
+        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
+        self.assertEqual(re.search(r"\b(b.)\b",
+                                   u"abcd abc bcd bx").group(1), "bx")
+        self.assertEqual(re.search(r"\B(b.)\B",
+                                   u"abc bcd bc abxd").group(1), "bx")
+        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a").group(0), "1aa! a")
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
+        self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
+
+    def test_bigcharset(self):
+        self.assertEqual(re.match(u"([\u2222\u2223])",
+                                  u"\u2222").group(1), u"\u2222")
+        self.assertEqual(re.match(u"([\u2222\u2223])",
+                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+
+    def test_anyall(self):
+        self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
+                         "a\nb")
+        self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
+                         "a\n\nb")
+
+    def test_non_consuming(self):
+        self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
+        self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
+
+        self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
+        self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
+        self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
+
+    def test_ignore_case(self):
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
+        self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
+        self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
+        self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
+        self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
+        self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
+        self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
+        self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
+
+    def test_category(self):
+        self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
+
+    def test_getlower(self):
+        import _sre
+        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
+        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
+        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+
+    def test_not_literal(self):
+        self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
+        self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
+
+    def test_search_coverage(self):
+        self.assertEqual(re.search("\s(b)", " b").group(1), "b")
+        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
+
+    def test_re_escape(self):
+        p=""
+        for i in range(0, 256):
+            p = p + chr(i)
+            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
+                             True)
+            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
+
+        pat=re.compile(re.escape(p))
+        self.assertEqual(pat.match(p) is not None, True)
+        self.assertEqual(pat.match(p).span(), (0,256))
+
+    def test_pickling(self):
+        import pickle
+        self.pickle_test(pickle)
+        import cPickle
+        self.pickle_test(cPickle)
+        # old pickles expect the _compile() reconstructor in sre module
+        import warnings
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", "The sre module is deprecated",
+                                    DeprecationWarning)
+            from sre import _compile
+
+    def pickle_test(self, pickle):
+        oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
+        s = pickle.dumps(oldpat)
+        newpat = pickle.loads(s)
+        self.assertEqual(oldpat, newpat)
+
+    def test_constants(self):
+        self.assertEqual(re.I, re.IGNORECASE)
+        self.assertEqual(re.L, re.LOCALE)
+        self.assertEqual(re.M, re.MULTILINE)
+        self.assertEqual(re.S, re.DOTALL)
+        self.assertEqual(re.X, re.VERBOSE)
+
+    def test_flags(self):
+        for flag in [re.I, re.M, re.X, re.S, re.L]:
+            self.assertNotEqual(re.compile('^pattern$', flag), None)
+
+    def test_sre_character_literals(self):
+        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
+            self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
+            self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
+            self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
+            self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
+        self.assertRaises(re.error, re.match, "\911", "")
+
+    def test_sre_character_class_literals(self):
+        for i in [0, 8, 16, 32, 64, 127, 128, 255]:
+            self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
+            self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
+        self.assertRaises(re.error, re.match, "[\911]", "")
+
+    def test_bug_113254(self):
+        self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
+        self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
+        self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
+
+    def test_bug_527371(self):
+        # bug described in patches 527371/672491
+        self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
+        self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
+        self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
+        self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
+        self.assertEqual(re.match("((a))", "a").lastindex, 1)
+
+    def test_bug_545855(self):
+        # bug 545855 -- This pattern failed to cause a compile error as it
+        # should, instead provoking a TypeError.
+        self.assertRaises(re.error, re.compile, 'foo[a-')
+
+    def test_bug_418626(self):
+        # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
+        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
+        # pattern '*?' on a long string.
+        self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
+        self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
+                         20003)
+        self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
+        # non-simple '*?' still used to hit the recursion limit, before the
+        # non-recursive scheme was implemented.
+        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
+
+    def test_bug_612074(self):
+        pat=u"["+re.escape(u"\u2039")+u"]"
+        self.assertEqual(re.compile(pat) and 1, 1)
+
+    def test_stack_overflow(self):
+        # nasty cases that used to overflow the straightforward recursive
+        # implementation of repeated groups.
+        self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
+        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
+        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
+
+    def test_scanner(self):
+        def s_ident(scanner, token): return token
+        def s_operator(scanner, token): return "op%s" % token
+        def s_float(scanner, token): return float(token)
+        def s_int(scanner, token): return int(token)
+
+        scanner = Scanner([
+            (r"[a-zA-Z_]\w*", s_ident),
+            (r"\d+\.\d*", s_float),
+            (r"\d+", s_int),
+            (r"=|\+|-|\*|/", s_operator),
+            (r"\s+", None),
+            ])
+
+        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
+
+        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
+                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
+                           'op+', 'bar'], ''))
+
+    def test_bug_448951(self):
+        # bug 448951 (similar to 429357, but with single char match)
+        # (Also test greedy matches.)
+        for op in '','?','*':
+            self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
+                             (None, None))
+            self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
+                             ('a:', 'a'))
+
+    def test_bug_725106(self):
+        # capturing groups in alternatives in repeats
+        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+
+    def test_bug_725149(self):
+        # mark_stack_base restoring before restoring marks
+        self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
+                         ('a', None))
+        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
+                         ('a', None, None))
+
+    def test_bug_764548(self):
+        # bug 764548, re.compile() barfs on str/unicode subclasses
+        try:
+            unicode
+        except NameError:
+            return  # no problem if we have no unicode
+        class my_unicode(unicode): pass
+        pat = re.compile(my_unicode("abc"))
+        self.assertEqual(pat.match("xyz"), None)
+
+    def test_finditer(self):
+        iter = re.finditer(r":+", "a:b::c:::d")
+        self.assertEqual([item.group(0) for item in iter],
+                         [":", "::", ":::"])
+
+    def test_bug_926075(self):
+        try:
+            unicode
+        except NameError:
+            return # no problem if we have no unicode
+        self.assert_(re.compile('bug_926075') is not
+                     re.compile(eval("u'bug_926075'")))
+
+    def test_bug_931848(self):
+        try:
+            unicode
+        except NameError:
+            return # no problem if we have no unicode
+        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        self.assertEqual(re.compile(pattern).split("a.b.c"),
+                         ['a','b','c'])
+
+    def test_bug_581080(self):
+        iter = re.finditer(r"\s", "a b")
+        self.assertEqual(iter.next().span(), (1,2))
+        self.assertRaises(StopIteration, iter.next)
+
+        scanner = re.compile(r"\s").scanner("a b")
+        self.assertEqual(scanner.search().span(), (1, 2))
+        self.assertEqual(scanner.search(), None)
+
+    def test_bug_817234(self):
+        iter = re.finditer(r".*", "asdf")
+        self.assertEqual(iter.next().span(), (0, 4))
+        self.assertEqual(iter.next().span(), (4, 4))
+        self.assertRaises(StopIteration, iter.next)
+
+    def test_empty_array(self):
+        # SF bug 1647541
+        import array
+        for typecode in 'cbBuhHiIlLfd':
+            a = array.array(typecode)
+            self.assertEqual(re.compile("bla").match(a), None)
+            self.assertEqual(re.compile("").match(a).groups(), ())
+
+    def test_inline_flags(self):
+        # Bug #1700
+        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
+        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below
+
+        p = re.compile(upper_char, re.I | re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile(lower_char, re.I | re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?i)' + upper_char, re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?i)' + lower_char, re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?iu)' + upper_char)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?iu)' + lower_char)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+    def test_dollar_matches_twice(self):
+        "$ matches the end of string, and just before the terminating \n"
+        pattern = re.compile('$')
+        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
+        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
+        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
+
+        pattern = re.compile('$', re.MULTILINE)
+        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
+        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
+        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
+
+    def test_atomic(self):
+        pattern = re.compile(r'a(?>bc|b)c')
+        self.assertEqual(pattern.match('abc'), None)
+        self.assertNotEqual(pattern.match('abcc'), None)
+        self.assertEqual(re.match(r'(?>.*).', 'abc'), None)
+        self.assertNotEqual(re.match(r'(?>x)++', 'xxx'), None)
+        self.assertNotEqual(re.match(r'(?>x++)', 'xxx'), None)
+        self.assertEqual(re.match(r'(?>x)++x', 'xxx'), None)
+        self.assertEqual(re.match(r'(?>x++)x', 'xxx'), None)
+
+    def test_bug_2537(self):
+        "nested repeat"
+        self.assertEqual(re.sub('((x|y)*)*', '(\\1, \\2)', 'xyyzy', 1), '(, y)zy')
+        self.assertEqual(re.sub('((x|y+)*)*', '(\\1, \\2)', 'xyyzy', 1), '(, yy)zy')
+
+    def test_word_chars(self):
+        word_chars, all_chars = [], []
+        accept_set = set(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'No', 'Mc', 'Me', 'Mn', 'Pc'])
+        for i in xrange(sys.maxunicode + 1): # sys.maxunicode itself is valid
+            c = unichr(i)
+            if c == '_' or unicodedata.category(c) in accept_set:
+                word_chars.append(c)
+            all_chars.append(c)
+        word_chars = u''.join(word_chars)
+        found_chars = u''.join(re.findall(r'(?u)(\w)', u''.join(all_chars)))
+        self.assertEqual(found_chars, word_chars)
+
+    def test_digit_chars(self):
+        digit_chars, all_chars = [], []
+        accept_set = set(['Nd'])
+        for i in xrange(sys.maxunicode + 1): # sys.maxunicode itself is valid
+            c = unichr(i)
+            if unicodedata.category(c) in accept_set:
+                digit_chars.append(c)
+            all_chars.append(c)
+        digit_chars = u''.join(digit_chars)
+        found_chars = u''.join(re.findall(r'(?u)(\d)', u''.join(all_chars)))
+        self.assertEqual(found_chars, digit_chars)
+
+    def test_named_chars(self):
+        self.assertNotEqual(re.match(r"\N{LATIN CAPITAL LETTER A}", u"A"), None)
+        self.assertNotEqual(re.match(r"[\N{LATIN CAPITAL LETTER A}]", u"A"), None)
+        self.assertEqual(re.match(r"\N{LATIN CAPITAL LETTER A}", u"B"), None)
+        self.assertEqual(re.match(r"[\N{LATIN CAPITAL LETTER A}]", u"a"), None)
+
+    def test_unicode_properties(self):
+        self.assertNotEqual(re.match(r"\p{Lu}", u"A"), None)
+        self.assertEqual(re.match(r"\p{Lu}", u"a"), None)
+        self.assertNotEqual(re.match(r"\p{L&}", u"A"), None)
+        
+        ascii_chars = "".join(chr(c) for c in range(0x0, 0x80))
+        charsets = r"""
+\p{Alnum}   [\p{L&}\p{Nd}]          [a-zA-Z0-9]
+\p{Alpha}   \p{L&}                  [a-zA-Z]
+\p{ASCII}                           [\x00-\x7F]
+\p{Blank}   [\p{Zs}\t]              [ \t]
+\p{Cntrl}   \p{Cc}                  [\x00-\x1F\x7F]
+\p{Digit}   \p{Nd}              \d  [0-9]
+\p{Graph}   [^\p{Z}\p{C}]           [\x21-\x7E]
+\p{Lower}   \p{Ll}                  [a-z]  
+\p{Print}   \P{C}                   [\x20-\x7E]  
+\p{Punct}   [\p{P}\p{S}]            [!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]  
+\p{Space}   [\p{Z}\t\r\n\v\f]   \s  [ \t\r\n\v\f]  
+\p{Upper}   \p{Lu}                  [A-Z]  
+            [\p{L}\p{N}\p{Pc}]  \w  [A-Za-z0-9_]  
+\p{XDigit}                          [A-Fa-f0-9]
+"""
+        for line in charsets.splitlines():
+            parts = [p.strip() for p in line.split("  ")]
+            parts = [p for p in parts if p]
+            if parts:
+                matched = [re.findall(p, ascii_chars, re.U) for p in parts]
+                self.assertEqual(self.all_same(matched), True)
+
+    def all_same(self, items):
+        first = items[0]
+        return all(i == first for i in items[1 : ])
+
+def run_re_tests():
+    """Run the cases listed in test.re_tests, printing any failures."""
+    from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
+    if verbose:
+        print 'Running re_tests test suite'
+
+    for t in tests:
+        sys.stdout.flush()
+        pattern = s = outcome = repl = expected = None
+        if len(t) == 5:
+            pattern, s, outcome, repl, expected = t
+        elif len(t) == 3:
+            pattern, s, outcome = t
+        else:
+            raise ValueError, ('Test tuples should have 3 or 5 fields', t)
+
+        try:
+            obj = re.compile(pattern)
+        except re.error:
+            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
+            else:
+                print '=== Syntax error:', t
+        except KeyboardInterrupt: raise  # bare raise keeps the traceback
+        except:
+            print '*** Unexpected error ***', t
+            if verbose:
+                traceback.print_exc(file=sys.stdout)
+        else:
+            try:
+                result = obj.search(s)
+            except re.error, msg:
+                print '=== Unexpected exception', t, repr(msg)
+                # 'result' is unbound or stale from an earlier test; skip.
+                continue
+            if outcome == SYNTAX_ERROR:
+                # This should have been a syntax error; forget it.
+                pass
+            elif outcome == FAIL:
+                if result is None: pass   # No match, as expected
+                else: print '=== Succeeded incorrectly', t
+            elif outcome == SUCCEED:
+                if result is not None:
+                    # Matched, as expected, so now we compute the
+                    # result string and compare it to our expected result.
+                    start, end = result.span(0)
+                    vardict={'found': result.group(0),
+                             'groups': result.group(),
+                             'flags': result.re.flags}
+                    for i in range(1, 100):
+                        try:
+                            gi = result.group(i)
+                            # Special hack because else the string concat fails:
+                            if gi is None:
+                                gi = "None"
+                        except IndexError:
+                            gi = "Error"
+                        vardict['g%d' % i] = gi
+                    for i in result.re.groupindex.keys():
+                        try:
+                            gi = result.group(i)
+                            if gi is None:
+                                gi = "None"
+                        except IndexError:
+                            gi = "Error"
+                        vardict[i] = gi
+                    # Evaluate the table's repl expression with vardict as globals.
+                    repl = eval(repl, vardict)
+                    if repl != expected:
+                        print '=== grouping error', t,
+                        print repr(repl) + ' should be ' + repr(expected)
+                else:
+                    print '=== Failed incorrectly', t
+
+                # Try the match on a unicode string, and check that it
+                # still succeeds.
+                try:
+                    result = obj.search(unicode(s, "latin-1"))
+                    if result is None:
+                        print '=== Fails on unicode match', t
+                except NameError:
+                    continue # 1.5.2
+                except TypeError:
+                    continue # unicode test case
+
+                # Try the match on a unicode pattern, and check that it
+                # still succeeds.
+                obj=re.compile(unicode(pattern, "latin-1"))
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on unicode pattern match', t
+
+                # Try the match with the search area limited to the extent
+                # of the match and see if it still succeeds.  \B will
+                # break (because it won't match at the end or start of a
+                # string), so we'll ignore patterns that feature it.
+
+                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
+                               and result is not None:
+                    obj = re.compile(pattern)
+                    result = obj.search(s, result.start(0), result.end(0) + 1)
+                    if result is None:
+                        print '=== Failed on range-limited match', t
+
+                # Try the match with IGNORECASE enabled, and check that it
+                # still succeeds.
+                obj = re.compile(pattern, re.IGNORECASE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on case-insensitive match', t
+
+                # Try the match with LOCALE enabled, and check that it
+                # still succeeds.
+                obj = re.compile(pattern, re.LOCALE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on locale-sensitive match', t
+
+                # Try the match with UNICODE locale enabled, and check
+                # that it still succeeds.
+                obj = re.compile(pattern, re.UNICODE)
+                result = obj.search(s)
+                if result is None:
+                    print '=== Fails on unicode-sensitive match', t
+
+def test_main():
+    run_unittest(ReTests)
+    run_re_tests()
+
+if __name__ == "__main__":
+    test_main()
=== modified file Lib/sre_constants.py
--- Lib/sre_constants.py	2004-08-25 02:22:30 +0000
+++ Lib/sre_constants.py	2009-02-03 19:09:29 +0000
@@ -13,11 +13,22 @@
 
 # update when constants are added or removed
 
-MAGIC = 20031017
+MAGIC = 20081218
 
-# max code word in this release
-
-MAXREPEAT = 65535
+import operator
+import unicodedata
+from collections import defaultdict
+
+# size of code word in this release
+BYTES_PER_CODE = 4
+BITS_PER_CODE = 8 * BYTES_PER_CODE
+MAXCODE = (1 << BITS_PER_CODE) - 1
+
+MAXREPEAT = MAXCODE
+
+DIGITS = set("0123456789")
+OCTDIGITS = set("01234567")
+HEXDIGITS = set("0123456789abcdefABCDEF")
 
 # SRE standard exception (access as sre.error)
 # should this really be here?
@@ -25,181 +36,126 @@
 class error(Exception):
     pass
 
-# operators
+# list of all the operators
+# the fields are: name, op_type, negative, directional, end_marker
+# those with a negative form start with NOT_
+# those with a reverse directional form end with _REV
+OPERATOR_LIST = """
+FAILURE               INVALID         N N -
+SUCCESS               INVALID         N N -
+ANY                   SIMPLE_CATEGORY N Y -
+ANY_ALL               SIMPLE_CATEGORY N Y -
+ASSERT                ASSERT          N N END_ASSERT
+ASSERT_NOT            ASSERT          N N END_ASSERT_NOT
+ATOMIC                ATOMIC          N N END_ATOMIC
+BOUNDARY              POSITION        Y N -
+BRANCH                BRANCH          N N -
+CATEGORY              CATEGORY        Y Y -
+CHARSET               CHARSET         Y Y -
+CHARSET_IGNORE        CHARSET         Y Y -
+END_OF_LINE           POSITION        N N -
+END_OF_STRING         POSITION        N N -
+END_OF_STRING_LN      POSITION        N N -
+GROUPREF              GROUPREF        N Y -
+GROUPREF_EXISTS       GROUPREF_EXISTS N N -
+GROUPREF_IGNORE       GROUPREF        N Y -
+JUMP                  INVALID         N N -
+LITERAL               LITERAL         Y Y -
+LITERAL_IGNORE        LITERAL         Y Y -
+LITERAL_STRING        LITERAL_STRING  N Y -
+LITERAL_STRING_IGNORE LITERAL_STRING  N Y -
+MARK                  MARK            N N -
+RANGE                 RANGE           Y Y -
+RANGE_IGNORE          RANGE           Y Y -
+REPEAT_MAX            REPEAT          N Y END_REPEAT_MAX
+REPEAT_MIN            REPEAT          N Y END_REPEAT_MIN
+REPEAT_ONE_MAX        REPEAT_ONE      N Y -
+REPEAT_ONE_MIN        REPEAT_ONE      N Y -
+REPEAT_ONE_POSS       REPEAT_ONE      N Y -
+REPEAT_POSS           REPEAT          N Y END_REPEAT_POSS
+SET                   SET             Y Y -
+SET_IGNORE            SET             Y Y -
+START_OF_LINE         POSITION        N N -
+START_OF_STRING       POSITION        N N -
+SUBPATTERN            INVALID         N N -
+"""
+
+# enumerate the operators
+neg_prefix = {"N": [""], "Y": ["", "NOT_"]}
+dir_suffix = {"N": [(0, "")], "Y": [(1, ""), (-1, "_REV")]}
+
+operator_list = []
+for line in OPERATOR_LIST.splitlines():
+    fields = line.split()
+    if not fields:
+        continue
+    name, op_type, negative, directional, end_marker = fields
+    # some opcodes have a negative "NOT_x" form
+    for p in neg_prefix[negative]:
+        # some opcodes are directional; they have a reverse "x_REV" form
+        for d, s in dir_suffix[directional]:
+            operator_list.append((p + name + s, op_type, d, end_marker))
+            if end_marker != "-":
+                operator_list.append((p + end_marker + s, "INVALID", d, "-"))
+
+# map each NOT_x opcode name to x, and each such x back to NOT_x
+not_opcodes = dict((entry[0], entry[0][4 : ]) for entry in operator_list if entry[0].startswith("NOT_"))
+not_opcodes.update(dict((positive, negative) for negative, positive in not_opcodes.items()))
+
+def not_op(op):
+    return (not_opcodes[op[0]], op[1])
+
+# map each x_IGNORE opcode name to x, and each such x back to x_IGNORE
+ignore_opcodes = dict((entry[0], entry[0][ : -7]) for entry in operator_list if entry[0].endswith("_IGNORE"))
+ignore_opcodes.update(dict((plain, ignoring) for ignoring, plain in ignore_opcodes.items()))
+for op in ("CATEGORY", "NOT_CATEGORY"):
+    ignore_opcodes[op] = op    # no x_IGNORE form is listed for these; they map to themselves
+
+def ignore_op(op):
+    return (ignore_opcodes[op[0]], op[1])
+
+# sort the operators (except FAILURE and SUCCESS) and assign opcode numbers
+operator_list = operator_list[ : 2] + sorted(operator_list[2 : ])
+operator_list = [(name, number, op_type, direction, end_marker) for number, (name, op_type, direction, end_marker) in enumerate(operator_list)]
+
+# build the OPCODES dict
+OPCODES = dict((name, number) for name, number, op_type, direction, end_marker in operator_list)
 
-FAILURE = "failure"
-SUCCESS = "success"
+# collect the op_types
+op_types = set(op_type for name, number, op_type, direction, end_marker in operator_list)
+
+# create an attribute in OP for each operator
+class Record(object):
+    pass
 
-ANY = "any"
-ANY_ALL = "any_all"
-ASSERT = "assert"
-ASSERT_NOT = "assert_not"
-AT = "at"
-BIGCHARSET = "bigcharset"
-BRANCH = "branch"
-CALL = "call"
-CATEGORY = "category"
-CHARSET = "charset"
-GROUPREF = "groupref"
-GROUPREF_IGNORE = "groupref_ignore"
-GROUPREF_EXISTS = "groupref_exists"
-IN = "in"
-IN_IGNORE = "in_ignore"
-INFO = "info"
-JUMP = "jump"
-LITERAL = "literal"
-LITERAL_IGNORE = "literal_ignore"
-MARK = "mark"
-MAX_REPEAT = "max_repeat"
-MAX_UNTIL = "max_until"
-MIN_REPEAT = "min_repeat"
-MIN_UNTIL = "min_until"
-NEGATE = "negate"
-NOT_LITERAL = "not_literal"
-NOT_LITERAL_IGNORE = "not_literal_ignore"
-RANGE = "range"
-REPEAT = "repeat"
-REPEAT_ONE = "repeat_one"
-SUBPATTERN = "subpattern"
-MIN_REPEAT_ONE = "min_repeat_one"
-
-# positions
-AT_BEGINNING = "at_beginning"
-AT_BEGINNING_LINE = "at_beginning_line"
-AT_BEGINNING_STRING = "at_beginning_string"
-AT_BOUNDARY = "at_boundary"
-AT_NON_BOUNDARY = "at_non_boundary"
-AT_END = "at_end"
-AT_END_LINE = "at_end_line"
-AT_END_STRING = "at_end_string"
-AT_LOC_BOUNDARY = "at_loc_boundary"
-AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
-AT_UNI_BOUNDARY = "at_uni_boundary"
-AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
-
-# categories
-CATEGORY_DIGIT = "category_digit"
-CATEGORY_NOT_DIGIT = "category_not_digit"
-CATEGORY_SPACE = "category_space"
-CATEGORY_NOT_SPACE = "category_not_space"
-CATEGORY_WORD = "category_word"
-CATEGORY_NOT_WORD = "category_not_word"
-CATEGORY_LINEBREAK = "category_linebreak"
-CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
-CATEGORY_LOC_WORD = "category_loc_word"
-CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
-CATEGORY_UNI_DIGIT = "category_uni_digit"
-CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
-CATEGORY_UNI_SPACE = "category_uni_space"
-CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
-CATEGORY_UNI_WORD = "category_uni_word"
-CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
-CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
-CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
-
-OPCODES = [
-
-    # failure=0 success=1 (just because it looks better that way :-)
-    FAILURE, SUCCESS,
-
-    ANY, ANY_ALL,
-    ASSERT, ASSERT_NOT,
-    AT,
-    BRANCH,
-    CALL,
-    CATEGORY,
-    CHARSET, BIGCHARSET,
-    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
-    IN, IN_IGNORE,
-    INFO,
-    JUMP,
-    LITERAL, LITERAL_IGNORE,
-    MARK,
-    MAX_UNTIL,
-    MIN_UNTIL,
-    NOT_LITERAL, NOT_LITERAL_IGNORE,
-    NEGATE,
-    RANGE,
-    REPEAT,
-    REPEAT_ONE,
-    SUBPATTERN,
-    MIN_REPEAT_ONE
-
-]
-
-ATCODES = [
-    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
-    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
-    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
-    AT_UNI_NON_BOUNDARY
-]
-
-CHCODES = [
-    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
-    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
-    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
-    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
-    CATEGORY_UNI_NOT_LINEBREAK
-]
-
-def makedict(list):
-    d = {}
-    i = 0
-    for item in list:
-        d[item] = i
-        i = i + 1
-    return d
-
-OPCODES = makedict(OPCODES)
-ATCODES = makedict(ATCODES)
-CHCODES = makedict(CHCODES)
-
-# replacement operations for "ignore case" mode
-OP_IGNORE = {
-    GROUPREF: GROUPREF_IGNORE,
-    IN: IN_IGNORE,
-    LITERAL: LITERAL_IGNORE,
-    NOT_LITERAL: NOT_LITERAL_IGNORE
-}
-
-AT_MULTILINE = {
-    AT_BEGINNING: AT_BEGINNING_LINE,
-    AT_END: AT_END_LINE
-}
-
-AT_LOCALE = {
-    AT_BOUNDARY: AT_LOC_BOUNDARY,
-    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
-}
-
-AT_UNICODE = {
-    AT_BOUNDARY: AT_UNI_BOUNDARY,
-    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
-}
-
-CH_LOCALE = {
-    CATEGORY_DIGIT: CATEGORY_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_LOC_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
-}
-
-CH_UNICODE = {
-    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_UNI_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
-}
+OP = Record()
+for name in OPCODES:
+    setattr(OP, name, name)    # OP.x holds the string "x", which is a key of OPCODES
+
+# unicode codepoint categories (property "\p{Lu}", etc)
+# (these entries must have certain fixed values)
+UNI_CATEGORY_LIST = "- Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs Co - Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So -"
+
+# additional unicode categories (property "\p{Alpha}", etc)
+COMMON_CATEGORY_LIST = "Alpha Alnum ASCII Blank Cntrl Digit Graph LineBreak Lower Print Punct Space Upper Word XDigit"
+
+# build the unicode categories dict; a "-" entry reserves a value that has no name
+CATEGORIES = dict((name, value) for value, name in enumerate(UNI_CATEGORY_LIST.split()) if name != "-")
+assert max(CATEGORIES.values()) < 0x20    # check the values, not the count: supercategories start at 0x20
+
+# add the unicode supercategories (property "\p{L&}", etc)
+category_number = 0x20
+for name in UNI_CATEGORY_LIST.split():
+    if name == "-" or name[0] in CATEGORIES:
+        continue
+    CATEGORIES[name[0]] = category_number
+    CATEGORIES[name[0] + "&"] = category_number
+    category_number += 1
+
+COMMON_CATEGORY_START = category_number
+for name in COMMON_CATEGORY_LIST.split():
+    CATEGORIES[name] = category_number
+    category_number += 1
 
 # flags
 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
@@ -210,6 +166,8 @@
 SRE_FLAG_UNICODE = 32 # use unicode locale
 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
 SRE_FLAG_DEBUG = 128 # debugging
+SRE_FLAG_REVERSE = 256 # search backwards
+SRE_FLAG_ZEROWIDTH = 512 # permit split on zero-width
 
 # flags for INFO primitive
 SRE_INFO_PREFIX = 1 # has prefix
@@ -217,12 +175,8 @@
 SRE_INFO_CHARSET = 4 # pattern starts with character from given set
 
 if __name__ == "__main__":
-    def dump(f, d, prefix):
-        items = d.items()
-        items.sort(key=lambda a: a[1])
-        for k, v in items:
-            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
-    f = open("sre_constants.h", "w")
+    f = open("sre_constants.h", "wb")
+
     f.write("""\
 /*
  * Secret Labs' Regular Expression Engine
@@ -240,22 +194,84 @@
 """)
 
     f.write("#define SRE_MAGIC %d\n" % MAGIC)
+    f.write("\n")
+    f.write("/* size of a code word (must be unsigned short or larger, and\n")
+    f.write("   large enough to hold a Py_UNICODE character) */\n")
+    if BYTES_PER_CODE == 4:
+        f.write("typedef unsigned int SRE_CODE;\n")
+    else:
+        f.write("typedef unsigned short SRE_CODE;\n")
+
+    f.write("\n")
+    f.write("#define SRE_BYTES_PER_CODE %d\n" % BYTES_PER_CODE)
+    f.write("#define SRE_BITS_PER_CODE %d\n" % BITS_PER_CODE)
+    f.write("#define SRE_UNLIMITED_REPEATS 0x%X\n" % MAXREPEAT)
+
+    f.write("\n")
+    for name, number, op_type, direction, end_marker in operator_list:
+        f.write("#define SRE_OP_%s %d\n" % (name, number))
+    f.write("#define SRE_MAX_OP %d\n" % (len(operator_list) - 1))
+
+    f.write("\n")
+    f.write("#define SRE_FLAG_TEMPLATE 0x%X\n" % SRE_FLAG_TEMPLATE)
+    f.write("#define SRE_FLAG_IGNORECASE 0x%X\n" % SRE_FLAG_IGNORECASE)
+    f.write("#define SRE_FLAG_LOCALE 0x%X\n" % SRE_FLAG_LOCALE)
+    f.write("#define SRE_FLAG_MULTILINE 0x%X\n" % SRE_FLAG_MULTILINE)
+    f.write("#define SRE_FLAG_DOTALL 0x%X\n" % SRE_FLAG_DOTALL)
+    f.write("#define SRE_FLAG_UNICODE 0x%X\n" % SRE_FLAG_UNICODE)
+    f.write("#define SRE_FLAG_VERBOSE 0x%X\n" % SRE_FLAG_VERBOSE)
+    f.write("#define SRE_FLAG_REVERSE 0x%X\n" % SRE_FLAG_REVERSE)
+    f.write("#define SRE_FLAG_ZEROWIDTH 0x%X\n" % SRE_FLAG_ZEROWIDTH)
+
+    f.write("\n")
+    f.write("#define SRE_INFO_PREFIX 0x%X\n" % SRE_INFO_PREFIX)
+    f.write("#define SRE_INFO_LITERAL 0x%X\n" % SRE_INFO_LITERAL)
+    f.write("#define SRE_INFO_CHARSET 0x%X\n" % SRE_INFO_CHARSET)
+
+    f.write("\n")
+    _categories = sorted(CATEGORIES.items(), key=operator.itemgetter(1))
+    for name, value in ((name, value) for name, value in _categories if value < COMMON_CATEGORY_START):
+        if name.isalnum():
+            f.write("#define SRE_UNI_CAT_%s 0x%X\n" % (name, value))
+
+    f.write("\n")
+    for name, value in ((name, value) for name, value in _categories if value >= COMMON_CATEGORY_START):
+        f.write("#define SRE_CAT_%s 0x%X\n" % (name, value))
+
+    f.write("\n")
+    groups = defaultdict(int)
+    for name, value in ((name, value) for name, value in _categories if value < COMMON_CATEGORY_START):
+        if len(name) == 2 and name.isalpha():
+            groups[name[ : 1]] |= 1 << value
+
+    for name, value in sorted(groups.items()):
+        f.write("#define SRE_CAT_GROUP_%s 0x%08X\n" % (name, value))
+
+    f.write("""
+/* info for operator validation */
+typedef struct SRE_OpInfo {
+    int type;
+    int direction;
+    int end_marker;
+} SRE_OpInfo;
 
-    dump(f, OPCODES, "SRE_OP")
-    dump(f, ATCODES, "SRE")
-    dump(f, CHCODES, "SRE")
-
-    f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
-    f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
-    f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
-    f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
-    f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
-    f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
-    f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
-
-    f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
-    f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
-    f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
+""")
+    # sort the op_types (putting "INVALID" first) and assign numbers
+    op_types = [(name, number) for number, name in enumerate(sorted(op_types, key=lambda name: ("" if name == "INVALID" else name)))]
+    for name, number in op_types:
+        f.write("#define SRE_TYPE_%s %d\n" % (name, number))
+
+    op_types = dict(op_types)
+    f.write("""
+static SRE_OpInfo op_info[] = {
+""")
+    for name, number, op_type, direction, end_marker in operator_list:    # one table row per opcode
+        if end_marker == "-":
+            end_marker = "0"    # "-" means the opcode has no end marker
+        else:
+            end_marker = "SRE_OP_%s" % end_marker    # refer to the #define written earlier
+        f.write("    {%s, %s, %s}, /* SRE_OP_%s */\n" % (op_types[op_type], direction, end_marker, name))
+    f.write("};\n")
 
     f.close()
     print "done"
=== modified file Lib/sre_compile.py
--- Lib/sre_compile.py	2008-04-08 21:27:42 +0000
+++ Lib/sre_compile.py	2009-02-03 19:10:17 +0000
@@ -3,7 +3,7 @@
 #
 # convert template to internal format
 #
-# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 #
 # See the sre.py file for information on usage and redistribution.
 #
@@ -11,281 +11,263 @@
 """Internal support module for sre"""
 
 import _sre, sys
-import sre_parse
+
 from sre_constants import *
 
-assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+assert _sre.MAGIC == MAGIC, "SRE module mismatch"    # the C module and sre_constants must agree
+
+ASSERT_OP_CODES = {
+    OP.ASSERT: OP.END_ASSERT,
+    OP.ASSERT_NOT: OP.END_ASSERT_NOT,
+}
+
+REPEAT_OP_CODES = {
+    OP.REPEAT_MAX: OP.END_REPEAT_MAX,
+    OP.REPEAT_MIN: OP.END_REPEAT_MIN,
+    OP.REPEAT_POSS: OP.END_REPEAT_POSS,
+}
+
+SINGLE_CHAR_OP_CODES = set([
+    OP.ANY, OP.ANY_ALL,
+    OP.CATEGORY, OP.NOT_CATEGORY,
+    OP.CHARSET, OP.CHARSET_IGNORE, OP.NOT_CHARSET, OP.NOT_CHARSET_IGNORE,
+    OP.LITERAL, OP.LITERAL_IGNORE, OP.NOT_LITERAL, OP.NOT_LITERAL_IGNORE,
+    OP.RANGE, OP.RANGE_IGNORE,
+    OP.SET, OP.NOT_SET,
+])
+
+NORMAL_OP_CODES, REVERSE_OP_CODES = {}, {}
+for op in dir(OP):
+    if not op.startswith("_"):    # ignore the underscore attributes of the object
+        NORMAL_OP_CODES[op] = op    # forward direction: every opcode maps to itself
+        if op.endswith("_REV"):
+            REVERSE_OP_CODES[op[ : -4]] = op    # x maps to its x_REV form
+        else:
+            REVERSE_OP_CODES.setdefault(op, op)    # x maps to itself unless an entry already exists
 
-if _sre.CODESIZE == 2:
-    MAXCODE = 65535
-else:
-    MAXCODE = 0xFFFFFFFFL
-
-def _identityfunction(x):
-    return x
-
-def set(seq):
-    s = {}
-    for elem in seq:
-        s[elem] = 1
-    return s
-
-_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
-_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
-_SUCCESS_CODES = set([SUCCESS, FAILURE])
-_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
+REPEAT_ONE_OP_CODES = {
+    OP.REPEAT_MAX: OP.REPEAT_ONE_MAX,
+    OP.REPEAT_MIN: OP.REPEAT_ONE_MIN,
+    OP.REPEAT_POSS: OP.REPEAT_ONE_POSS,
+}
+
+CATEGORY_OP_SET = set([OP.CATEGORY, OP.NOT_CATEGORY])
+CHARSET_OP_SET = set([OP.CHARSET, OP.CHARSET_IGNORE, OP.NOT_CHARSET, OP.NOT_CHARSET_IGNORE])
+GROUPREF_OP_SET = set([OP.GROUPREF, OP.GROUPREF_IGNORE])
+LITERAL_OP_SET = set([OP.LITERAL, OP.LITERAL_IGNORE, OP.NOT_LITERAL, OP.NOT_LITERAL_IGNORE])
+POSITION_OP_SET = set([OP.BOUNDARY, OP.END_OF_LINE, OP.END_OF_STRING, OP.END_OF_STRING_LN, OP.NOT_BOUNDARY, OP.START_OF_LINE, OP.START_OF_STRING])
+RANGE_OP_SET = set([OP.NOT_RANGE, OP.NOT_RANGE_IGNORE, OP.RANGE, OP.RANGE_IGNORE])
+REPEAT_OP_SET = set([OP.REPEAT_MAX, OP.REPEAT_MIN, OP.REPEAT_POSS])
+SET_OP_SET = set([OP.SET, OP.SET_IGNORE, OP.NOT_SET, OP.NOT_SET_IGNORE])
+SIMPLE_CATEGORY_OP_SET = set([OP.ANY, OP.ANY_ALL])
 
-def _compile(code, pattern, flags):
+def _compile(code, pattern, flags, info, dir=1):
     # internal: compile a (sub)pattern
     emit = code.append
-    _len = len
-    LITERAL_CODES = _LITERAL_CODES
-    REPEATING_CODES = _REPEATING_CODES
-    SUCCESS_CODES = _SUCCESS_CODES
-    ASSERT_CODES = _ASSERT_CODES
+    literal_op, literal_string = None, []
+    if dir < 0:
+        fix_direction = REVERSE_OP_CODES
+    else:
+        fix_direction = NORMAL_OP_CODES
+    if dir < 0:
+        # Within lookbehind, so reverse the order of the matching
+        pattern = reversed(pattern)
+    def flush_literal():
+        if literal_string:
+            emit_literal_string(code, literal_op, literal_string[ : : dir], fix_direction)
     for op, av in pattern:
-        if op in LITERAL_CODES:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                emit(_sre.getlower(av, flags))
-            else:
-                emit(OPCODES[op])
-                emit(av)
-        elif op is IN:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                def fixup(literal, flags=flags):
-                    return _sre.getlower(literal, flags)
-            else:
-                emit(OPCODES[op])
-                fixup = _identityfunction
-            skip = _len(code); emit(0)
-            _compile_charset(av, flags, code, fixup)
-            code[skip] = _len(code) - skip
-        elif op is ANY:
-            if flags & SRE_FLAG_DOTALL:
-                emit(OPCODES[ANY_ALL])
-            else:
-                emit(OPCODES[ANY])
-        elif op in REPEATING_CODES:
-            if flags & SRE_FLAG_TEMPLATE:
-                raise error, "internal: unsupported template operator"
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
-            elif _simple(av) and op is not REPEAT:
-                if op is MAX_REPEAT:
-                    emit(OPCODES[REPEAT_ONE])
-                else:
-                    emit(OPCODES[MIN_REPEAT_ONE])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
+        if op in SET_OP_SET:
+            op, av = _optimize_set(op, av, flags)
+        if op == literal_op:
+            literal_string.append(av)
+        else:
+            flush_literal()
+            if op in (OP.LITERAL, OP.LITERAL_IGNORE):
+                literal_op, literal_string = op, [av]
             else:
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                code[skip] = _len(code) - skip
-                if op is MAX_REPEAT:
-                    emit(OPCODES[MAX_UNTIL])
+                literal_op, literal_string = None, []
+                if op in ASSERT_OP_CODES:
+                    # <assert> <skip to end> ... <end_assert>
+                    emit(OPCODES[op])
+                    skip = len(code); emit(0)
+                    _compile(code, av[1], flags, info, av[0])
+                    emit(OPCODES[ASSERT_OP_CODES[op]])
+                    code[skip] = len(code) - skip
+                elif op == OP.ATOMIC:
+                    # <ATOMIC> ... <END_ATOMIC>
+                    emit(OPCODES[OP.ATOMIC])
+                    _compile(code, av[1], flags, info, dir)
+                    emit(OPCODES[OP.END_ATOMIC])
+                elif op == OP.BRANCH:
+                    # <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
+                    emit(OPCODES[op])
+                    tail = []
+                    tailappend = tail.append
+                    for av in av[1]:
+                        skip = len(code); emit(0)
+                        _compile(code, av, flags, info, dir)
+                        emit(OPCODES[OP.JUMP])
+                        tailappend(len(code)); emit(0)
+                        code[skip] = len(code) - skip
+                    emit(0) # end of branchs
+                    for tail in tail:
+                        code[tail] = len(code) - tail
+                elif op in CATEGORY_OP_SET:
+                    # <category> category
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av)
+                elif op in CHARSET_OP_SET:
+                    # <charset> skip charset
+                    emit(OPCODES[fix_direction[op]])
+                    skip = len(code); emit(0)
+                    _compile_charset(code, av)
+                    code[skip] = len(code) - skip
+                elif op in GROUPREF_OP_SET:
+                    # <groupref> group_id
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av - 1)
+                elif op == OP.GROUPREF_EXISTS:
+                    # <GROUPREF_EXISTS> group_id <skip to code_no> code_yes <JUMP> <skip to end> code_no
+                    emit(OPCODES[op])
+                    emit(av[0] - 1)
+                    skipyes = len(code); emit(0)
+                    _compile(code, av[1], flags, info, dir)
+                    if av[2]:
+                        emit(OPCODES[OP.JUMP])
+                        skipno = len(code); emit(0)
+                        code[skipyes] = len(code) - skipyes + 1
+                        _compile(code, av[2], flags, info, dir)
+                        code[skipno] = len(code) - skipno
+                    else:
+                        code[skipyes] = len(code) - skipyes + 1
+                elif op in LITERAL_OP_SET:
+                    # <literal> code
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av)
+                elif op in POSITION_OP_SET:
+                    # <position>
+                    emit(OPCODES[fix_direction[op]])
+                elif op in RANGE_OP_SET:
+                    # <range> min max
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av[0])
+                    emit(av[1])
+                elif op in REPEAT_OP_SET:
+                    if flags & SRE_FLAG_TEMPLATE:
+                        raise error("internal: unsupported template operator")
+                    else:
+                        single = get_single_character(av[2])
+                        if single:
+                            # <repeat_one> <skip to end> <min> <max> ...
+                            emit(OPCODES[fix_direction[REPEAT_ONE_OP_CODES[op]]])
+                            skip = len(code); emit(0)
+                            emit(av[0])
+                            emit(av[1])
+                            _compile(code, single, flags, info, dir)
+                            code[skip] = len(code) - skip
+                        else:
+                            # <repeat> <skip to end> <min> <max> ... <end_repeat> <skip to start>
+                            emit(OPCODES[fix_direction[op]])
+                            skip = len(code); emit(0)
+                            emit(av[0])
+                            emit(av[1])
+                            _compile(code, av[2], flags, info, dir)
+                            emit(OPCODES[fix_direction[REPEAT_OP_CODES[op]]])
+                            offset = len(code) - skip
+                            code[skip] = offset
+                            emit(offset)
+                elif op in SET_OP_SET:
+                    # <set> set
+                    emit(OPCODES[fix_direction[op]])
+                    _compile_set(code, av)
+                elif op in SIMPLE_CATEGORY_OP_SET:
+                    # <category>
+                    emit(OPCODES[op])
+                elif op == OP.SUBPATTERN:
+                    if av[0]:
+                        number_id, name_id = av[0]
+                        info.group_count += 1
+                        number_start_mark, number_end_mark = number_id * 2 - 2, number_id * 2 - 1
+                        name_start_mark, name_end_mark = name_id * 2 - 2, name_id * 2 - 1
+                        if dir < 0:
+                            number_start_mark, number_end_mark = number_end_mark, number_start_mark
+                            name_start_mark, name_end_mark = name_end_mark, name_start_mark
+                        # <MARK> <numbered_id> <named_id>
+                        emit(OPCODES[OP.MARK])
+                        emit(number_start_mark)
+                        emit(name_start_mark)
+                    _compile(code, av[1], flags, info, dir)
+                    if av[0]:
+                        # <MARK> <numbered_id> <named_id>
+                        emit(OPCODES[OP.MARK])
+                        emit(number_end_mark)
+                        emit(name_end_mark)
                 else:
-                    emit(OPCODES[MIN_UNTIL])
-        elif op is SUBPATTERN:
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2)
-            # _compile_info(code, av[1], flags)
-            _compile(code, av[1], flags)
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2+1)
-        elif op in SUCCESS_CODES:
-            emit(OPCODES[op])
-        elif op in ASSERT_CODES:
-            emit(OPCODES[op])
-            skip = _len(code); emit(0)
-            if av[0] >= 0:
-                emit(0) # look ahead
-            else:
-                lo, hi = av[1].getwidth()
-                if lo != hi:
-                    raise error, "look-behind requires fixed-width pattern"
-                emit(lo) # look behind
-            _compile(code, av[1], flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is CALL:
+                    raise ValueError("unsupported operand type: %s" % op)
+    flush_literal()
+
+def emit_literal_string(code, literal_op, literal_string, fix_direction):
+    # Append the code for a run of literals that share the opcode literal_op:
+    #     one character:   <literal> code
+    #     two or more:     <literal_string> length ...
+    # fix_direction maps each opcode name to the name actually emitted.
+    count = len(literal_string)
+    if count <= 1:
+        # a single character
+        code.append(OPCODES[fix_direction[literal_op]])
+        code.append(literal_string[0])
+        return
+    # a string
+    ignore_case = literal_op == OP.LITERAL_IGNORE
+    string_op = OP.LITERAL_STRING_IGNORE if ignore_case else OP.LITERAL_STRING
+    code.append(OPCODES[fix_direction[string_op]])
+    code.append(count)
+    code.extend(literal_string)
+
+def get_single_character(pattern):
+    # Return pattern when it is one item with a single-character opcode, else None.
+    is_single = len(pattern) == 1 and pattern[0][0] in SINGLE_CHAR_OP_CODES
+    return pattern if is_single else None
+
+def _compile_set(code, charset):
+    emit = code.append
+    skip_set = len(code); emit(0)
+    for op, av in charset:
+        if op in CHARSET_OP_SET:
+            # <charset> skip charset
             emit(OPCODES[op])
-            skip = _len(code); emit(0)
-            _compile(code, av, flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is AT:
+            skip = len(code); emit(0)
+            _compile_charset(code, av)
+            code[skip] = len(code) - skip
+        elif op in CATEGORY_OP_SET:
+            # <category> category
             emit(OPCODES[op])
-            if flags & SRE_FLAG_MULTILINE:
-                av = AT_MULTILINE.get(av, av)
-            if flags & SRE_FLAG_LOCALE:
-                av = AT_LOCALE.get(av, av)
-            elif flags & SRE_FLAG_UNICODE:
-                av = AT_UNICODE.get(av, av)
-            emit(ATCODES[av])
-        elif op is BRANCH:
+            emit(av)
+        elif op == OP.LITERAL:
+            # <literal> code
             emit(OPCODES[op])
-            tail = []
-            tailappend = tail.append
-            for av in av[1]:
-                skip = _len(code); emit(0)
-                # _compile_info(code, av, flags)
-                _compile(code, av, flags)
-                emit(OPCODES[JUMP])
-                tailappend(_len(code)); emit(0)
-                code[skip] = _len(code) - skip
-            emit(0) # end of branch
-            for tail in tail:
-                code[tail] = _len(code) - tail
-        elif op is CATEGORY:
+            emit(av)
+        elif op == OP.RANGE:
+            # <range> min max
             emit(OPCODES[op])
-            if flags & SRE_FLAG_LOCALE:
-                av = CH_LOCALE[av]
-            elif flags & SRE_FLAG_UNICODE:
-                av = CH_UNICODE[av]
-            emit(CHCODES[av])
-        elif op is GROUPREF:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-            else:
-                emit(OPCODES[op])
-            emit(av-1)
-        elif op is GROUPREF_EXISTS:
+            emit(av[0])
+            emit(av[1])
+        elif op in SIMPLE_CATEGORY_OP_SET:
+            # <category>
             emit(OPCODES[op])
-            emit(av[0]-1)
-            skipyes = _len(code); emit(0)
-            _compile(code, av[1], flags)
-            if av[2]:
-                emit(OPCODES[JUMP])
-                skipno = _len(code); emit(0)
-                code[skipyes] = _len(code) - skipyes + 1
-                _compile(code, av[2], flags)
-                code[skipno] = _len(code) - skipno
-            else:
-                code[skipyes] = _len(code) - skipyes + 1
         else:
-            raise ValueError, ("unsupported operand type", op)
+            raise error("internal: unsupported set member: %s" % op)
+    code[skip_set] = len(code) - skip_set
 
-def _compile_charset(charset, flags, code, fixup=None):
-    # compile charset subprogram
-    emit = code.append
-    if fixup is None:
-        fixup = _identityfunction
-    for op, av in _optimize_charset(charset, fixup):
-        emit(OPCODES[op])
-        if op is NEGATE:
-            pass
-        elif op is LITERAL:
-            emit(fixup(av))
-        elif op is RANGE:
-            emit(fixup(av[0]))
-            emit(fixup(av[1]))
-        elif op is CHARSET:
-            code.extend(av)
-        elif op is BIGCHARSET:
-            code.extend(av)
-        elif op is CATEGORY:
-            if flags & SRE_FLAG_LOCALE:
-                emit(CHCODES[CH_LOCALE[av]])
-            elif flags & SRE_FLAG_UNICODE:
-                emit(CHCODES[CH_UNICODE[av]])
-            else:
-                emit(CHCODES[av])
-        else:
-            raise error, "internal: unsupported set operator"
-    emit(OPCODES[FAILURE])
-
-def _optimize_charset(charset, fixup):
-    # internal: optimize character set
-    out = []
-    outappend = out.append
-    charmap = [0]*256
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                outappend((op, av))
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in range(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could append to charmap tail
-                return charset # cannot compress
-    except IndexError:
-        # character set contains unicode characters
-        return _optimize_unicode(charset, fixup)
-    # compress character map
-    i = p = n = 0
-    runs = []
-    runsappend = runs.append
-    for c in charmap:
-        if c:
-            if n == 0:
-                p = i
-            n = n + 1
-        elif n:
-            runsappend((p, n))
-            n = 0
-        i = i + 1
-    if n:
-        runsappend((p, n))
-    if len(runs) <= 2:
-        # use literal/range
-        for p, n in runs:
-            if n == 1:
-                outappend((LITERAL, p))
-            else:
-                outappend((RANGE, (p, p+n-1)))
-        if len(out) < len(charset):
-            return out
-    else:
-        # use bitmap
-        data = _mk_bitmap(charmap)
-        outappend((CHARSET, data))
-        return out
-    return charset
-
-def _mk_bitmap(bits):
-    data = []
-    dataappend = data.append
-    if _sre.CODESIZE == 2:
-        start = (1, 0)
-    else:
-        start = (1L, 0L)
-    m, v = start
-    for c in bits:
-        if c:
-            v = v + m
-        m = m + m
-        if m > MAXCODE:
-            dataappend(v)
-            m, v = start
-    return data
+# The characters may be mapped to a bitmap.
 
-# To represent a big charset, first a bitmap of all characters in the
+# To represent a charset, first a bitmap of all characters in the
 # set is constructed. Then, this bitmap is sliced into chunks of 256
 # characters, duplicate chunks are eliminated, and each chunk is
 # given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
-# of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+# represented by a codeword sequence, consisting of one codeword for
+# the maximum character code, a sequence of chunk numbers
+# (2 per codeword), and a sequence of chunks (8 codewords each).
 
 # Compression is normally good: in a typical charset, large ranges of
 # Unicode will be either completely excluded (e.g. if only cyrillic
@@ -293,215 +275,145 @@
 # subranges of Kanji match). These ranges will be represented by
 # chunks of all one-bits or all zero-bits.
 
-# Matching can be also done efficiently: the more significant byte of
+# Matching can also be done efficiently: the most significant bits of
 # the Unicode character is an index into the chunk number, and the
-# less significant byte is a bit index in the chunk (just like the
-# CHARSET matching).
+# least significant byte is a bit index into the chunk.
 
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
-# of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
-# in particular, that negated charsets cannot be represented as
-# bigcharsets.
-
-def _optimize_unicode(charset, fixup):
-    try:
-        import array
-    except ImportError:
-        return charset
-    charmap = [0]*65536
-    negate = 0
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                negate = 1
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in xrange(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could expand category
-                return charset # cannot compress
-    except IndexError:
-        # non-BMP characters
-        return charset
-    if negate:
-        if sys.maxunicode != 65535:
-            # XXX: negation does not work with big charsets
-            return charset
-        for i in xrange(65536):
-            charmap[i] = not charmap[i]
-    comps = {}
-    mapping = [0]*256
-    block = 0
-    data = []
-    for i in xrange(256):
-        chunk = tuple(charmap[i*256:(i+1)*256])
-        new = comps.setdefault(chunk, block)
-        mapping[i] = new
-        if new == block:
-            block = block + 1
-            data = data + _mk_bitmap(chunk)
-    header = [block]
-    if _sre.CODESIZE == 2:
-        code = 'H'
-    else:
-        code = 'I'
-    # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tostring()
-    # Convert byte array to word array
-    mapping = array.array(code, mapping)
-    assert mapping.itemsize == _sre.CODESIZE
-    header = header + mapping.tolist()
-    data[0:0] = header
-    return [(BIGCHARSET, data)]
-
-def _simple(av):
-    # check if av is a "simple" operator
-    lo, hi = av[2].getwidth()
-    if lo == 0 and hi == MAXREPEAT:
-        raise error, "nothing to repeat"
-    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
-
-def _compile_info(code, pattern, flags):
-    # internal: compile an info block.  in the current version,
-    # this contains min/max pattern width, and an optional literal
-    # prefix or a character map
-    lo, hi = pattern.getwidth()
-    if lo == 0:
-        return # not worth it
-    # look for a literal prefix
-    prefix = []
-    prefixappend = prefix.append
-    prefix_skip = 0
-    charset = [] # not used
-    charsetappend = charset.append
-    if not (flags & SRE_FLAG_IGNORECASE):
-        # look for literal prefix
-        for op, av in pattern.data:
-            if op is LITERAL:
-                if len(prefix) == prefix_skip:
-                    prefix_skip = prefix_skip + 1
-                prefixappend(av)
-            elif op is SUBPATTERN and len(av[1]) == 1:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    prefixappend(av)
-                else:
-                    break
+# a charset is a 3-tuple: (maximum character code, list of chunk indexes,
+# list of 256-bit bitsets held as integers)
+def _compile_charset(code, charset):
+    emit = code.append
+    # first codeword: the maximum character code
+    emit(charset[0])
+    # then the indexes, two per codeword with the first in the low 16 bits; the
+    # odd-position slice gets a 0 appended so an unpaired last index survives zip()
+    evens, odds = charset[1][0 : : 2], charset[1][1 : : 2] + [0]
+    for pair in zip(evens, odds):
+        emit(pair[0] | (pair[1] << 16))
+    # then each bitset as 256 // BITS_PER_CODE codewords, least significant first
+    for bitset in charset[2]:
+        for shift in range(0, 256 // BITS_PER_CODE * BITS_PER_CODE, BITS_PER_CODE):
+            emit((bitset >> shift) & MAXCODE)
+
+def _ones(n):
+    return (1 << n) - 1
+
+def _optimize_set(set_op, set_members, flags):
+    # consolidate the ranges (the bounds are inclusive)
+    charset = set()
+    categories = []
+    for o, a in set_members:
+        if o == OP.LITERAL:
+            charset.add(a)
+        elif o == OP.RANGE:
+            for c in xrange(a[0], a[1] + 1):
+                charset.add(c)
+        else:
+            categories.append((o, a))
+    categories = sorted(set(categories))
+    # convert charset to list of ranges
+    ranges = []
+    start, end = None, None
+    for c in sorted(charset):
+        try:
+            if c == end + 1:
+                end = c
             else:
-                break
-        # if no prefix, look for charset prefix
-        if not prefix and pattern.data:
-            op, av = pattern.data[0]
-            if op is SUBPATTERN and av[1]:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    charsetappend((op, av))
-                elif op is BRANCH:
-                    c = []
-                    cappend = c.append
-                    for p in av[1]:
-                        if not p:
-                            break
-                        op, av = p[0]
-                        if op is LITERAL:
-                            cappend((op, av))
-                        else:
-                            break
-                    else:
-                        charset = c
-            elif op is BRANCH:
-                c = []
-                cappend = c.append
-                for p in av[1]:
-                    if not p:
-                        break
-                    op, av = p[0]
-                    if op is LITERAL:
-                        cappend((op, av))
-                    else:
-                        break
-                else:
-                    charset = c
-            elif op is IN:
-                charset = av
-##     if prefix:
-##         print "*** PREFIX", prefix, prefix_skip
-##     if charset:
-##         print "*** CHARSET", charset
-    # add an info block
-    emit = code.append
-    emit(OPCODES[INFO])
-    skip = len(code); emit(0)
-    # literal flag
-    mask = 0
-    if prefix:
-        mask = SRE_INFO_PREFIX
-        if len(prefix) == prefix_skip == len(pattern.data):
-            mask = mask + SRE_INFO_LITERAL
-    elif charset:
-        mask = mask + SRE_INFO_CHARSET
-    emit(mask)
-    # pattern length
-    if lo < MAXCODE:
-        emit(lo)
-    else:
-        emit(MAXCODE)
-        prefix = prefix[:MAXCODE]
-    if hi < MAXCODE:
-        emit(hi)
+                ranges.append((start, end))
+                start, end = c, c
+        except TypeError:
+            start, end = c, c
+    if start is not None:
+        ranges.append((start, end))
+    # try to optimise the set
+    if len(ranges) <= 1:
+        # only a few ranges
+        for r in ranges:
+            if r[0] == r[1]:
+                # a range of 1 character!
+                categories.append((OP.LITERAL, r[0]))
+            else:
+                categories.append((OP.RANGE, r))
     else:
-        emit(0)
-    # add literal prefix
-    if prefix:
-        emit(len(prefix)) # length
-        emit(prefix_skip) # skip
-        code.extend(prefix)
-        # generate overlap table
-        table = [-1] + ([0]*len(prefix))
-        for i in xrange(len(prefix)):
-            table[i+1] = table[i]+1
-            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
-                table[i+1] = table[table[i+1]-1]+1
-        code.extend(table[1:]) # don't store first entry
-    elif charset:
-        _compile_charset(charset, flags, code)
-    code[skip] = len(code) - skip
-
-try:
-    unicode
-except NameError:
-    STRING_TYPES = (type(""),)
-else:
-    STRING_TYPES = (type(""), type(unicode("")))
-
-def isstring(obj):
-    for tp in STRING_TYPES:
-        if isinstance(obj, tp):
-            return 1
-    return 0
+        # many ranges, so use a charset instead
+        max_char = ranges[-1][1]
+        subset_list = [0] * (max_char // 256 + 1)
+        for lo, hi in ranges:
+            base = lo - lo % 256
+            while lo <= hi:
+                subset_list[base // 256] |= _ones(min(hi - base + 1, 256)) ^ _ones(lo % 256)
+                base += 256
+                lo = base
+        # build the index and chunks, consolidating duplicate subsets/chunks
+        index_list, chunk_list = [], []
+        for subset in subset_list:
+            try:
+                index_list.append(chunk_list.index(subset))
+            except ValueError:
+                index_list.append(len(chunk_list))
+                chunk_list.append(subset)
+        categories.append((OP.CHARSET, (max_char, index_list, chunk_list)))
+    if len(categories) == 1:
+        # only 1 test in the set, so don't use a set
+        cat = categories[0]
+        if set_op.startswith("NOT_"):
+            cat = not_op(cat)
+        if set_op.endswith("_IGNORE"):
+            cat = ignore_op(cat)
+        return cat
+    return set_op, categories
+
+def create_charset(iterable):
+    # (UNUSED) build a (max_code, index_list, chunk_list) charset from characters
+    # each subset is an integer bitmap covering one block of 256 character codes
+    subsets = []
+    max_code = 0
+    for char in iterable:
+        code = ord(char)
+        if code > max_code:
+            max_code = code
+        block, bit = code >> 8, code & 0xFF
+        if block >= len(subsets):
+            # grow the list with empty subsets up to and including this block
+            subsets.extend([0] * (block + 1 - len(subsets)))
+        subsets[block] |= 1 << bit
+    # share identical subsets: each block's index points at its unique chunk
+    index_list, chunk_list = [], []
+    for subset in subsets:
+        if subset in chunk_list:
+            position = chunk_list.index(subset)
+        else:
+            position = len(chunk_list)
+            chunk_list.append(subset)
+        index_list.append(position)
+    return max_code, index_list, chunk_list
 
 def _code(p, flags):
-
     flags = p.pattern.flags | flags
     code = []
 
     # compile info block
-    _compile_info(code, p, flags)
+    #_compile_info(code, p, flags)
 
     # compile the pattern
-    _compile(code, p.data, flags)
-
-    code.append(OPCODES[SUCCESS])
+    class Record(object):
+        pass
+    info = Record()
+    info.group_count = 0
+    if flags & SRE_FLAG_REVERSE:
+        dir = -1
+    else:
+        dir = 1
+    _compile(code, p.data, flags, info, dir)
+    code.append(OPCODES[OP.SUCCESS])
 
     return code
 
 def compile(p, flags=0):
     # internal: convert pattern list to internal format
 
-    if isstring(p):
+    if isinstance(p, basestring):
+        import sre_parse
         pattern = p
         p = sre_parse.parse(p, flags)
     else:
@@ -511,20 +423,11 @@
 
     # print code
 
-    # XXX: <fl> get rid of this limitation!
-    if p.pattern.groups > 100:
-        raise AssertionError(
-            "sorry, but this version only supports 100 named groups"
-            )
-
     # map in either direction
-    groupindex = p.pattern.groupdict
-    indexgroup = [None] * p.pattern.groups
-    for k, i in groupindex.items():
-        indexgroup[i] = k
-
-    return _sre.compile(
-        pattern, flags | p.pattern.flags, code,
-        p.pattern.groups-1,
-        groupindex, indexgroup
-        )
+    groupindex = p.pattern.named_groups
+    indexgroup = [None] * (max(groupindex.values() + [-1]) + 1)
+
+    for name, index in groupindex.items():
+        indexgroup[index] = name
+
+    return _sre.compile(pattern, flags | p.pattern.flags, code, p.pattern.groups, groupindex, indexgroup)
=== modified file Lib/sre_parse.py
--- Lib/sre_parse.py	2008-05-27 01:18:39 +0000
+++ Lib/sre_parse.py	2009-02-03 21:38:45 +0000
@@ -15,81 +15,66 @@
 import sys
 
 from sre_constants import *
+import unicodedata
 
-def set(seq):
-    s = {}
-    for elem in seq:
-        s[elem] = 1
-    return s
-
-SPECIAL_CHARS = ".\\[{()*+?^$|"
-REPEAT_CHARS = "*+?{"
-
-DIGITS = set("0123456789")
-
-OCTDIGITS = set("01234567")
-HEXDIGITS = set("0123456789abcdefABCDEF")
-
-WHITESPACE = set(" \t\n\r\v\f")
+SPECIAL_CHARS = set(".\\[{()*+?^$|")
+REPEAT_CHARS = set("*+?{")
+WHITESPACE_CHARS = set(" \t\n\r\v\f")
 
 ESCAPES = {
-    r"\a": (LITERAL, ord("\a")),
-    r"\b": (LITERAL, ord("\b")),
-    r"\f": (LITERAL, ord("\f")),
-    r"\n": (LITERAL, ord("\n")),
-    r"\r": (LITERAL, ord("\r")),
-    r"\t": (LITERAL, ord("\t")),
-    r"\v": (LITERAL, ord("\v")),
-    r"\\": (LITERAL, ord("\\"))
+    r"\a": (OP.LITERAL, ord("\a")),
+    r"\b": (OP.LITERAL, ord("\b")),
+    r"\f": (OP.LITERAL, ord("\f")),
+    r"\n": (OP.LITERAL, ord("\n")),
+    r"\r": (OP.LITERAL, ord("\r")),
+    r"\t": (OP.LITERAL, ord("\t")),
+    r"\v": (OP.LITERAL, ord("\v")),
+    r"\\": (OP.LITERAL, ord("\\")),
 }
 
-CATEGORIES = {
-    r"\A": (AT, AT_BEGINNING_STRING), # start of string
-    r"\b": (AT, AT_BOUNDARY),
-    r"\B": (AT, AT_NON_BOUNDARY),
-    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
-    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
-    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
-    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
-    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
-    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END_STRING), # end of string
+POSITIONS = {
+    r"\A": (OP.START_OF_STRING, None),
+    r"\b": (OP.BOUNDARY, None),
+    r"\B": (OP.NOT_BOUNDARY, None),
+    r"\Z": (OP.END_OF_STRING, None),
+}
+
+STD_CATEGORIES = {
+    r"\d": (OP.CATEGORY, CATEGORIES["Digit"]),
+    r"\D": (OP.NOT_CATEGORY, CATEGORIES["Digit"]),
+    r"\s": (OP.CATEGORY, CATEGORIES["Space"]),
+    r"\S": (OP.NOT_CATEGORY, CATEGORIES["Space"]),
+    r"\w": (OP.CATEGORY, CATEGORIES["Word"]),
+    r"\W": (OP.NOT_CATEGORY, CATEGORIES["Word"]),
 }
 
 FLAGS = {
-    # standard flags
     "i": SRE_FLAG_IGNORECASE,
     "L": SRE_FLAG_LOCALE,
     "m": SRE_FLAG_MULTILINE,
+    "r": SRE_FLAG_REVERSE,
     "s": SRE_FLAG_DOTALL,
     "x": SRE_FLAG_VERBOSE,
-    # extensions
     "t": SRE_FLAG_TEMPLATE,
     "u": SRE_FLAG_UNICODE,
+    "z": SRE_FLAG_ZEROWIDTH,
 }
 
+SCOPED_FLAGS_MASK = SRE_FLAG_IGNORECASE | SRE_FLAG_MULTILINE | SRE_FLAG_DOTALL | SRE_FLAG_VERBOSE
+
 class Pattern:
-    # master pattern object.  keeps track of global attributes
+    # master pattern object. keeps track of global attributes
     def __init__(self):
         self.flags = 0
-        self.open = []
-        self.groups = 1
-        self.groupdict = {}
-    def opengroup(self, name=None):
-        gid = self.groups
-        self.groups = gid + 1
+        self.groups = 0
+        self.named_groups = {}
+        self.fix_list = []
+    def new_group(self, name=None):
+        self.groups += 1
+        group_number = self.groups
         if name is not None:
-            ogid = self.groupdict.get(name, None)
-            if ogid is not None:
-                raise error, ("redefinition of group name %s as group %d; "
-                              "was group %d" % (repr(name), gid,  ogid))
-            self.groupdict[name] = gid
-        self.open.append(gid)
-        return gid
-    def closegroup(self, gid):
-        self.open.remove(gid)
-    def checkgroup(self, gid):
-        return gid < self.groups and gid not in self.open
+            self.named_groups.setdefault(name, len(self.named_groups))
+        return group_number, name
 
 class SubPattern:
     # a subpattern, in intermediate form
@@ -101,23 +86,28 @@
         self.width = None
     def dump(self, level=0):
         nl = 1
-        seqtypes = type(()), type([])
+        seqtypes = tuple, list
         for op, av in self.data:
             print level*"  " + op,; nl = 0
-            if op == "in":
+            if op == OP.SET:
                 # member sublanguage
                 print; nl = 1
                 for op, a in av:
                     print (level+1)*"  " + op, a
-            elif op == "branch":
+            elif op == OP.NOT_SET:
+                # member sublanguage
+                print; nl = 1
+                for op, a in av:
+                    print (level+1)*"  " + op, a
+            elif op == OP.BRANCH:
                 print; nl = 1
                 i = 0
                 for a in av[1]:
                     if i > 0:
                         print level*"  " + "or"
                     a.dump(level+1); nl = 1
-                    i = i + 1
-            elif type(av) in seqtypes:
+                    i += 1
+            elif isinstance(av, seqtypes):
                 for a in av:
                     if isinstance(a, SubPattern):
                         if not nl: print
@@ -143,363 +133,434 @@
         self.data.insert(index, code)
     def append(self, code):
         self.data.append(code)
-    def getwidth(self):
-        # determine the width (min, max) for this subpattern
-        if self.width:
-            return self.width
-        lo = hi = 0L
-        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
-        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
-        for op, av in self.data:
-            if op is BRANCH:
-                i = sys.maxint
-                j = 0
-                for av in av[1]:
-                    l, h = av.getwidth()
-                    i = min(i, l)
-                    j = max(j, h)
-                lo = lo + i
-                hi = hi + j
-            elif op is CALL:
-                i, j = av.getwidth()
-                lo = lo + i
-                hi = hi + j
-            elif op is SUBPATTERN:
-                i, j = av[1].getwidth()
-                lo = lo + i
-                hi = hi + j
-            elif op in REPEATCODES:
-                i, j = av[2].getwidth()
-                lo = lo + long(i) * av[0]
-                hi = hi + long(j) * av[1]
-            elif op in UNITCODES:
-                lo = lo + 1
-                hi = hi + 1
-            elif op == SUCCESS:
-                break
-        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
-        return self.width
 
 class Tokenizer:
     def __init__(self, string):
         self.string = string
         self.index = 0
-        self.__next()
-    def __next(self):
-        if self.index >= len(self.string):
+        self._next()
+    def _next(self):
+        try:
+            char = self.string[self.index]
+            if char == "\\":
+                try:
+                    char += self.string[self.index + 1]
+                except IndexError:
+                    raise error("bad escape (end of line)")
+            self.index += len(char)
+            self.next = char
+        except IndexError:
             self.next = None
-            return
-        char = self.string[self.index]
-        if char[0] == "\\":
-            try:
-                c = self.string[self.index + 1]
-            except IndexError:
-                raise error, "bogus escape (end of line)"
-            char = char + c
-        self.index = self.index + len(char)
-        self.next = char
-    def match(self, char, skip=1):
-        if char == self.next:
-            if skip:
-                self.__next()
-            return 1
-        return 0
+    def match(self, char, skip=True):
+        if char != self.next:
+            return False
+        if skip:
+            self._next()
+        return True
     def get(self):
         this = self.next
-        self.__next()
+        self._next()
         return this
     def tell(self):
         return self.index, self.next
     def seek(self, index):
         self.index, self.next = index
 
-def isident(char):
-    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
+def is_name(name):
+    # check that group name is a valid identifier; an empty name is not valid
+    return bool(name) and (name[0] == "_" or name[0].isalpha()) and all(char == "_" or char.isalnum() for char in name[1 : ])
 
-def isdigit(char):
-    return "0" <= char <= "9"
+# names can be delimited in a number of ways
+NAME_DELIMITERS = {"<": ">", "{": "}"}
 
-def isname(name):
-    # check that group name is a valid string
-    if not isident(name[0]):
-        return False
-    for char in name[1:]:
-        if not isident(char) and not isdigit(char):
-            return False
-    return True
+def hex_escape(source, escape, max_digits):
+    # hex escape: delimited \x{h...} (1 to 8 digits) or exactly max_digits digits
+    digits = ""
+    if escape == r"\x" and source.next in NAME_DELIMITERS:
+        # delimited form; digits must already exist before the loop appends to it
+        start_delimiter = source.get()
+        end_delimiter = NAME_DELIMITERS[start_delimiter]
+        while source.next in HEXDIGITS:
+            digits += source.get()
+        if not 1 <= len(digits) <= 8 or not source.match(end_delimiter):
+            raise error("bad escape: %s" % (escape + start_delimiter + digits))
+    else:
+        while source.next in HEXDIGITS and len(digits) < max_digits:
+            digits += source.get()
+        if len(digits) != max_digits:
+            raise error("bad escape: %s" % (escape + digits))
+    return int(digits, 16)
+
+def oct_escape(source, escape, digits):
+    # octal escape: extend digits to at most 3 octal digits, value masked to 8 bits
+    while source.next in OCTDIGITS and len(digits) < 3:
+        digits += source.get()
+    try:
+        return int(digits, 8) & 0xFF
+    except ValueError:
+        raise error("bad escape: %s" % (escape + digits))
 
-def _class_escape(source, escape):
+def parse_name(source, terminator, name_type, prefix):
+    # collect tokens up to the terminator, which is consumed but not returned
+    pieces = []
+    token = source.get()
+    while token != terminator:
+        if token is None:
+            raise error("unterminated %s name: %s" % (name_type, prefix))
+        pieces.append(token)
+        token = source.get()
+    return "".join(pieces)
+
+HEX_ESCAPE_LENGTH = {"x": 2, "u": 4, "U": 8}
+
+def class_escape(source, escape):
     # handle escape code inside character class
-    code = ESCAPES.get(escape)
-    if code:
-        return code
-    code = CATEGORIES.get(escape)
+    code = STD_CATEGORIES.get(escape) or ESCAPES.get(escape)
     if code:
         return code
     try:
-        c = escape[1:2]
-        if c == "x":
-            # hexadecimal escape (exactly two digits)
-            while source.next in HEXDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            escape = escape[2:]
-            if len(escape) != 2:
-                raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, int(escape, 16) & 0xff
+        c = escape[1 : 2]
+        if c in HEX_ESCAPE_LENGTH:
+            # hex escape
+            return OP.LITERAL, hex_escape(source, escape, HEX_ESCAPE_LENGTH[c])
+        elif c == "o":
+            # octal escape
+            return OP.LITERAL, oct_escape(source, escape, "")
         elif c in OCTDIGITS:
-            # octal escape (up to three digits)
-            while source.next in OCTDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            escape = escape[1:]
-            return LITERAL, int(escape, 8) & 0xff
+            # octal escape
+            return OP.LITERAL, oct_escape(source, escape[ : 1], c)
         elif c in DIGITS:
-            raise error, "bogus escape: %s" % repr(escape)
-        if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            raise error("bad escape: %s" % escape)
+        elif c == "N":
+            # named character
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing character name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "character", escape + delimiter)
+            try:
+                return OP.LITERAL, ord(unicodedata.lookup(name))
+            except KeyError:
+                raise error("bad character name: %s" % name)
+        elif c == "p":
+            # character property
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing property name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "property", escape + delimiter)
+            try:
+                return OP.CATEGORY, CATEGORIES[name]
+            except KeyError:
+                raise error("bad property name: %s" % name)
+        else:
+            return OP.LITERAL, ord(c)
     except ValueError:
         pass
-    raise error, "bogus escape: %s" % repr(escape)
+    raise error("bad escape: %s" % escape)
+
+# group references can be delimited in a number of ways
+GROUP_DELIMITERS = {"<": ">", "{": "}", "'": "'", '"': '"'}
+
+# group references can be relative
+GROUP_DIRECTION = {"+": 1, "-": -1}
 
-def _escape(source, escape, state):
+def escape(source, escape, state):
     # handle escape code in expression
-    code = CATEGORIES.get(escape)
-    if code:
-        return code
-    code = ESCAPES.get(escape)
+    # group references returned as list instead of tuple so that they can be fixed later
+    code = POSITIONS.get(escape) or STD_CATEGORIES.get(escape) or ESCAPES.get(escape)
     if code:
         return code
+    if state.flags & SRE_FLAG_IGNORECASE:
+        literal_op, groupref_op = OP.LITERAL_IGNORE, OP.GROUPREF_IGNORE
+    else:
+        literal_op, groupref_op = OP.LITERAL, OP.GROUPREF
     try:
-        c = escape[1:2]
-        if c == "x":
-            # hexadecimal escape
-            while source.next in HEXDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            if len(escape) != 4:
-                raise ValueError
-            return LITERAL, int(escape[2:], 16) & 0xff
+        c = escape[1 : 2]
+        if c in HEX_ESCAPE_LENGTH:
+            # hex escape
+            return literal_op, hex_escape(source, escape, HEX_ESCAPE_LENGTH[c])
+        elif c == "o":
+            # octal escape
+            return literal_op, oct_escape(source, escape, "")
         elif c == "0":
             # octal escape
-            while source.next in OCTDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return literal_op, oct_escape(source, escape[ : 1], c)
         elif c in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
-                escape = escape + source.get()
-                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
-                    source.next in OCTDIGITS):
+                escape += source.get()
+                if set(escape[1 : ]) <= OCTDIGITS and source.next in OCTDIGITS:
                     # got three octal digits; this is an octal escape
-                    escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    escape += source.get()
+                    return literal_op, int(escape[1 : ], 8) & 0xFF
             # not an octal escape, so this is a group reference
-            group = int(escape[1:])
-            if group < state.groups:
-                if not state.checkgroup(group):
-                    raise error, "cannot refer to open group"
-                return GROUPREF, group
-            raise ValueError
-        if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            ref = [groupref_op, escape[1 : ]]
+            state.fix_list.append(ref)
+            return ref
+        elif c == "g":
+            # group reference
+            if source.next in GROUP_DELIMITERS:
+                # delimited group reference
+                delimiter = source.get()
+                name = parse_name(source, GROUP_DELIMITERS[delimiter], "group", escape + delimiter)
+                if name[0] in GROUP_DIRECTION and name[1 : ].isdigit():
+                    # relative group reference
+                    name = str(state.groups + GROUP_DIRECTION[name[0]] * int(name[1 : ]))
+                if not name.isdigit() and not is_name(name):
+                    raise error("bad group name: %s" % name)
+                # return the group reference
+                ref = [groupref_op, name]
+                state.fix_list.append(ref)
+                return ref
+            elif source.next in DIGITS:
+                # non-delimited group reference (single digit)
+                ref = [groupref_op, source.get()]
+                state.fix_list.append(ref)
+                return ref
+            else:
+                raise error("missing group name: %s" % escape)
+        elif c == "k":
+            # named group reference
+            if source.next in GROUP_DELIMITERS:
+                # delimited group reference
+                delimiter = source.get()
+                name = parse_name(source, GROUP_DELIMITERS[delimiter], "group", escape + delimiter)
+                if not is_name(name):
+                    raise error("bad group name: %s" % name)
+                ref = [groupref_op, name]
+                state.fix_list.append(ref)
+                return ref
+            else:
+                # non-delimited group reference; invalid for \k
+                raise error("missing group name: %s" % escape)
+        elif c == "N":
+            # named character
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing character name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "character", escape + delimiter)
+            try:
+                return literal_op, ord(unicodedata.lookup(name))
+            except KeyError:
+                raise error("bad character name: %s" % name)
+        elif c in "pP":
+            # character property
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing property name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "property", escape + delimiter)
+            try:
+                op = OP.CATEGORY, CATEGORIES[name]
+                if c == "P":
+                    op = not_op(op)
+                return op
+            except KeyError:
+                raise error("bad property name: %s" % name)
+        else:
+            return literal_op, ord(c)
     except ValueError:
         pass
-    raise error, "bogus escape: %s" % repr(escape)
+    raise error("bad escape: %s" % escape)
 
-def _parse_sub(source, state, nested=1):
+def _parse_sub(source, state, nested=True):
     # parse an alternation: a|b|c
-
     items = []
-    itemsappend = items.append
-    sourcematch = source.match
-    while 1:
-        itemsappend(_parse(source, state))
-        if sourcematch("|"):
+    while True:
+        items.append(_parse(source, state))
+        if source.match("|"):
             continue
         if not nested:
             break
-        if not source.next or sourcematch(")", 0):
+        if not source.next or source.match(")", False):
             break
         else:
-            raise error, "pattern not properly closed"
+            raise error("pattern not properly closed")
 
     if len(items) == 1:
         return items[0]
 
     subpattern = SubPattern(state)
-    subpatternappend = subpattern.append
 
-    # check if all items share a common prefix
-    while 1:
-        prefix = None
-        for item in items:
-            if not item:
-                break
-            if prefix is None:
-                prefix = item[0]
-            elif item[0] != prefix:
-                break
-        else:
-            # all subitems start with a common "prefix".
-            # move it out of the branch
-            for item in items:
-                del item[0]
-            subpatternappend(prefix)
-            continue # check next one
-        break
-
-    # check if the branch can be replaced by a character set
-    for item in items:
-        if len(item) != 1 or item[0][0] != LITERAL:
+    # check whether all branches share a common prefix
+    # (the prefix shouldn't contain a capture group)
+    prefix_len = 0
+    while prefix_len < len(items[0]) and not is_capture(items[0][prefix_len]):
+        prefix_len += 1
+
+    prefix = items[0][ : prefix_len]
+    for item in items[1 : ]:
+        prefix = prefix[ : len(item)]
+        while prefix and item[ : len(prefix)] != prefix:
+            prefix = prefix[ : -1]
+        if not prefix:
+            # no common prefix, so skip any further branches
             break
-    else:
-        # we can store this as a character set instead of a
+
+    if prefix:
+        subpattern.append(prefix)
+        items = [item[len(prefix) : ] for item in items]
+
+    # check whether the alternation can be replaced by a character set
+    if all(len(item) == 1 and item[0][0] == OP.LITERAL for item in items):
+        # we can store this as a set instead of a
         # branch (the compiler may optimize this even more)
-        set = []
-        setappend = set.append
-        for item in items:
-            setappend(item[0])
-        subpatternappend((IN, set))
-        return subpattern
+        subpattern.append((OP.SET, [item[0] for item in items]))
+    else:
+        subpattern.append((OP.BRANCH, (None, items)))
 
-    subpattern.append((BRANCH, (None, items)))
     return subpattern
 
+def is_capture(pattern):
+    if not pattern:
+        return False
+    o, a = pattern
+    if o in [OP.ASSERT, OP.ASSERT_NOT, OP.ATOMIC]:
+        return has_capture(a[1])
+    elif o == OP.BRANCH:
+        return any(has_capture(i) for i in a[1])
+    elif o == OP.GROUPREF_EXISTS:
+        return any(has_capture(i) for i in a[1 : 3])
+    elif o in [OP.REPEAT_MAX, OP.REPEAT_MIN, OP.REPEAT_POSS]:
+        return has_capture(a[2])
+    elif o in [OP.REPEAT_ONE_MAX, OP.REPEAT_ONE_MIN, OP.REPEAT_ONE_POSS]:
+        return is_capture(a[2])
+    elif o == OP.SUBPATTERN:
+        return a[0] is not None or has_capture(a[1])
+    else:
+        return False
+
+def has_capture(pattern):
+    if not pattern:
+        return False
+    return any(is_capture(i) for i in pattern)
+
 def _parse_sub_cond(source, state, condgroup):
     item_yes = _parse(source, state)
     if source.match("|"):
         item_no = _parse(source, state)
         if source.match("|"):
-            raise error, "conditional backref with more than two branches"
+            raise error("conditional reference with more than two branches")
     else:
         item_no = None
-    if source.next and not source.match(")", 0):
-        raise error, "pattern not properly closed"
+    if source.next and not source.match(")", False):
+        raise error("pattern not properly closed")
     subpattern = SubPattern(state)
-    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
+    ref = (OP.GROUPREF_EXISTS, [condgroup, item_yes, item_no])
+    state.fix_list.append(ref)
+    subpattern.append(ref)
     return subpattern
 
-_PATTERNENDERS = set("|)")
-_ASSERTCHARS = set("=!<")
-_LOOKBEHINDASSERTCHARS = set("=!")
-_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
+PATTERN_ENDERS = set("|)")
+ASSERT_CHARS = set("=!<")
+LOOKBEHIND_ASSERT_CHARS = set("=!")
+POSITION_CODES = set([OP.BOUNDARY, OP.END_OF_LINE, OP.END_OF_STRING, OP.END_OF_STRING_LN, OP.NOT_BOUNDARY, OP.START_OF_LINE, OP.START_OF_STRING])
+QUERY_GROUP = 0
+CAPTURE_GROUP = 1
+NONCAPTURE_GROUP = 2
+ATOMIC_GROUP = 3
 
 def _parse(source, state):
     # parse a simple pattern
     subpattern = SubPattern(state)
 
-    # precompute constants into local variables
-    subpatternappend = subpattern.append
-    sourceget = source.get
-    sourcematch = source.match
-    _len = len
-    PATTERNENDERS = _PATTERNENDERS
-    ASSERTCHARS = _ASSERTCHARS
-    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
-    REPEATCODES = _REPEATCODES
-
-    while 1:
-
-        if source.next in PATTERNENDERS:
+    while True:
+        if source.next in PATTERN_ENDERS:
             break # end of subpattern
-        this = sourceget()
-        if this is None:
-            break # end of pattern
+
+        this = source.get()
 
         if state.flags & SRE_FLAG_VERBOSE:
             # skip whitespace and comments
-            if this in WHITESPACE:
-                continue
+            if this in WHITESPACE_CHARS:
+                continue
             if this == "#":
-                while 1:
-                    this = sourceget()
+                while True:
+                    this = source.get()
                     if this in (None, "\n"):
                         break
                 continue
 
-        if this and this[0] not in SPECIAL_CHARS:
-            subpatternappend((LITERAL, ord(this)))
+        if this is None:
+            break # end of pattern
 
+        if this[0] not in SPECIAL_CHARS:
+            if state.flags & SRE_FLAG_IGNORECASE:
+                subpattern.append((OP.LITERAL_IGNORE, ord(this)))
+            else:
+                subpattern.append((OP.LITERAL, ord(this)))
         elif this == "[":
             # character set
-            set = []
-            setappend = set.append
-##          if sourcematch(":"):
-##              pass # handle character classes
-            if sourcematch("^"):
-                setappend((NEGATE, None))
+            char_set = []
+            negate = source.match("^")
             # check remaining characters
-            start = set[:]
-            while 1:
-                this = sourceget()
-                if this == "]" and set != start:
+            while True:
+                this = source.get()
+                if this == "]" and char_set:
+                    # terminating ]
                     break
-                elif this and this[0] == "\\":
-                    code1 = _class_escape(source, this)
+                if this and this[0] == "\\":
+                    code1 = class_escape(source, this)
                 elif this:
-                    code1 = LITERAL, ord(this)
+                    code1 = OP.LITERAL, ord(this)
                 else:
-                    raise error, "unexpected end of regular expression"
-                if sourcematch("-"):
+                    raise error("unexpected end of pattern")
+                if source.match("-"):
                     # potential range
-                    this = sourceget()
+                    this = source.get()
                     if this == "]":
-                        if code1[0] is IN:
-                            code1 = code1[1][0]
-                        setappend(code1)
-                        setappend((LITERAL, ord("-")))
+                        # at end of set, so literal char and "-"
+                        char_set.append(code1)
+                        char_set.append((OP.LITERAL, ord("-")))
                         break
                     elif this:
                         if this[0] == "\\":
-                            code2 = _class_escape(source, this)
+                            code2 = class_escape(source, this)
                         else:
-                            code2 = LITERAL, ord(this)
-                        if code1[0] != LITERAL or code2[0] != LITERAL:
-                            raise error, "bad character range"
+                            code2 = OP.LITERAL, ord(this)
+                        if code1[0] != OP.LITERAL or code2[0] != OP.LITERAL:
+                            raise error("bad character range")
                         lo = code1[1]
                         hi = code2[1]
                         if hi < lo:
-                            raise error, "bad character range"
-                        setappend((RANGE, (lo, hi)))
+                            raise error("bad character range")
+                        char_set.append((OP.RANGE, (lo, hi)))
                     else:
-                        raise error, "unexpected end of regular expression"
+                        raise error("unexpected end of pattern")
                 else:
-                    if code1[0] is IN:
-                        code1 = code1[1][0]
-                    setappend(code1)
-
-            # XXX: <fl> should move set optimization to compiler!
-            if _len(set)==1 and set[0][0] is LITERAL:
-                subpatternappend(set[0]) # optimization
-            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
-                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
+                    char_set.append(code1)
+            if negate:
+                if state.flags & SRE_FLAG_IGNORECASE:
+                    subpattern.append((OP.NOT_SET_IGNORE, char_set))
+                else:
+                    subpattern.append((OP.NOT_SET, char_set))
             else:
-                # XXX: <fl> should add charmap optimization here
-                subpatternappend((IN, set))
-
-        elif this and this[0] in REPEAT_CHARS:
+                if state.flags & SRE_FLAG_IGNORECASE:
+                    subpattern.append((OP.SET_IGNORE, char_set))
+                else:
+                    subpattern.append((OP.SET, char_set))
+        elif this[0] in REPEAT_CHARS:
             # repeat previous item
             if this == "?":
                 min, max = 0, 1
             elif this == "*":
                 min, max = 0, MAXREPEAT
-
             elif this == "+":
                 min, max = 1, MAXREPEAT
             elif this == "{":
                 if source.next == "}":
-                    subpatternappend((LITERAL, ord(this)))
+                    subpattern.append((OP.LITERAL, ord(this)))
                     continue
                 here = source.tell()
                 min, max = 0, MAXREPEAT
                 lo = hi = ""
                 while source.next in DIGITS:
-                    lo = lo + source.get()
-                if sourcematch(","):
+                    lo += source.get()
+                if source.match(","):
                     while source.next in DIGITS:
-                        hi = hi + sourceget()
+                        hi += source.get()
                 else:
                     hi = lo
-                if not sourcematch("}"):
-                    subpatternappend((LITERAL, ord(this)))
+                if not source.match("}"):
+                    subpattern.append((OP.LITERAL, ord(this)))
                     source.seek(here)
                     continue
                 if lo:
@@ -507,165 +568,227 @@
                 if hi:
                     max = int(hi)
                 if max < min:
-                    raise error, "bad repeat interval"
+                    raise error("bad repeat interval")
             else:
-                raise error, "not supported"
+                raise error("not supported")
             # figure out which item to repeat
-            if subpattern:
-                item = subpattern[-1:]
+            item = subpattern[-1 : ]
+            if not item or len(item) == 1 and item[0][0] in POSITION_CODES:
+                raise error("nothing to repeat")
+            if source.match("?"):
+                subpattern[-1] = (OP.REPEAT_MIN, (min, max, item))
+            elif source.match("+"):
+                subpattern[-1] = (OP.REPEAT_POSS, (min, max, item))
             else:
-                item = None
-            if not item or (_len(item) == 1 and item[0][0] == AT):
-                raise error, "nothing to repeat"
-            if item[0][0] in REPEATCODES:
-                raise error, "multiple repeat"
-            if sourcematch("?"):
-                subpattern[-1] = (MIN_REPEAT, (min, max, item))
-            else:
-                subpattern[-1] = (MAX_REPEAT, (min, max, item))
-
+                subpattern[-1] = (OP.REPEAT_MAX, (min, max, item))
         elif this == ".":
-            subpatternappend((ANY, None))
-
+            if state.flags & SRE_FLAG_DOTALL:
+                subpattern.append((OP.ANY_ALL, None))
+            else:
+                subpattern.append((OP.ANY, None))
         elif this == "(":
-            group = 1
+            group = CAPTURE_GROUP
             name = None
             condgroup = None
-            if sourcematch("?"):
-                group = 0
+            scoped_flags = None
+            if source.match("?"):
+                group = QUERY_GROUP
                 # options
-                if sourcematch("P"):
+                if source.match("P"):
                     # python extensions
-                    if sourcematch("<"):
-                        # named group: skip forward to end of name
-                        name = ""
-                        while 1:
-                            char = sourceget()
-                            if char is None:
-                                raise error, "unterminated name"
-                            if char == ">":
-                                break
-                            name = name + char
-                        group = 1
-                        if not isname(name):
-                            raise error, "bad character in group name"
-                    elif sourcematch("="):
-                        # named backreference
-                        name = ""
-                        while 1:
-                            char = sourceget()
-                            if char is None:
-                                raise error, "unterminated name"
-                            if char == ")":
-                                break
-                            name = name + char
-                        if not isname(name):
-                            raise error, "bad character in group name"
-                        gid = state.groupdict.get(name)
-                        if gid is None:
-                            raise error, "unknown group name"
-                        subpatternappend((GROUPREF, gid))
+                    if source.match("<"):
+                        # named group
+                        name = parse_name(source, ">", "group", "(?P<")
+                        group = CAPTURE_GROUP
+                        if not is_name(name):
+                            raise error("bad group name: %s" % name)
+                    elif source.match("="):
+                        # named group reference
+                        # group reference stored as list instead of tuple so that it can be fixed later
+                        name = parse_name(source, ")", "group", "(?P=")
+                        if not is_name(name):
+                            raise error("bad group name: %s" % name)
+                        if state.flags & SRE_FLAG_IGNORECASE:
+                            ref = [OP.GROUPREF_IGNORE, name]
+                        else:
+                            ref = [OP.GROUPREF, name]
+                        state.fix_list.append(ref)
+                        subpattern.append(ref)
                         continue
                     else:
-                        char = sourceget()
+                        char = source.get()
                         if char is None:
-                            raise error, "unexpected end of pattern"
-                        raise error, "unknown specifier: ?P%s" % char
-                elif sourcematch(":"):
-                    # non-capturing group
-                    group = 2
-                elif sourcematch("#"):
+                            raise error("unexpected end of pattern")
+                        raise error("unknown specifier: (?P%s" % char)
+                elif source.match("<"):
+                    # named group or look-behind
+                    if source.next in LOOKBEHIND_ASSERT_CHARS:
+                        # lookbehind assertion
+                        dir = -1 # lookbehind
+                        char = source.get()
+                        saved_flags = state.flags
+                        p = _parse_sub(source, state)
+                        state.flags = (state.flags & ~SCOPED_FLAGS_MASK) | (saved_flags & SCOPED_FLAGS_MASK)
+                        if not source.match(")"):
+                            raise error("unbalanced parenthesis")
+                        if char == "=":
+                            subpattern.append((OP.ASSERT, (dir, p)))
+                        else:
+                            subpattern.append((OP.ASSERT_NOT, (dir, p)))
+                        continue
+                    # named group
+                    name = parse_name(source, ">", "group", "(?<")
+                    group = CAPTURE_GROUP
+                    if not is_name(name):
+                        raise error("bad group name: %s" % name)
+                elif source.match(">"):
+                    # atomic group
+                    group = ATOMIC_GROUP
+                elif source.match("#"):
                     # comment
-                    while 1:
-                        if source.next is None or source.next == ")":
+                    while True:
+                        if source.next in (None, ")"):
                             break
-                        sourceget()
-                    if not sourcematch(")"):
-                        raise error, "unbalanced parenthesis"
+                        source.get()
+                    if not source.match(")"):
+                        raise error("unbalanced parenthesis")
                     continue
-                elif source.next in ASSERTCHARS:
+                elif source.next in ASSERT_CHARS:
                     # lookahead assertions
-                    char = sourceget()
+                    char = source.get()
                     dir = 1
                     if char == "<":
-                        if source.next not in LOOKBEHINDASSERTCHARS:
-                            raise error, "syntax error"
+                        if source.next not in LOOKBEHIND_ASSERT_CHARS:
+                            raise error("syntax error: (?%s" % char)
                         dir = -1 # lookbehind
-                        char = sourceget()
+                        char = source.get()
+                    saved_flags = state.flags
                     p = _parse_sub(source, state)
-                    if not sourcematch(")"):
-                        raise error, "unbalanced parenthesis"
+                    state.flags = (state.flags & ~SCOPED_FLAGS_MASK) | (saved_flags & SCOPED_FLAGS_MASK)
+                    if not source.match(")"):
+                        raise error("unbalanced parenthesis")
                     if char == "=":
-                        subpatternappend((ASSERT, (dir, p)))
+                        subpattern.append((OP.ASSERT, (dir, p)))
                     else:
-                        subpatternappend((ASSERT_NOT, (dir, p)))
+                        subpattern.append((OP.ASSERT_NOT, (dir, p)))
                     continue
-                elif sourcematch("("):
+                elif source.match("("):
                     # conditional backreference group
-                    condname = ""
-                    while 1:
-                        char = sourceget()
-                        if char is None:
-                            raise error, "unterminated name"
-                        if char == ")":
-                            break
-                        condname = condname + char
-                    group = 2
-                    if isname(condname):
-                        condgroup = state.groupdict.get(condname)
-                        if condgroup is None:
-                            raise error, "unknown group name"
-                    else:
-                        try:
-                            condgroup = int(condname)
-                        except ValueError:
-                            raise error, "bad character in group name"
-                else:
-                    # flags
-                    if not source.next in FLAGS:
-                        raise error, "unexpected end of pattern"
+                    condgroup = parse_name(source, ")", "group", "(?(")
+                    group = NONCAPTURE_GROUP
+                    if not is_name(condgroup) and not condgroup.isdigit():
+                        raise error("bad group name: %s" % condgroup)
+                else:
+                    # probably non-capturing group or flags
+                    # might be scoped (set at start of group and local to group)
+                    scoped_flags = state.flags
+                    seen_on, seen_off = False, False
                     while source.next in FLAGS:
-                        state.flags = state.flags | FLAGS[sourceget()]
+                        scoped_flags |= FLAGS[source.get()]
+                        seen_on = True
+                    if source.match("-"):
+                        while source.next in FLAGS:
+                            if (FLAGS[source.next] & SCOPED_FLAGS_MASK) == 0:
+                                raise error("bad pattern flag: %s" % source.next)
+                            scoped_flags &= ~FLAGS[source.get()]
+                            seen_off = True
+                        if not seen_off:
+                            raise error("bad pattern flag")
+                    # update just global flags
+                    state.flags |= scoped_flags & ~SCOPED_FLAGS_MASK
+                    if source.match(":"):
+                        # non-capturing group with scoped flags
+                        group = NONCAPTURE_GROUP
+                    elif seen_on or seen_off:
+                        # not start of group, just setting flags
+                        state.flags = scoped_flags
+                        scoped_flags = None
+                    else:
+                        raise error("unexpected end of pattern")
             if group:
+                atomic = group == ATOMIC_GROUP
                 # parse group contents
-                if group == 2:
+                if group in [NONCAPTURE_GROUP, ATOMIC_GROUP]:
                     # anonymous group
                     group = None
                 else:
-                    group = state.opengroup(name)
+                    group = state.new_group(name)
+                saved_flags = state.flags
+                if scoped_flags is not None:
+                    state.flags = scoped_flags
                 if condgroup:
                     p = _parse_sub_cond(source, state, condgroup)
                 else:
                     p = _parse_sub(source, state)
-                if not sourcematch(")"):
-                    raise error, "unbalanced parenthesis"
-                if group is not None:
-                    state.closegroup(group)
-                subpatternappend((SUBPATTERN, (group, p)))
+                state.flags = (state.flags & ~SCOPED_FLAGS_MASK) | (saved_flags & SCOPED_FLAGS_MASK)
+                if not source.match(")"):
+                    raise error("unbalanced parenthesis")
+                if atomic:
+                    subpattern.append((OP.ATOMIC, (group, p)))
+                else:
+                    if group is None:
+                        subpattern.append((OP.SUBPATTERN, (None, p)))
+                    else:
+                        # group reference stored as list instead of tuple so that it can be fixed later
+                        ref = OP.SUBPATTERN, (list(group), p)
+                        state.fix_list.append(ref)
+                        subpattern.append(ref)
             else:
-                while 1:
-                    char = sourceget()
+                while True:
+                    char = source.get()
                     if char is None:
-                        raise error, "unexpected end of pattern"
+                        raise error("unexpected end of pattern")
                     if char == ")":
                         break
-                    raise error, "unknown extension"
-
+                    raise error("unknown extension")
         elif this == "^":
-            subpatternappend((AT, AT_BEGINNING))
-
+            if state.flags & SRE_FLAG_MULTILINE:
+                subpattern.append((OP.START_OF_LINE, None))
+            else:
+                subpattern.append((OP.START_OF_STRING, None))
         elif this == "$":
-            subpattern.append((AT, AT_END))
-
-        elif this and this[0] == "\\":
-            code = _escape(source, this, state)
-            subpatternappend(code)
-
+            if state.flags & SRE_FLAG_MULTILINE:
+                subpattern.append((OP.END_OF_LINE, None))
+            else:
+                subpattern.append((OP.END_OF_STRING_LN, None))
+        elif this[0] == "\\":
+            code = escape(source, this, state)
+            subpattern.append(code)
         else:
-            raise error, "parser error"
+            raise error("parser error")
 
     return subpattern
 
+def fix_ref(ref, index, state):
+    if ref[index].isdigit():
+        ref[index] = int(ref[index])
+        if not (1 <= ref[index] <= state.groups):
+            raise error("invalid group reference: %s" % ref[index])
+    else:
+        try:
+            ref[index] = state.named_groups[ref[index]]
+        except KeyError:
+            raise error("invalid group reference: %s" % ref[index])
+
+def fix_grouprefs(p, state):
+    for name, value in state.named_groups.items():
+        state.named_groups[name] = state.groups + 1 + value
+    GROUPREF_SET = set([OP.GROUPREF, OP.GROUPREF_IGNORE])
+    for ref in state.fix_list:
+        if ref[0] in GROUPREF_SET:
+            fix_ref(ref, 1, state)
+        elif ref[0] == OP.GROUPREF_EXISTS:
+            fix_ref(ref[1], 0, state)
+        elif ref[0] == OP.SUBPATTERN:
+            ref = ref[1][0]
+            if ref[1] is None:
+                ref[1] = ref[0]
+            else:
+                try:
+                    ref[1] = state.named_groups[ref[1]]
+                except KeyError:
+                    raise error("invalid group reference: %s" % ref[1])
+
 def parse(str, flags=0, pattern=None):
     # parse 're' pattern into list of (opcode, argument) tuples
 
@@ -675,122 +798,144 @@
         pattern = Pattern()
     pattern.flags = flags
     pattern.str = str
+    pattern.group_count = 0
 
     p = _parse_sub(source, pattern, 0)
 
     tail = source.get()
     if tail == ")":
-        raise error, "unbalanced parenthesis"
+        raise error("unbalanced parenthesis")
     elif tail:
-        raise error, "bogus characters at end of regular expression"
+        raise error("bad characters at end of pattern")
+
+    fix_grouprefs(p, pattern)
 
     if flags & SRE_FLAG_DEBUG:
         p.dump()
 
-    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
-        # the VERBOSE flag was switched on inside the pattern.  to be
-        # on the safe side, we'll parse the whole thing again...
-        return parse(str, p.pattern.flags)
-
     return p
 
 def parse_template(source, pattern):
-    # parse 're' replacement string into list of literals and
-    # group references
+    # parse 're' replacement string into list of literals and group references
+    sep = source[ : 0]
+    char_type = unichr if isinstance(sep, unicode) else chr
     s = Tokenizer(source)
-    sget = s.get
-    p = []
-    a = p.append
-    def literal(literal, p=p, pappend=a):
-        if p and p[-1][0] is LITERAL:
-            p[-1] = LITERAL, p[-1][1] + literal
-        else:
-            pappend((LITERAL, literal))
-    sep = source[:0]
-    if type(sep) is type(""):
-        makechar = chr
-    else:
-        makechar = unichr
-    while 1:
-        this = sget()
+    literals, groups = [], []
+    current_literal = []
+    def add_literal(char_code):
+        current_literal.append(char_type(char_code))
+    def flush_literal():
+        if current_literal:
+            literals.append(sep.join(current_literal))
+            current_literal[:] = []
+    def add_group(index):
+        flush_literal()
+        groups.append((index, len(literals)))
+        literals.append(None)
+    while True:
+        this = s.get()
         if this is None:
             break # end of replacement string
-        if this and this[0] == "\\":
-            # group
-            c = this[1:2]
-            if c == "g":
-                name = ""
-                if s.match("<"):
-                    while 1:
-                        char = sget()
-                        if char is None:
-                            raise error, "unterminated group name"
-                        if char == ">":
-                            break
-                        name = name + char
-                if not name:
-                    raise error, "bad group name"
-                try:
+        if this[0] == "\\":
+            c = this[1 : 2]
+            if c in HEX_ESCAPE_LENGTH:
+                # hex escape
+                add_literal(hex_escape(s, this, HEX_ESCAPE_LENGTH[c]))
+            elif c == "o":
+                # octal escape
+                add_literal(oct_escape(s, this, ""))
+            elif c == "0":
+                add_literal(oct_escape(s, this[0], this[1 : ]))
+            elif c in DIGITS:
+                if s.next in DIGITS:
+                    this += s.get()
+                    if set(this[1 : ]) <= OCTDIGITS and s.next in OCTDIGITS:
+                        this += s.get()
+                        add_literal(int(this[1 : ], 8) & 0xFF)
+                    else:
+                        index = int(this[1 : ])
+                        if index > pattern.groups:
+                            raise error("invalid group reference: %s" % index)
+                        add_group(index)
+                else:
+                    index = int(this[1 : ])
+                    if index > pattern.groups:
+                        raise error("invalid group reference: %s" % index)
+                    add_group(index)
+            elif c == "g":
+                # group reference
+                if s.next in GROUP_DELIMITERS:
+                    # delimited group reference
+                    delimiter = s.get()
+                    name = parse_name(s, GROUP_DELIMITERS[delimiter], "group", this + delimiter)
+                elif s.next in DIGITS:
+                    # non-delimited group reference (single digit)
+                    name = s.get()
+                else:
+                    raise error("missing group name: %s" % this)
+                if name.isdigit():
                     index = int(name)
-                    if index < 0:
-                        raise error, "negative group number"
-                except ValueError:
-                    if not isname(name):
-                        raise error, "bad character in group name"
+                    if not (0 <= index <= pattern.groups):
+                        raise error("invalid group reference: %s" % index)
+                elif is_name(name):
                     try:
                         index = pattern.groupindex[name]
                     except KeyError:
-                        raise IndexError, "unknown group name"
-                a((MARK, index))
-            elif c == "0":
-                if s.next in OCTDIGITS:
-                    this = this + sget()
-                    if s.next in OCTDIGITS:
-                        this = this + sget()
-                literal(makechar(int(this[1:], 8) & 0xff))
-            elif c in DIGITS:
-                isoctal = False
-                if s.next in DIGITS:
-                    this = this + sget()
-                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
-                        s.next in OCTDIGITS):
-                        this = this + sget()
-                        isoctal = True
-                        literal(makechar(int(this[1:], 8) & 0xff))
-                if not isoctal:
-                    a((MARK, int(this[1:])))
+                        raise error("invalid group reference: %s" % name)
+                else:
+                    raise error("bad group name: %s" % name)
+                add_group(index)
+            elif c == "k":
+                # named group reference
+                if s.next in GROUP_DELIMITERS:
+                    # delimited group reference
+                    delimiter = s.get()
+                    name = parse_name(s, GROUP_DELIMITERS[delimiter], "group", this + delimiter)
+                else:
+                    # non-delimited group reference; invalid for \k
+                    raise error("missing group name: %s" % this)
+                if is_name(name):
+                    try:
+                        index = pattern.groupindex[name]
+                    except KeyError:
+                        raise error("invalid group reference: %s" % name)
+                else:
+                    raise error("bad group name: %s" % name)
+                add_group(index)
+            elif c == "N":
+                # named character
+                if not s.match("{"):
+                    raise error("missing character name: %s" % this)
+                name = parse_name(s, "}", "character", this + "{")
+                try:
+                    add_literal(ord(unicodedata.lookup(name)))
+                except KeyError:
+                    raise error("bad character name: %s" % name)
             else:
                 try:
-                    this = makechar(ESCAPES[this][1])
+                    add_literal(ESCAPES[this][1])
                 except KeyError:
-                    pass
-                literal(this)
-        else:
-            literal(this)
-    # convert template to groups and literals lists
-    i = 0
-    groups = []
-    groupsappend = groups.append
-    literals = [None] * len(p)
-    for c, s in p:
-        if c is MARK:
-            groupsappend((i, s))
-            # literal[i] is already None
+                    add_literal(ord(this[0]))
+                    add_literal(ord(this[1]))
         else:
-            literals[i] = s
-        i = i + 1
-    return groups, literals
-
-def expand_template(template, match):
-    g = match.group
-    sep = match.string[:0]
-    groups, literals = template
+            add_literal(ord(this))
+    flush_literal()
+    return literals, groups
+
+def expand_template(template, match, unmatched_as_empty=False):
+    g = match._internal_group
+    sep = match.string[ : 0]
+    literals, groups = template
     literals = literals[:]
     try:
-        for index, group in groups:
-            literals[index] = s = g(group)
+        for index, pos in groups:
+            s = g(index)
             if s is None:
-                raise error, "unmatched group"
+                if unmatched_as_empty:
+                    s = sep
+                else:
+                    raise error("unmatched group")
+            literals[pos] = s
     except IndexError:
-        raise error, "invalid group reference"
+        raise error("invalid group reference: %s" % index)
     return sep.join(literals)
=== modified file Lib/re.py
--- Lib/re.py	2009-01-01 15:46:10 +0000
+++ Lib/re.py	2009-02-03 21:49:47 +0000
@@ -27,52 +27,81 @@
 concatenate ordinary characters, so last matches the string 'last'.
 
 The special characters are:
-    "."      Matches any character except a newline.
-    "^"      Matches the start of the string.
-    "$"      Matches the end of the string or just before the newline at
-             the end of the string.
-    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
-             Greedy means that it will match as many repetitions as possible.
-    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
-    "?"      Matches 0 or 1 (greedy) of the preceding RE.
-    *?,+?,?? Non-greedy versions of the previous three special characters.
-    {m,n}    Matches from m to n repetitions of the preceding RE.
-    {m,n}?   Non-greedy version of the above.
-    "\\"     Either escapes special characters or signals a special sequence.
-    []       Indicates a set of characters.
-             A "^" as the first character indicates a complementing set.
-    "|"      A|B, creates an RE that will match either A or B.
-    (...)    Matches the RE inside the parentheses.
-             The contents can be retrieved or matched later in the string.
-    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
-    (?:...)  Non-grouping version of regular parentheses.
-    (?P<name>...) The substring matched by the group is accessible by name.
-    (?P=name)     Matches the text matched earlier by the group named name.
-    (?#...)  A comment; ignored.
-    (?=...)  Matches if ... matches next, but doesn't consume the string.
-    (?!...)  Matches if ... doesn't match next.
-    (?<=...) Matches if preceded by ... (must be fixed length).
-    (?<!...) Matches if not preceded by ... (must be fixed length).
-    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
-                       the (optional) no pattern otherwise.
+    "."                Matches any character except a newline.
+    "^"                Matches the start of the string.
+    "$"                Matches the end of the string or just before the
+                       newline at the end of the string.
+    "*"                Matches 0 or more (greedy) repetitions of the
+                       preceding RE. Greedy means that it will match as
+                       many repetitions as possible.
+    "+"                Matches 1 or more (greedy) repetitions of the
+                       preceding RE.
+    "?"                Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,??           Non-greedy versions of the previous three special
+                       characters.
+    *+,++,?+           Possessive versions of the previous three special
+                       characters.
+    {m,n}              Matches from m to n repetitions of the preceding
+                       RE.
+    {m,n}?             Non-greedy version of the above.
+    {m,n}+             Possessive version of the above.
+    "\\"               Either escapes special characters or signals a
+                       special sequence.
+    []                 Indicates a set of characters. A "^" as the first
+                       character indicates a complementing set.
+    "|"                A|B, creates an RE that will match either A or B.
+    (...)              Matches the RE inside the parentheses. The contents
+                       can be retrieved or matched later in the string.
+    (?iLmrsuxz)        Set the I, L, M, R, S, U, X, or Z flag for the
+                       following RE (see below).
+    (?:...)            Non-capturing version of regular parentheses.
+    (?P<name>...)      The substring matched by the group is accessible by
+                       name.
+    (?<name>...)       The substring matched by the group is accessible by
+                       name.
+    (?#...)            A comment; ignored.
+    (?>...)            Atomic group. Like (?:...) but won't retry the RE
+                       within the parentheses.
+    (?=...)            Matches if ... matches next, but doesn't consume
+                       the string.
+    (?!...)            Matches if ... doesn't match next.
+    (?<=...)           Matches if preceded by ... (must be fixed length).
+    (?<!...)           Matches if not preceded by ... (must be fixed
+                       length).
+    (?(id/name)yes|no) Matches yes pattern if the group with id/name
+                       matched, the (optional) no pattern otherwise.
 
 The special sequences consist of "\\" and a character from the list
 below.  If the ordinary character is not on the list, then the
 resulting RE will match the second character.
-    \number  Matches the contents of the group of the same number.
-    \A       Matches only at the start of the string.
-    \Z       Matches only at the end of the string.
-    \b       Matches the empty string, but only at the start or end of a word.
-    \B       Matches the empty string, but not at the start or end of a word.
-    \d       Matches any decimal digit; equivalent to the set [0-9].
-    \D       Matches any non-digit character; equivalent to the set [^0-9].
-    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
-    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
-    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
-             With LOCALE, it will match the set [0-9_] plus characters defined
-             as letters for the current locale.
-    \W       Matches the complement of \w.
-    \\       Matches a literal backslash.
+    \number     Matches the contents of the group of the same number.
+    \A          Matches only at the start of the string.
+    \b          Matches the empty string, but only at the start or end of
+                a word.
+    \B          Matches the empty string, but not at the start or end of a
+                word.
+    \d          Matches any decimal digit; equivalent to the set [0-9].
+    \D          Matches any non-digit character; equivalent to the set
+                [^0-9].
+    \g<name>    Matches the text matched by the group named name.
+    \g<number>  Matches the contents of the group of the same number.
+    \g<+number> Matches the contents of the group of the relative number.
+    \g<-number> Matches the contents of the group of the relative number.
+    \k<name>    Matches the text matched earlier by the group named name.
+    \N{name}    Matches named Unicode character.
+    \p{name}    Matches any character having the named property.
+    \P{name}    Matches any character not having the named property.
+    \s          Matches any whitespace character; equivalent to
+                [ \t\n\r\f\v].
+    \S          Matches any non-whitespace character; equiv. to
+                [^ \t\n\r\f\v].
+    \w          Matches any alphanumeric character; equivalent to
+                [a-zA-Z0-9_]. With LOCALE, it will match the set
+                [0-9_] plus characters defined as letters for the current
+                locale.
+    \W          Matches the complement of \w.
+    \Z          Matches only at the end of the string.
+    \\          Matches a literal backslash.
 
 This module exports the following functions:
     match    Match a regular expression pattern to the beginning of a string.
@@ -87,15 +116,17 @@
     escape   Backslash all non-alphanumerics in a string.
 
 Some of the functions in this module takes flags as optional parameters:
-    I  IGNORECASE  Perform case-insensitive matching.
-    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
-    M  MULTILINE   "^" matches the beginning of lines (after a newline)
-                   as well as the string.
-                   "$" matches the end of lines (before a newline) as well
-                   as the end of the string.
-    S  DOTALL      "." matches any character at all, including the newline.
-    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
-    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
+    I  IGNORECASE Perform case-insensitive matching.
+    L  LOCALE     Make \w, \W, \b, \B, dependent on the current locale.
+    M  MULTILINE  "^" matches the beginning of lines (after a newline) as
+                  well as the string.
+                  "$" matches the end of lines (before a newline) as well
+                  as the end of the string.
+    R  REVERSE    Search backwards, from the end to the start.
+    S  DOTALL     "." matches any character at all, including the newline.
+    X  VERBOSE    Ignore whitespace and comments for nicer looking RE's.
+    U  UNICODE    Make \w, \W, \b, \B, dependent on the Unicode locale.
+    Z  ZEROWIDTH  Permit splitting on zero-width separators.
 
 This module also defines an exception 'error'.
 
@@ -109,18 +140,19 @@
 __all__ = [ "match", "search", "sub", "subn", "split", "findall",
     "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
     "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
-    "UNICODE", "error" ]
+    "UNICODE", "REVERSE", "ZEROWIDTH", "R", "Z", "error" ]
 
-__version__ = "2.2.1"
+__version__ = "2.2.2"
 
 # flags
 I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
 L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
 M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+R = REVERSE = sre_compile.SRE_FLAG_REVERSE # search backwards
 S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
 X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
-
+Z = ZEROWIDTH = sre_compile.SRE_FLAG_ZEROWIDTH # permit splitting on zero-width separators.
 # sre extensions (experimental, don't rely on these)
 T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
 DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
@@ -237,12 +269,12 @@
         if flags:
             raise ValueError('Cannot process flags argument with a compiled pattern')
         return pattern
-    if not sre_compile.isstring(pattern):
-        raise TypeError, "first argument must be string or compiled pattern"
+    if not isinstance(pattern, (str, unicode)):
+        raise TypeError("First argument must be string or compiled pattern")
     try:
         p = sre_compile.compile(pattern, flags)
     except error, v:
-        raise error, v # invalid expression
+        raise error(v) # invalid expression
     if len(_cache) >= _MAXCACHE:
         _cache.clear()
     _cache[cachekey] = p
@@ -257,7 +289,7 @@
     try:
         p = sre_parse.parse_template(repl, pattern)
     except error, v:
-        raise error, v # invalid expression
+        raise error(v) # invalid expression
     if len(_cache_repl) >= _MAXCACHE:
         _cache_repl.clear()
     _cache_repl[key] = p
@@ -266,7 +298,7 @@
 def _expand(pattern, match, template):
     # internal: match.expand implementation hook
     template = sre_parse.parse_template(template, pattern)
-    return sre_parse.expand_template(template, match)
+    return sre_parse.expand_template(template, match, True)
 
 def _subx(pattern, template):
     # internal: pattern.sub/subn implementation helper
@@ -275,7 +307,7 @@
         # literal replacement
         return template[1][0]
     def filter(match, template=template):
-        return sre_parse.expand_template(template, match)
+        return sre_parse.expand_template(template, match, True)
     return filter
 
 # register myself for pickling
@@ -292,36 +324,31 @@
 
 class Scanner:
     def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
         self.lexicon = lexicon
         # combine phrases into a compound pattern
         p = []
         s = sre_parse.Pattern()
         s.flags = flags
-        for phrase, action in lexicon:
-            p.append(sre_parse.SubPattern(s, [
-                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
-                ]))
-        s.groups = len(p)+1
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        self.scanner = sre_compile.compile(p)
+        sep, template = map(type(lexicon[0][0]), ("|", "(%s)"))
+        regex = sep.join(template % phrase for phrase, action in lexicon)
+        self.scanner = sre_compile.compile(regex, flags)  # honour caller's flags
     def scan(self, string):
         result = []
         append = result.append
         match = self.scanner.scanner(string).match
         i = 0
-        while 1:
+        while True:
             m = match()
             if not m:
                 break
             j = m.end()
             if i == j:
                 break
-            action = self.lexicon[m.lastindex-1][1]
+            action = self.lexicon[m.lastindex - 1][1]
             if hasattr(action, '__call__'):
                 self.match = m
                 action = action(self, m.group())
             if action is not None:
                 append(action)
             i = j
-        return result, string[i:]
+        return result, string[i : ]
=== modified file Modules/sre.h
--- Modules/sre.h	2006-06-12 03:05:40 +0000
+++ Modules/sre.h	2009-01-29 22:36:26 +0000
@@ -11,19 +11,15 @@
 #ifndef SRE_INCLUDED
 #define SRE_INCLUDED
 
-#include "sre_constants.h"
+typedef int BOOL;
+enum BOOL {FALSE, TRUE};
 
-/* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
-#define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
+#include "sre_constants.h"
 
 typedef struct {
     PyObject_VAR_HEAD
     Py_ssize_t groups; /* must be first! */
+    Py_ssize_t internal_groups; /* both numbered and named (all named are numbered) */
     PyObject* groupindex;
     PyObject* indexgroup;
     /* compatibility */
@@ -36,6 +32,7 @@
 } PatternObject;
 
 #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
+#define PatternObject_GetCodeSize(o) (((PatternObject*)(o))->codesize)
 
 typedef struct {
     PyObject_VAR_HEAD
@@ -44,7 +41,9 @@
     PatternObject* pattern; /* link to the regex (pattern) object */
     Py_ssize_t pos, endpos; /* current target slice */
     Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
+    Py_ssize_t last_named_index; /* last named index marker seen by the engine (-1 if none) */
     Py_ssize_t groups; /* number of groups (start/end marks) */
+    Py_ssize_t internal_groups; /* number of groups, both numbered and named (all named are also numbered) */
     Py_ssize_t mark[1];
 } MatchObject;
 
@@ -53,12 +52,52 @@
 /* FIXME: <fl> shouldn't be a constant, really... */
 #define SRE_MARK_SIZE 200
 
-typedef struct SRE_REPEAT_T {
-    Py_ssize_t count;
-    SRE_CODE* pattern; /* points to REPEAT operator arguments */
-    void* last_ptr; /* helper to check for infinite loops */
-    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
-} SRE_REPEAT;
+#define SRE_BACKTRACK_CHUNK_SIZE 1024
+
+typedef struct SRE_BACKTRACK_ITEM {
+    int op;
+    union
+    {
+        struct {
+            void* text_start;
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+        } assert;
+        struct {
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+        } branch;
+        struct {
+            int numbered_index;
+            void* numbered_mark_ptr;
+            int named_index;
+            void* named_mark_ptr;
+        } mark;
+        struct {
+            void* text_ptr;
+            int repeat_min;
+            int repeat_max;
+            int repeat_counter;
+            void* repeat_start;
+            struct SRE_BACKTRACK_ITEM* top_nested;
+            SRE_CODE* pattern_ptr;
+        } repeat;
+    };
+    void* marks; // Numbered and named marks.
+} SRE_BACKTRACK_ITEM;
+
+typedef struct SRE_BACKTRACK_CHUNK {
+    struct SRE_BACKTRACK_CHUNK* previous;
+    SRE_BACKTRACK_ITEM items[SRE_BACKTRACK_CHUNK_SIZE];
+    int count;
+} SRE_BACKTRACK_CHUNK;
+
+typedef struct SRE_ENCODING_TABLE {
+BOOL (*in_category)(SRE_CODE category, SRE_CODE ch);
+SRE_CODE (*lower)(SRE_CODE ch);
+SRE_CODE (*upper)(SRE_CODE ch);
+SRE_CODE (*title)(SRE_CODE ch);
+} SRE_ENCODING_TABLE;
 
 typedef struct {
     /* string pointers */
@@ -71,18 +110,20 @@
     Py_ssize_t pos, endpos;
     /* character size */
     int charsize;
+    int reverse;
+    int reject_zero_width;
     /* registers */
     Py_ssize_t lastindex;
     Py_ssize_t lastmark;
+    Py_ssize_t last_named_index;
     void* mark[SRE_MARK_SIZE];
     /* dynamically allocated stuff */
-    char* data_stack;
-    size_t data_stack_size;
-    size_t data_stack_base;
-    /* current repeat context */
-    SRE_REPEAT *repeat;
+    SRE_BACKTRACK_CHUNK* backtrack_chunk;
+    int numbered_mark_count;
+    int named_mark_count;
+    SRE_CODE* pattern_code;
     /* hooks */
-    SRE_TOLOWER_HOOK lower;
+    SRE_ENCODING_TABLE* encoding;
 } SRE_STATE;
 
 typedef struct {
=== modified file Modules/sre_constants.h
--- Modules/sre_constants.h	2003-10-17 22:13:16 +0000
+++ Modules/sre_constants.h	2009-02-01 01:43:54 +0000
@@ -11,76 +11,297 @@
  * See the _sre.c file for information on usage and redistribution.
  */
 
-#define SRE_MAGIC 20031017
+#define SRE_MAGIC 20081218
+
+/* size of a code word (must be unsigned short or larger, and
+   large enough to hold a Py_UNICODE character) */
+typedef unsigned int SRE_CODE;
+
+#define SRE_BYTES_PER_CODE 4
+#define SRE_BITS_PER_CODE 32
+#define SRE_UNLIMITED_REPEATS 0xFFFFFFFF
+
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
 #define SRE_OP_ANY_ALL 3
-#define SRE_OP_ASSERT 4
-#define SRE_OP_ASSERT_NOT 5
-#define SRE_OP_AT 6
-#define SRE_OP_BRANCH 7
-#define SRE_OP_CALL 8
-#define SRE_OP_CATEGORY 9
-#define SRE_OP_CHARSET 10
-#define SRE_OP_BIGCHARSET 11
-#define SRE_OP_GROUPREF 12
-#define SRE_OP_GROUPREF_EXISTS 13
-#define SRE_OP_GROUPREF_IGNORE 14
-#define SRE_OP_IN 15
-#define SRE_OP_IN_IGNORE 16
-#define SRE_OP_INFO 17
-#define SRE_OP_JUMP 18
-#define SRE_OP_LITERAL 19
-#define SRE_OP_LITERAL_IGNORE 20
-#define SRE_OP_MARK 21
-#define SRE_OP_MAX_UNTIL 22
-#define SRE_OP_MIN_UNTIL 23
-#define SRE_OP_NOT_LITERAL 24
-#define SRE_OP_NOT_LITERAL_IGNORE 25
-#define SRE_OP_NEGATE 26
-#define SRE_OP_RANGE 27
-#define SRE_OP_REPEAT 28
-#define SRE_OP_REPEAT_ONE 29
-#define SRE_OP_SUBPATTERN 30
-#define SRE_OP_MIN_REPEAT_ONE 31
-#define SRE_AT_BEGINNING 0
-#define SRE_AT_BEGINNING_LINE 1
-#define SRE_AT_BEGINNING_STRING 2
-#define SRE_AT_BOUNDARY 3
-#define SRE_AT_NON_BOUNDARY 4
-#define SRE_AT_END 5
-#define SRE_AT_END_LINE 6
-#define SRE_AT_END_STRING 7
-#define SRE_AT_LOC_BOUNDARY 8
-#define SRE_AT_LOC_NON_BOUNDARY 9
-#define SRE_AT_UNI_BOUNDARY 10
-#define SRE_AT_UNI_NON_BOUNDARY 11
-#define SRE_CATEGORY_DIGIT 0
-#define SRE_CATEGORY_NOT_DIGIT 1
-#define SRE_CATEGORY_SPACE 2
-#define SRE_CATEGORY_NOT_SPACE 3
-#define SRE_CATEGORY_WORD 4
-#define SRE_CATEGORY_NOT_WORD 5
-#define SRE_CATEGORY_LINEBREAK 6
-#define SRE_CATEGORY_NOT_LINEBREAK 7
-#define SRE_CATEGORY_LOC_WORD 8
-#define SRE_CATEGORY_LOC_NOT_WORD 9
-#define SRE_CATEGORY_UNI_DIGIT 10
-#define SRE_CATEGORY_UNI_NOT_DIGIT 11
-#define SRE_CATEGORY_UNI_SPACE 12
-#define SRE_CATEGORY_UNI_NOT_SPACE 13
-#define SRE_CATEGORY_UNI_WORD 14
-#define SRE_CATEGORY_UNI_NOT_WORD 15
-#define SRE_CATEGORY_UNI_LINEBREAK 16
-#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17
-#define SRE_FLAG_TEMPLATE 1
-#define SRE_FLAG_IGNORECASE 2
-#define SRE_FLAG_LOCALE 4
-#define SRE_FLAG_MULTILINE 8
-#define SRE_FLAG_DOTALL 16
-#define SRE_FLAG_UNICODE 32
-#define SRE_FLAG_VERBOSE 64
-#define SRE_INFO_PREFIX 1
-#define SRE_INFO_LITERAL 2
-#define SRE_INFO_CHARSET 4
+#define SRE_OP_ANY_ALL_REV 4
+#define SRE_OP_ANY_REV 5
+#define SRE_OP_ASSERT 6
+#define SRE_OP_ASSERT_NOT 7
+#define SRE_OP_ATOMIC 8
+#define SRE_OP_BOUNDARY 9
+#define SRE_OP_BRANCH 10
+#define SRE_OP_CATEGORY 11
+#define SRE_OP_CATEGORY_REV 12
+#define SRE_OP_CHARSET 13
+#define SRE_OP_CHARSET_IGNORE 14
+#define SRE_OP_CHARSET_IGNORE_REV 15
+#define SRE_OP_CHARSET_REV 16
+#define SRE_OP_END_ASSERT 17
+#define SRE_OP_END_ASSERT_NOT 18
+#define SRE_OP_END_ATOMIC 19
+#define SRE_OP_END_OF_LINE 20
+#define SRE_OP_END_OF_STRING 21
+#define SRE_OP_END_OF_STRING_LN 22
+#define SRE_OP_END_REPEAT_MAX 23
+#define SRE_OP_END_REPEAT_MAX_REV 24
+#define SRE_OP_END_REPEAT_MIN 25
+#define SRE_OP_END_REPEAT_MIN_REV 26
+#define SRE_OP_END_REPEAT_POSS 27
+#define SRE_OP_END_REPEAT_POSS_REV 28
+#define SRE_OP_GROUPREF 29
+#define SRE_OP_GROUPREF_EXISTS 30
+#define SRE_OP_GROUPREF_IGNORE 31
+#define SRE_OP_GROUPREF_IGNORE_REV 32
+#define SRE_OP_GROUPREF_REV 33
+#define SRE_OP_JUMP 34
+#define SRE_OP_LITERAL 35
+#define SRE_OP_LITERAL_IGNORE 36
+#define SRE_OP_LITERAL_IGNORE_REV 37
+#define SRE_OP_LITERAL_REV 38
+#define SRE_OP_LITERAL_STRING 39
+#define SRE_OP_LITERAL_STRING_IGNORE 40
+#define SRE_OP_LITERAL_STRING_IGNORE_REV 41
+#define SRE_OP_LITERAL_STRING_REV 42
+#define SRE_OP_MARK 43
+#define SRE_OP_NOT_BOUNDARY 44
+#define SRE_OP_NOT_CATEGORY 45
+#define SRE_OP_NOT_CATEGORY_REV 46
+#define SRE_OP_NOT_CHARSET 47
+#define SRE_OP_NOT_CHARSET_IGNORE 48
+#define SRE_OP_NOT_CHARSET_IGNORE_REV 49
+#define SRE_OP_NOT_CHARSET_REV 50
+#define SRE_OP_NOT_LITERAL 51
+#define SRE_OP_NOT_LITERAL_IGNORE 52
+#define SRE_OP_NOT_LITERAL_IGNORE_REV 53
+#define SRE_OP_NOT_LITERAL_REV 54
+#define SRE_OP_NOT_RANGE 55
+#define SRE_OP_NOT_RANGE_IGNORE 56
+#define SRE_OP_NOT_RANGE_IGNORE_REV 57
+#define SRE_OP_NOT_RANGE_REV 58
+#define SRE_OP_NOT_SET 59
+#define SRE_OP_NOT_SET_IGNORE 60
+#define SRE_OP_NOT_SET_IGNORE_REV 61
+#define SRE_OP_NOT_SET_REV 62
+#define SRE_OP_RANGE 63
+#define SRE_OP_RANGE_IGNORE 64
+#define SRE_OP_RANGE_IGNORE_REV 65
+#define SRE_OP_RANGE_REV 66
+#define SRE_OP_REPEAT_MAX 67
+#define SRE_OP_REPEAT_MAX_REV 68
+#define SRE_OP_REPEAT_MIN 69
+#define SRE_OP_REPEAT_MIN_REV 70
+#define SRE_OP_REPEAT_ONE_MAX 71
+#define SRE_OP_REPEAT_ONE_MAX_REV 72
+#define SRE_OP_REPEAT_ONE_MIN 73
+#define SRE_OP_REPEAT_ONE_MIN_REV 74
+#define SRE_OP_REPEAT_ONE_POSS 75
+#define SRE_OP_REPEAT_ONE_POSS_REV 76
+#define SRE_OP_REPEAT_POSS 77
+#define SRE_OP_REPEAT_POSS_REV 78
+#define SRE_OP_SET 79
+#define SRE_OP_SET_IGNORE 80
+#define SRE_OP_SET_IGNORE_REV 81
+#define SRE_OP_SET_REV 82
+#define SRE_OP_START_OF_LINE 83
+#define SRE_OP_START_OF_STRING 84
+#define SRE_OP_SUBPATTERN 85
+#define SRE_MAX_OP 85
+
+#define SRE_FLAG_TEMPLATE 0x1
+#define SRE_FLAG_IGNORECASE 0x2
+#define SRE_FLAG_LOCALE 0x4
+#define SRE_FLAG_MULTILINE 0x8
+#define SRE_FLAG_DOTALL 0x10
+#define SRE_FLAG_UNICODE 0x20
+#define SRE_FLAG_VERBOSE 0x40
+#define SRE_FLAG_REVERSE 0x100
+#define SRE_FLAG_ZEROWIDTH 0x200
+
+#define SRE_INFO_PREFIX 0x1
+#define SRE_INFO_LITERAL 0x2
+#define SRE_INFO_CHARSET 0x4
+
+#define SRE_UNI_CAT_Lu 0x1
+#define SRE_UNI_CAT_Ll 0x2
+#define SRE_UNI_CAT_Lt 0x3
+#define SRE_UNI_CAT_Mn 0x4
+#define SRE_UNI_CAT_Mc 0x5
+#define SRE_UNI_CAT_Me 0x6
+#define SRE_UNI_CAT_Nd 0x7
+#define SRE_UNI_CAT_Nl 0x8
+#define SRE_UNI_CAT_No 0x9
+#define SRE_UNI_CAT_Zs 0xA
+#define SRE_UNI_CAT_Zl 0xB
+#define SRE_UNI_CAT_Zp 0xC
+#define SRE_UNI_CAT_Cc 0xD
+#define SRE_UNI_CAT_Cf 0xE
+#define SRE_UNI_CAT_Cs 0xF
+#define SRE_UNI_CAT_Co 0x10
+#define SRE_UNI_CAT_Lm 0x12
+#define SRE_UNI_CAT_Lo 0x13
+#define SRE_UNI_CAT_Pc 0x14
+#define SRE_UNI_CAT_Pd 0x15
+#define SRE_UNI_CAT_Ps 0x16
+#define SRE_UNI_CAT_Pe 0x17
+#define SRE_UNI_CAT_Pi 0x18
+#define SRE_UNI_CAT_Pf 0x19
+#define SRE_UNI_CAT_Po 0x1A
+#define SRE_UNI_CAT_Sm 0x1B
+#define SRE_UNI_CAT_Sc 0x1C
+#define SRE_UNI_CAT_Sk 0x1D
+#define SRE_UNI_CAT_So 0x1E
+
+#define SRE_UNI_CAT_L 0x20
+#define SRE_UNI_CAT_M 0x21
+#define SRE_UNI_CAT_N 0x22
+#define SRE_UNI_CAT_Z 0x23
+#define SRE_UNI_CAT_C 0x24
+#define SRE_UNI_CAT_P 0x25
+#define SRE_UNI_CAT_S 0x26
+
+#define SRE_CAT_Alpha 0x27
+#define SRE_CAT_Alnum 0x28
+#define SRE_CAT_ASCII 0x29
+#define SRE_CAT_Blank 0x2A
+#define SRE_CAT_Cntrl 0x2B
+#define SRE_CAT_Digit 0x2C
+#define SRE_CAT_Graph 0x2D
+#define SRE_CAT_LineBreak 0x2E
+#define SRE_CAT_Lower 0x2F
+#define SRE_CAT_Print 0x30
+#define SRE_CAT_Punct 0x31
+#define SRE_CAT_Space 0x32
+#define SRE_CAT_Upper 0x33
+#define SRE_CAT_Word 0x34
+#define SRE_CAT_XDigit 0x35
+
+#define SRE_UNI_CAT_C_MASK 0x0001E000
+#define SRE_UNI_CAT_L_MASK 0x000C000E
+#define SRE_UNI_CAT_M_MASK 0x00000070
+#define SRE_UNI_CAT_N_MASK 0x00000380
+#define SRE_UNI_CAT_P_MASK 0x07F00000
+#define SRE_UNI_CAT_S_MASK 0x78000000
+#define SRE_UNI_CAT_Z_MASK 0x00001C00
+
+#define SRE_UNI_CAT_MASK_Alnum 0x000C008E
+#define SRE_UNI_CAT_MASK_Alpha 0x000C000E
+#define SRE_UNI_CAT_MASK_Graph 0x7FFC03FE
+#define SRE_UNI_CAT_MASK_Print 0x7FFC1FFE
+#define SRE_UNI_CAT_MASK_Punct 0x7FF00000
+#define SRE_UNI_CAT_MASK_Word  0x001C03FE
+
+// info for operator validation
+typedef struct SRE_OpInfo {
+    int type;
+    int direction;
+    int end_marker;
+} SRE_OpInfo;
+
+#define SRE_TYPE_INVALID 0
+#define SRE_TYPE_ASSERT 1
+#define SRE_TYPE_ATOMIC 2
+#define SRE_TYPE_BRANCH 3
+#define SRE_TYPE_CATEGORY 4
+#define SRE_TYPE_CHARSET 5
+#define SRE_TYPE_GROUPREF 6
+#define SRE_TYPE_GROUPREF_EXISTS 7
+#define SRE_TYPE_LITERAL 8
+#define SRE_TYPE_LITERAL_STRING 9
+#define SRE_TYPE_MARK 10
+#define SRE_TYPE_POSITION 11
+#define SRE_TYPE_RANGE 12
+#define SRE_TYPE_REPEAT 13
+#define SRE_TYPE_REPEAT_ONE 14
+#define SRE_TYPE_SET 15
+#define SRE_TYPE_SIMPLE_CATEGORY 16
+
+static SRE_OpInfo op_info[] = {
+    {0, 0, 0}, // SRE_OP_FAILURE
+    {0, 0, 0}, // SRE_OP_SUCCESS
+    {16, 1, 0}, // SRE_OP_ANY
+    {16, 1, 0}, // SRE_OP_ANY_ALL
+    {16, -1, 0}, // SRE_OP_ANY_ALL_REV
+    {16, -1, 0}, // SRE_OP_ANY_REV
+    {1, 0, SRE_OP_END_ASSERT}, // SRE_OP_ASSERT
+    {1, 0, SRE_OP_END_ASSERT_NOT}, // SRE_OP_ASSERT_NOT
+    {2, 0, SRE_OP_END_ATOMIC}, // SRE_OP_ATOMIC
+    {11, 0, 0}, // SRE_OP_BOUNDARY
+    {3, 0, 0}, // SRE_OP_BRANCH
+    {4, 1, 0}, // SRE_OP_CATEGORY
+    {4, -1, 0}, // SRE_OP_CATEGORY_REV
+    {5, 1, 0}, // SRE_OP_CHARSET
+    {5, 1, 0}, // SRE_OP_CHARSET_IGNORE
+    {5, -1, 0}, // SRE_OP_CHARSET_IGNORE_REV
+    {5, -1, 0}, // SRE_OP_CHARSET_REV
+    {0, 0, 0}, // SRE_OP_END_ASSERT
+    {0, 0, 0}, // SRE_OP_END_ASSERT_NOT
+    {0, 0, 0}, // SRE_OP_END_ATOMIC
+    {11, 0, 0}, // SRE_OP_END_OF_LINE
+    {11, 0, 0}, // SRE_OP_END_OF_STRING
+    {11, 0, 0}, // SRE_OP_END_OF_STRING_LN
+    {0, 1, 0}, // SRE_OP_END_REPEAT_MAX
+    {0, -1, 0}, // SRE_OP_END_REPEAT_MAX_REV
+    {0, 1, 0}, // SRE_OP_END_REPEAT_MIN
+    {0, -1, 0}, // SRE_OP_END_REPEAT_MIN_REV
+    {0, 1, 0}, // SRE_OP_END_REPEAT_POSS
+    {0, -1, 0}, // SRE_OP_END_REPEAT_POSS_REV
+    {6, 1, 0}, // SRE_OP_GROUPREF
+    {7, 0, 0}, // SRE_OP_GROUPREF_EXISTS
+    {6, 1, 0}, // SRE_OP_GROUPREF_IGNORE
+    {6, -1, 0}, // SRE_OP_GROUPREF_IGNORE_REV
+    {6, -1, 0}, // SRE_OP_GROUPREF_REV
+    {0, 0, 0}, // SRE_OP_JUMP
+    {8, 1, 0}, // SRE_OP_LITERAL
+    {8, 1, 0}, // SRE_OP_LITERAL_IGNORE
+    {8, -1, 0}, // SRE_OP_LITERAL_IGNORE_REV
+    {8, -1, 0}, // SRE_OP_LITERAL_REV
+    {9, 1, 0}, // SRE_OP_LITERAL_STRING
+    {9, 1, 0}, // SRE_OP_LITERAL_STRING_IGNORE
+    {9, -1, 0}, // SRE_OP_LITERAL_STRING_IGNORE_REV
+    {9, -1, 0}, // SRE_OP_LITERAL_STRING_REV
+    {10, 0, 0}, // SRE_OP_MARK
+    {11, 0, 0}, // SRE_OP_NOT_BOUNDARY
+    {4, 1, 0}, // SRE_OP_NOT_CATEGORY
+    {4, -1, 0}, // SRE_OP_NOT_CATEGORY_REV
+    {5, 1, 0}, // SRE_OP_NOT_CHARSET
+    {5, 1, 0}, // SRE_OP_NOT_CHARSET_IGNORE
+    {5, -1, 0}, // SRE_OP_NOT_CHARSET_IGNORE_REV
+    {5, -1, 0}, // SRE_OP_NOT_CHARSET_REV
+    {8, 1, 0}, // SRE_OP_NOT_LITERAL
+    {8, 1, 0}, // SRE_OP_NOT_LITERAL_IGNORE
+    {8, -1, 0}, // SRE_OP_NOT_LITERAL_IGNORE_REV
+    {8, -1, 0}, // SRE_OP_NOT_LITERAL_REV
+    {12, 1, 0}, // SRE_OP_NOT_RANGE
+    {12, 1, 0}, // SRE_OP_NOT_RANGE_IGNORE
+    {12, -1, 0}, // SRE_OP_NOT_RANGE_IGNORE_REV
+    {12, -1, 0}, // SRE_OP_NOT_RANGE_REV
+    {15, 1, 0}, // SRE_OP_NOT_SET
+    {15, 1, 0}, // SRE_OP_NOT_SET_IGNORE
+    {15, -1, 0}, // SRE_OP_NOT_SET_IGNORE_REV
+    {15, -1, 0}, // SRE_OP_NOT_SET_REV
+    {12, 1, 0}, // SRE_OP_RANGE
+    {12, 1, 0}, // SRE_OP_RANGE_IGNORE
+    {12, -1, 0}, // SRE_OP_RANGE_IGNORE_REV
+    {12, -1, 0}, // SRE_OP_RANGE_REV
+    {13, 1, SRE_OP_END_REPEAT_MAX}, // SRE_OP_REPEAT_MAX
+    {13, -1, SRE_OP_END_REPEAT_MAX}, // SRE_OP_REPEAT_MAX_REV
+    {13, 1, SRE_OP_END_REPEAT_MIN}, // SRE_OP_REPEAT_MIN
+    {13, -1, SRE_OP_END_REPEAT_MIN}, // SRE_OP_REPEAT_MIN_REV
+    {14, 1, 0}, // SRE_OP_REPEAT_ONE_MAX
+    {14, -1, 0}, // SRE_OP_REPEAT_ONE_MAX_REV
+    {14, 1, 0}, // SRE_OP_REPEAT_ONE_MIN
+    {14, -1, 0}, // SRE_OP_REPEAT_ONE_MIN_REV
+    {14, 1, 0}, // SRE_OP_REPEAT_ONE_POSS
+    {14, -1, 0}, // SRE_OP_REPEAT_ONE_POSS_REV
+    {13, 1, SRE_OP_END_REPEAT_POSS}, // SRE_OP_REPEAT_POSS
+    {13, -1, SRE_OP_END_REPEAT_POSS}, // SRE_OP_REPEAT_POSS_REV
+    {15, 1, 0}, // SRE_OP_SET
+    {15, 1, 0}, // SRE_OP_SET_IGNORE
+    {15, -1, 0}, // SRE_OP_SET_IGNORE_REV
+    {15, -1, 0}, // SRE_OP_SET_REV
+    {11, 0, 0}, // SRE_OP_START_OF_LINE
+    {11, 0, 0}, // SRE_OP_START_OF_STRING
+    {0, 0, 0}, // SRE_OP_SUBPATTERN
+};
=== modified file Modules/_sre.c
--- Modules/_sre.c	2008-09-10 14:27:00 +0000
+++ Modules/_sre.c	2009-02-03 17:29:46 +0000
@@ -4,24 +4,25 @@
  * regular expression matching engine
  *
  * partial history:
- * 1999-10-24 fl  created (based on existing template matcher code)
- * 2000-03-06 fl  first alpha, sort of
- * 2000-08-01 fl  fixes for 1.6b1
- * 2000-08-07 fl  use PyOS_CheckStack() if available
- * 2000-09-20 fl  added expand method
- * 2001-03-20 fl  lots of fixes for 2.1b2
- * 2001-04-15 fl  export copyright as Python attribute, not global
- * 2001-04-28 fl  added __copy__ methods (work in progress)
- * 2001-05-14 fl  fixes for 1.5.2 compatibility
- * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
- * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
- * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
- * 2001-10-21 fl  added sub/subn primitive
- * 2001-10-24 fl  added finditer primitive (for 2.2 only)
- * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
- * 2002-11-09 fl  fixed empty sub/subn return type
- * 2003-04-18 mvl fully support 4-byte codes
- * 2003-10-17 gn  implemented non recursive scheme
+ * 1999-10-24 fl   created (based on existing template matcher code)
+ * 2000-03-06 fl   first alpha, sort of
+ * 2000-08-01 fl   fixes for 1.6b1
+ * 2000-08-07 fl   use PyOS_CheckStack() if available
+ * 2000-09-20 fl   added expand method
+ * 2001-03-20 fl   lots of fixes for 2.1b2
+ * 2001-04-15 fl   export copyright as Python attribute, not global
+ * 2001-04-28 fl   added __copy__ methods (work in progress)
+ * 2001-05-14 fl   fixes for 1.5.2 compatibility
+ * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
+ * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
+ * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
+ * 2001-10-21 fl   added sub/subn primitive
+ * 2001-10-24 fl   added finditer primitive (for 2.2 only)
+ * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
+ * 2002-11-09 fl   fixed empty sub/subn return type
+ * 2003-04-18 mvl  fully support 4-byte codes
+ * 2003-10-17 gn   implemented non recursive scheme
+ * 2008-09-21 mrab major reworking
  *
  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
@@ -55,11 +56,14 @@
 
 #define SRE_PY_MODULE "re"
 
-/* defining this one enables tracing */
-#undef VERBOSE
+/* uncomment this define to enable tracing */
+//#define VERBOSE_SRE_ENGINE
+
+//#define DEBUG_TRACE(v) printf v
+#define DEBUG_TRACE(v)
 
 #if PY_VERSION_HEX >= 0x01060000
-#if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
+#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
 /* defining this enables unicode support (default under 1.6a1 and later) */
 #define HAVE_UNICODE
 #endif
@@ -68,9 +72,6 @@
 /* -------------------------------------------------------------------- */
 /* optional features */
 
-/* enables fast searching */
-#define USE_FAST_SEARCH
-
 /* enables aggressive inlining (always on for Visual C) */
 #undef USE_INLINE
 
@@ -95,13 +96,13 @@
 #endif
 
 /* error codes */
-#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
-#define SRE_ERROR_STATE -2 /* illegal state */
+#define SRE_ERROR_ILLEGAL -1         /* illegal opcode */
+#define SRE_ERROR_STATE -2           /* illegal state */
 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
-#define SRE_ERROR_MEMORY -9 /* out of memory */
-#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
+#define SRE_ERROR_MEMORY -9          /* out of memory */
+#define SRE_ERROR_INTERRUPTED -10    /* signal handler raised exception */
 
-#if defined(VERBOSE)
+#if defined(VERBOSE_SRE_ENGINE)
 #define TRACE(v) printf v
 #else
 #define TRACE(v)
@@ -110,219 +111,408 @@
 /* -------------------------------------------------------------------- */
 /* search engine state */
 
-/* default character predicates (run sre_chars.py to regenerate tables) */
+typedef struct { /* one property record; presumably the element type of _PyUnicode_Database_Records[] -- confirm */
+    const unsigned char category;         /* index into _PyUnicode_CategoryNames */
+    const unsigned char combining;        /* combining class value 0 - 255 */
+    const unsigned char bidirectional;    /* index into _PyUnicode_BidirectionalNames */
+    const unsigned char mirrored;         /* true if mirrored in bidir mode */
+    const unsigned char east_asian_width; /* index into _PyUnicode_EastAsianWidth */
+} _PyUnicode_DatabaseRecord;
+
+typedef struct change_record { /* NOTE(review): not used by the code visible here; presumably declared only because unicodedata_db.h (included just below) refers to it -- confirm */
+    const unsigned char bidir_changed;
+    const unsigned char category_changed;
+    const unsigned char decimal_changed;
+    const unsigned char mirrored_changed;
+    const int numeric_changed;
+} change_record;
+
+#include "unicodedata_db.h"
+
+static const unsigned char get_unicode_category(Py_UCS4 code) {
+    /* Codes of 0x110000 and above fall back to database record 0. */
+    int record = 0;
+
+    if (code < 0x110000) {
+        /* Two-stage table lookup: index1 selects a block, index2 the record. */
+        int block = index1[(code >> SHIFT)];
+        record = index2[(block << SHIFT) + (code & ((1 << SHIFT) - 1))];
+    }
+    return _PyUnicode_Database_Records[record].category;
+}
 
-#define SRE_DIGIT_MASK 1
-#define SRE_SPACE_MASK 2
-#define SRE_LINEBREAK_MASK 4
-#define SRE_ALNUM_MASK 8
-#define SRE_WORD_MASK 16
-
-/* FIXME: this assumes ASCII.  create tables in init_sre() instead */
-
-static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
-25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
-0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
-
-static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
-61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
-108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
-106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-120, 121, 122, 123, 124, 125, 126, 127 };
-
-#define SRE_IS_DIGIT(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
-#define SRE_IS_SPACE(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
-#define SRE_IS_LINEBREAK(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
-#define SRE_IS_ALNUM(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
-#define SRE_IS_WORD(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
+/* ASCII */
 
-static unsigned int sre_lower(unsigned int ch)
-{
-    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
+#define SRE_ASCII_MAX 0x7F
+
+#define SRE_BLANK_MASK 0x001
+#define SRE_DIGIT_MASK 0x002
+#define SRE_GRAPH_MASK 0x004
+#define SRE_LOWER_MASK 0x008
+#define SRE_PRINT_MASK 0x010
+#define SRE_PUNCT_MASK 0x020
+#define SRE_UNDERSCORE_MASK 0x040
+#define SRE_UPPER_MASK 0x080
+#define SRE_XDIGIT_MASK 0x100
+#define SRE_WHITESPACE_MASK 0x200
+
+static short sre_ascii_info[SRE_ASCII_MAX + 1] = { /* SRE_*_MASK property bits, indexed by ASCII code; 16 codes per row */
+0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x201, 0x200, 0x200, 0x200, 0x200, 0x000, 0x000, /* 0x00-0x0F: TAB is blank+whitespace; LF, VT, FF, CR are whitespace */
+0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x200, 0x200, 0x200, 0x200, /* 0x10-0x1F: 0x1C-0x1F are whitespace */
+0x211, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, /* 0x20-0x2F: space, then punctuation */
+0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, /* 0x30-0x3F: digits '0'-'9', then punctuation */
+0x034, 0x194, 0x194, 0x194, 0x194, 0x194, 0x194, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, /* 0x40-0x4F: '@', then 'A'-'O' ('A'-'F' are also hex digits) */
+0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x034, 0x034, 0x034, 0x034, 0x074, /* 0x50-0x5F: 'P'-'Z', punctuation, then '_' (punctuation + underscore bit) */
+0x034, 0x11C, 0x11C, 0x11C, 0x11C, 0x11C, 0x11C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, /* 0x60-0x6F: '`', then 'a'-'o' ('a'-'f' are also hex digits) */
+0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x034, 0x034, 0x034, 0x034, 0x000, /* 0x70-0x7F: 'p'-'z', punctuation, then DEL (no bits) */
+};
+
+static BOOL ascii_in_category(SRE_CODE category, SRE_CODE ch) {
+    int props; /* SRE_*_MASK bits recorded for ch in sre_ascii_info */
+    if (ch > SRE_ASCII_MAX) return FALSE; /* nothing above ASCII is in any category here */
+    props = sre_ascii_info[ch];
+    switch(category) {
+    case SRE_CAT_Alnum:
+        return (props & (SRE_DIGIT_MASK | SRE_LOWER_MASK | SRE_UPPER_MASK)) != 0;
+    case SRE_CAT_Alpha:
+        return (props & (SRE_LOWER_MASK | SRE_UPPER_MASK)) != 0;
+    case SRE_CAT_ASCII:
+        return TRUE; /* the range check above already established this */
+    case SRE_CAT_Blank:
+        return (props & SRE_BLANK_MASK) != 0;
+    case SRE_CAT_Cntrl:
+        return (props & SRE_PRINT_MASK) == 0; /* control means "not printable" */
+    case SRE_CAT_Digit:
+        return (props & SRE_DIGIT_MASK) != 0;
+    case SRE_CAT_Graph:
+        return (props & SRE_GRAPH_MASK) != 0;
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return (props & SRE_LOWER_MASK) != 0;
+    case SRE_CAT_Print:
+        return (props & SRE_PRINT_MASK) != 0;
+    case SRE_CAT_Punct:
+        return (props & SRE_PUNCT_MASK) != 0;
+    case SRE_CAT_Space:
+        return (props & SRE_WHITESPACE_MASK) != 0;
+    case SRE_CAT_Upper:
+        return (props & SRE_UPPER_MASK) != 0;
+    case SRE_CAT_Word:
+        return (props & (SRE_DIGIT_MASK | SRE_LOWER_MASK | SRE_UPPER_MASK | SRE_UNDERSCORE_MASK)) != 0;
+    case SRE_CAT_XDigit:
+        return (props & SRE_XDIGIT_MASK) != 0;
+    default:
+        return FALSE; /* unknown category code */
+    }
 }
 
-/* locale-specific character predicates */
-/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
- * warnings when c's type supports only numbers < N+1 */
-#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
-#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
-#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
-#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
-#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
+static SRE_CODE ascii_lower(SRE_CODE ch) { /* flip bit 0x20 of ASCII upper-case letters; everything else is returned unchanged */
+    return (ch > SRE_ASCII_MAX || (sre_ascii_info[ch] & SRE_UPPER_MASK) == 0) ? ch : ch ^ 0x20;
+}
 
-static unsigned int sre_lower_locale(unsigned int ch)
-{
-    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
+static SRE_CODE ascii_upper(SRE_CODE ch) { /* flip bit 0x20 of ASCII lower-case letters; everything else is returned unchanged */
+    return (ch > SRE_ASCII_MAX || (sre_ascii_info[ch] & SRE_LOWER_MASK) == 0) ? ch : ch ^ 0x20;
 }
 
-/* unicode-specific character predicates */
+static SRE_ENCODING_TABLE ascii_encoding = { /* ASCII callbacks; slot names are presumed from the encoding->xxx() calls in this file -- confirm against SRE_ENCODING_TABLE */
+ascii_in_category, /* in_category */
+ascii_lower, /* lower */
+ascii_upper, /* upper */
+ascii_upper, /* presumably the title slot: ASCII reuses the upper-case mapping */
+};
 
-#if defined(HAVE_UNICODE)
+/* locale-specific */
 
-#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
-#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
-#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
-#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
-#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
+#define SRE_LOC_MAX 0xFF
 
-static unsigned int sre_lower_unicode(unsigned int ch)
-{
-    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
+static BOOL loc_in_category(SRE_CODE category, SRE_CODE ch) { /* Test ch against an SRE_CAT_* category using the current locale's <ctype.h> predicates. */
+    if (ch > SRE_LOC_MAX) /* the <ctype.h> predicates are only defined for unsigned char values (and EOF) */
+        return FALSE;
+    /* <ctype.h> predicates return an arbitrary non-zero int for "yes"; compare with 0 so a strict 0/1 is returned, as ascii_in_category does. */
+    switch (category) {
+    case SRE_CAT_Alnum:
+        return isalnum(ch) != 0;
+    case SRE_CAT_Alpha:
+        return isalpha(ch) != 0;
+    case SRE_CAT_ASCII:
+        return ch <= SRE_ASCII_MAX;
+    case SRE_CAT_Blank:
+        return ch == '\t' || ch == ' ';
+    case SRE_CAT_Cntrl:
+        return !isprint(ch); /* control means "not printable", matching the ASCII table */
+    case SRE_CAT_Digit:
+        return isdigit(ch) != 0;
+    case SRE_CAT_Graph:
+        return isgraph(ch) != 0;
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return islower(ch) != 0;
+    case SRE_CAT_Print:
+        return isprint(ch) != 0;
+    case SRE_CAT_Punct:
+        return ispunct(ch) != 0;
+    case SRE_CAT_Space:
+        return isspace(ch) != 0;
+    case SRE_CAT_Upper:
+        return isupper(ch) != 0;
+    case SRE_CAT_Word:
+        return ch == '_' || isalnum(ch); /* || already yields 0 or 1 */
+    case SRE_CAT_XDigit:
+        return isxdigit(ch) != 0;
+    default:
+        return FALSE; /* unknown category code */
+    }
 }
 
-#endif
+static SRE_CODE loc_lower(SRE_CODE ch) { /* tolower() for codes up to SRE_LOC_MAX; larger codes pass through unchanged */
+    return ch > SRE_LOC_MAX ? ch : (SRE_CODE)tolower(ch);
+}
 
-LOCAL(int)
-sre_category(SRE_CODE category, unsigned int ch)
-{
-    switch (category) {
+static SRE_CODE loc_upper(SRE_CODE ch) { /* toupper() for codes up to SRE_LOC_MAX; larger codes pass through unchanged */
+    return ch > SRE_LOC_MAX ? ch : (SRE_CODE)toupper(ch);
+}
 
-    case SRE_CATEGORY_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_WORD:
-        return SRE_IS_WORD(ch);
-    case SRE_CATEGORY_NOT_WORD:
-        return !SRE_IS_WORD(ch);
-    case SRE_CATEGORY_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-
-    case SRE_CATEGORY_LOC_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_LOC_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
+static SRE_ENCODING_TABLE locale_encoding = { /* locale callbacks; slot names are presumed from the encoding->xxx() calls in this file -- confirm against SRE_ENCODING_TABLE */
+loc_in_category, /* in_category */
+loc_lower, /* lower */
+loc_upper, /* upper */
+loc_upper, /* presumably the title slot: the locale table reuses the upper-case mapping */
+};
 
-#if defined(HAVE_UNICODE)
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_UNI_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_UNI_IS_LINEBREAK(ch);
-#else
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-#endif
+/* unicode */
+
+static BOOL uni_in_category(SRE_CODE category, SRE_CODE ch) {
+    int cat = get_unicode_category(ch); /* general-category index of ch */
+    int cat_bit = 1 << cat;             /* the same category as a one-bit mask */
+    if (category < 0x20) return cat == category; /* codes below 0x20 are compared with the category index directly */
+
+    switch (category) {
+    case SRE_UNI_CAT_L:
+        return (SRE_UNI_CAT_L_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_M:
+        return (SRE_UNI_CAT_M_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_N:
+        return (SRE_UNI_CAT_N_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_Z:
+        return (SRE_UNI_CAT_Z_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_C:
+        return (SRE_UNI_CAT_C_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_P:
+        return (SRE_UNI_CAT_P_MASK & cat_bit) != 0;
+    case SRE_UNI_CAT_S:
+        return (SRE_UNI_CAT_S_MASK & cat_bit) != 0;
+    case SRE_CAT_Alnum:
+        return (SRE_UNI_CAT_MASK_Alnum & cat_bit) != 0;
+    case SRE_CAT_Alpha:
+        return (SRE_UNI_CAT_MASK_Alpha & cat_bit) != 0;
+    case SRE_CAT_ASCII:
+        return ch <= SRE_ASCII_MAX;
+    case SRE_CAT_Blank:
+        return ch == '\t' || cat == SRE_UNI_CAT_Zs;
+    case SRE_CAT_Cntrl:
+        return cat == SRE_UNI_CAT_Cc;
+    case SRE_CAT_Digit:
+        return cat == SRE_UNI_CAT_Nd;
+    case SRE_CAT_Graph:
+        return (SRE_UNI_CAT_MASK_Graph & cat_bit) != 0;
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return cat == SRE_UNI_CAT_Ll;
+    case SRE_CAT_Print:
+        return (SRE_UNI_CAT_MASK_Print & cat_bit) != 0;
+    case SRE_CAT_Punct:
+        return (SRE_UNI_CAT_MASK_Punct & cat_bit) != 0;
+    case SRE_CAT_Space:
+        return ch == '\t' || ch == '\r' || ch == '\n' || ch == '\v' || ch == '\f' || (SRE_UNI_CAT_Z_MASK & cat_bit) != 0;
+    case SRE_CAT_Upper:
+        return cat == SRE_UNI_CAT_Lu;
+    case SRE_CAT_Word:
+        return (SRE_UNI_CAT_MASK_Word & cat_bit) != 0;
+    case SRE_CAT_XDigit:
+        return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+    default:
+        return FALSE; /* unknown category code */
     }
-    return 0;
 }
 
-/* helpers */
+static SRE_CODE uni_lower(SRE_CODE ch) { /* lower-case mapping of ch via Py_UNICODE_TOLOWER */
+    return (SRE_CODE)Py_UNICODE_TOLOWER((Py_UNICODE)ch); /* NOTE(review): the cast would truncate ch if Py_UNICODE is narrower than SRE_CODE -- confirm */
+}
 
-static void
-data_stack_dealloc(SRE_STATE* state)
-{
-    if (state->data_stack) {
-        PyMem_FREE(state->data_stack);
-        state->data_stack = NULL;
-    }
-    state->data_stack_size = state->data_stack_base = 0;
+static SRE_CODE uni_upper(SRE_CODE ch) { /* upper-case mapping of ch via Py_UNICODE_TOUPPER */
+    return (SRE_CODE)Py_UNICODE_TOUPPER((Py_UNICODE)ch); /* NOTE(review): the cast would truncate ch if Py_UNICODE is narrower than SRE_CODE -- confirm */
 }
 
-static int
-data_stack_grow(SRE_STATE* state, Py_ssize_t size)
-{
-    Py_ssize_t minsize, cursize;
-    minsize = state->data_stack_base+size;
-    cursize = state->data_stack_size;
-    if (cursize < minsize) {
-        void* stack;
-        cursize = minsize+minsize/4+1024;
-        TRACE(("allocate/grow stack %d\n", cursize));
-        stack = PyMem_REALLOC(state->data_stack, cursize);
-        if (!stack) {
-            data_stack_dealloc(state);
-            return SRE_ERROR_MEMORY;
+static SRE_CODE uni_title(SRE_CODE ch) { /* title-case mapping of ch via Py_UNICODE_TOTITLE */
+    return (SRE_CODE)Py_UNICODE_TOTITLE((Py_UNICODE)ch); /* NOTE(review): the cast would truncate ch if Py_UNICODE is narrower than SRE_CODE -- confirm */
+}
+
+static SRE_ENCODING_TABLE sre_unicode_encoding = { /* Unicode callbacks; slot names are presumed from the encoding->xxx() calls in this file -- confirm against SRE_ENCODING_TABLE */
+uni_in_category, /* in_category */
+uni_lower, /* lower */
+uni_upper, /* upper */
+uni_title, /* title */
+};
+
+LOCAL(unsigned int) sre_min(unsigned int x, unsigned int y) { /* the smaller of x and y */
+    return y < x ? y : x;
+}
+
+LOCAL(unsigned int) sre_max(unsigned int x, unsigned int y) { /* the larger of x and y */
+    return y > x ? y : x;
+}
+
+LOCAL(BOOL) in_charset(SRE_CODE* charset, SRE_CODE ch) {
+    /* Test ch against a compiled charset. Format: max_char indexes... chunks... (each chunk is a 256-bit bitmap). */
+    int hi_byte = ch / 256;
+    int lo_byte = ch % 256;
+    int index;
+    SRE_CODE* chunk;
+    /* Check against the maximum character code in the charset (charset[0]). */
+    if (ch > charset[0])
+        return FALSE;
+    /* Get the chunk index for this 256-character page (2 x 16-bit indexes in each codeword). */
+    index = (charset[1 + hi_byte / 2] >> ((hi_byte % 2) * 16)) & 0xFFFF;
+    /* Get the chunk: skip max_char and the index words, then step over `index` whole chunks. */
+    chunk = charset + 1 + charset[0] / 256 / 2 + 1 + index * (256 / SRE_BITS_PER_CODE);
+    return ((chunk[lo_byte / SRE_BITS_PER_CODE] >> (lo_byte % SRE_BITS_PER_CODE)) & 0x1) != 0; /* bit lo_byte of the chunk */
+}
+
+LOCAL(BOOL) in_charset_ignore(SRE_STATE* state, SRE_CODE* charset, SRE_CODE ch) { /* case-insensitive in_charset: try the lower, upper and title forms of ch in turn */
+    if (in_charset(charset, state->encoding->lower(ch))) return TRUE;
+    if (in_charset(charset, state->encoding->upper(ch))) return TRUE;
+    return in_charset(charset, state->encoding->title(ch));
+}
+
+LOCAL(BOOL) in_range(SRE_CODE ch, SRE_CODE lower, SRE_CODE upper) { /* true when lower <= ch <= upper, both ends inclusive */
+    return !(ch < lower || upper < ch);
+}
+
+LOCAL(BOOL) in_range_ignore(SRE_STATE* state, SRE_CODE ch, SRE_CODE lower, SRE_CODE upper) { /* case-insensitive in_range: try the lower, upper and title forms of ch in turn */
+    if (in_range(state->encoding->lower(ch), lower, upper)) return TRUE;
+    if (in_range(state->encoding->upper(ch), lower, upper)) return TRUE;
+    return in_range(state->encoding->title(ch), lower, upper);
+}
+
+LOCAL(BOOL) in_set(SRE_STATE* state, SRE_CODE* charset, SRE_CODE ch) {
+    /* Check if character is a member of the given set; charset[0] is the set's length in code words, members follow. */
+    SRE_CODE* charset_end = charset + charset[0];
+
+    charset++; /* step over the length word to the first member */
+
+    do { /* NOTE(review): do/while examines one member even if the set were empty -- presumably sets are never empty; confirm */
+        switch (charset[0]) {
+        case SRE_OP_CATEGORY:
+            /* <CATEGORY> <category> */
+            if (state->encoding->in_category(charset[1], ch))
+                return TRUE;
+            charset += 2;
+            break;
+        case SRE_OP_CHARSET:
+            /* <CHARSET> <skip> <charset> */
+            if (in_charset(charset + 2, ch))
+                return TRUE;
+            charset += 1 + charset[1]; /* <skip> is counted from the skip word itself */
+            break;
+        case SRE_OP_LITERAL:
+            /* <LITERAL> <code> */
+            if (ch == charset[1])
+                return TRUE;
+            charset += 2;
+            break;
+        case SRE_OP_RANGE:
+            /* <RANGE> <lower> <upper> */
+            if (in_range(ch, charset[1], charset[2]))
+                return TRUE;
+            charset += 3;
+            break;
+        default:
+            /* internal error -- there's not much we can do about it
+               here, so let's just pretend it didn't match... */
+            return FALSE;
         }
-        state->data_stack = (char *)stack;
-        state->data_stack_size = cursize;
-    }
-    return 0;
+    } while (charset < charset_end);
+
+    return FALSE; /* no member matched */
 }
 
-/* generate 8-bit version */
+LOCAL(BOOL) in_set_ignore(SRE_STATE* state, SRE_CODE* charset, SRE_CODE ch) { /* case-insensitive in_set: try the lower, upper and title forms of ch in turn */
+    if (in_set(state, charset, state->encoding->lower(ch))) return TRUE;
+    if (in_set(state, charset, state->encoding->upper(ch))) return TRUE;
+    return in_set(state, charset, state->encoding->title(ch));
+}
+
+LOCAL(BOOL) same_char_ignore(SRE_STATE* state, SRE_CODE ch_1, SRE_CODE ch_2) { /* true when the two characters agree in their lower, upper or title form */
+    if (state->encoding->lower(ch_1) == state->encoding->lower(ch_2)) return TRUE;
+    if (state->encoding->upper(ch_1) == state->encoding->upper(ch_2)) return TRUE;
+    return state->encoding->title(ch_1) == state->encoding->title(ch_2);
+}
+
+/* generate bytestring version */
 
 #define SRE_CHAR unsigned char
-#define SRE_AT sre_at
-#define SRE_COUNT sre_count
-#define SRE_CHARSET sre_charset
-#define SRE_INFO sre_info
-#define SRE_MATCH sre_match
-#define SRE_MATCH_CONTEXT sre_match_context
-#define SRE_SEARCH sre_search
-#define SRE_LITERAL_TEMPLATE sre_literal_template
+#define SRE_MATCH sre_bmatch
+#define SRE_SEARCH sre_bsearch
+#define SRE_LITERAL_TEMPLATE sre_bliteral_template
+#define SRE_AT_BOUNDARY sre_bat_boundary
+#define SRE_CONTEXT sre_bcontext
+#define SRE_SAVE_BACKTRACK sre_bsave_backtrack
+#define SRE_DISCARD_BACKTRACK sre_bdiscard_backtrack
+#define SRE_REFRESH_MARKS sre_brefresh_marks
+#define SRE_DISCARD_UNTIL sre_bdiscard_until
+#define SRE_CLEANUP sre_bcleanup
+#define SRE_POSSIBLE_MATCH_AHEAD sre_bpossible_match_ahead
+#define SRE_MATCH_MANY sre_bmatch_many
+#define SRE_MATCH_UNTIL_TAIL sre_bmatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL sre_bunmatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL_REV sre_bunmatch_until_tail_rev
+#define SRE_PRINT_TEXT sre_bprint_text
 
 #if defined(HAVE_UNICODE)
 
 #define SRE_RECURSIVE
 #include "_sre.c"
+#undef SRE_PRINT_TEXT
+#undef SRE_UNMATCH_UNTIL_TAIL_REV
+#undef SRE_UNMATCH_UNTIL_TAIL
+#undef SRE_MATCH_UNTIL_TAIL
+#undef SRE_MATCH_MANY
+#undef SRE_POSSIBLE_MATCH_AHEAD
+#undef SRE_CLEANUP
+#undef SRE_DISCARD_UNTIL
+#undef SRE_REFRESH_MARKS
+#undef SRE_DISCARD_BACKTRACK
+#undef SRE_SAVE_BACKTRACK
+#undef SRE_CONTEXT
 #undef SRE_RECURSIVE
-
+#undef SRE_AT_BOUNDARY
 #undef SRE_LITERAL_TEMPLATE
 #undef SRE_SEARCH
 #undef SRE_MATCH
-#undef SRE_MATCH_CONTEXT
-#undef SRE_INFO
-#undef SRE_CHARSET
-#undef SRE_COUNT
-#undef SRE_AT
 #undef SRE_CHAR
 
-/* generate 16-bit unicode version */
+/* generate unicode version */
 
 #define SRE_CHAR Py_UNICODE
-#define SRE_AT sre_uat
-#define SRE_COUNT sre_ucount
-#define SRE_CHARSET sre_ucharset
-#define SRE_INFO sre_uinfo
 #define SRE_MATCH sre_umatch
-#define SRE_MATCH_CONTEXT sre_umatch_context
 #define SRE_SEARCH sre_usearch
 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
+#define SRE_AT_BOUNDARY sre_uat_boundary
+#define SRE_CONTEXT sre_ucontext
+#define SRE_SAVE_BACKTRACK sre_usave_backtrack
+#define SRE_DISCARD_BACKTRACK sre_udiscard_backtrack
+#define SRE_REFRESH_MARKS sre_urefresh_marks
+#define SRE_DISCARD_UNTIL sre_udiscard_until
+#define SRE_CLEANUP sre_ucleanup
+#define SRE_POSSIBLE_MATCH_AHEAD sre_upossible_match_ahead
+#define SRE_MATCH_MANY sre_umatch_many
+#define SRE_MATCH_UNTIL_TAIL sre_umatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL sre_uunmatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL_REV sre_uunmatch_until_tail_rev
+#define SRE_PRINT_TEXT sre_uprint_text
 #endif
 
 #endif /* SRE_RECURSIVE */
@@ -333,1295 +523,3030 @@
 /* the following section is compiled twice, with different character
    settings */
 
-LOCAL(int)
-SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
-{
-    /* check if pointer is at given position */
-
-    Py_ssize_t thisp, thatp;
+typedef struct SRE_CONTEXT {
+    SRE_CHAR* text_beginning;  /* The true start of the text. */
+    SRE_CHAR* text_ptr;        /* The text position being examined (e.g. by SRE_AT_BOUNDARY). */
+    SRE_CHAR* text_start;      /* The start of the text to search/match. */
+    SRE_CHAR* text_end;        /* The end of the text to search/match; also the true end, or treated as such. */
+    SRE_CHAR* final_linebreak; /* The position of the final linebreak if it's final, otherwise NULL. */
+    SRE_CODE* pattern_ptr;     /* Presumably the current position in the compiled pattern -- confirm. */
+    SRE_CHAR** marks;          /* All the numbered and named marks (start and end of numbered and named groups). */
+    int marks_size;            /* The total size of numbered and named text mark pointers (passed to PyMem_MALLOC as a byte count). */
+    SRE_BACKTRACK_CHUNK* backtrack_chunk; /* The newest chunk of saved backtrack items. */
+    SRE_BACKTRACK_ITEM* backtrack_item;   /* The item most recently saved or discarded. */
+} SRE_CONTEXT;
+
+LOCAL(int) SRE_CLEANUP(SRE_CONTEXT* context, SRE_STATE* state, int result) { /* Free all backtrack chunks except the oldest, empty that one, and return `result` unchanged. */
+    SRE_BACKTRACK_CHUNK* current;
+    SRE_BACKTRACK_ITEM* item;
+
+    current = context->backtrack_chunk; /* start from the newest chunk */
+    while (current->previous != NULL) { /* stop at the chunk that has no predecessor */
+        SRE_BACKTRACK_CHUNK* previous = current->previous;
+
+        for(item = current->items; item < &current->items[current->count]; item++) { /* release the marks buffers owned by this chunk's items */
+            if (item->marks != NULL)
+                PyMem_FREE(item->marks);
+        }
 
-    switch (at) {
+        PyMem_FREE(current);
+        current = previous;
+    }
 
-    case SRE_AT_BEGINNING:
-    case SRE_AT_BEGINNING_STRING:
-        return ((void*) ptr == state->beginning);
+    for(item = current->items; item < &current->items[current->count]; item++) { /* same release for the oldest chunk, which itself is kept */
+        if (item->marks != NULL)
+            PyMem_FREE(item->marks);
+    }
 
-    case SRE_AT_BEGINNING_LINE:
-        return ((void*) ptr == state->beginning ||
-                SRE_IS_LINEBREAK((int) ptr[-1]));
+    current->count = 0; /* the surviving chunk is left empty */
 
-    case SRE_AT_END:
-        return (((void*) (ptr+1) == state->end &&
-                 SRE_IS_LINEBREAK((int) ptr[0])) ||
-                ((void*) ptr == state->end));
+    context->backtrack_chunk = current;
+    state->backtrack_chunk = current; /* the state also records the retained chunk */
 
-    case SRE_AT_END_LINE:
-        return ((void*) ptr == state->end ||
-                SRE_IS_LINEBREAK((int) ptr[0]));
+    return result;
+}
 
-    case SRE_AT_END_STRING:
-        return ((void*) ptr == state->end);
+LOCAL(int) SRE_SAVE_BACKTRACK(SRE_CONTEXT* context, int op, BOOL save_marks) { /* Push a backtrack item tagged with `op`; returns 0, or SRE_ERROR_MEMORY if an allocation fails. */
+    SRE_BACKTRACK_ITEM* backtrack_item;
 
-    case SRE_AT_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
+    if (context->backtrack_chunk->count >= SRE_BACKTRACK_CHUNK_SIZE) { /* current chunk is full: chain a fresh one in front of it */
+        SRE_BACKTRACK_CHUNK* new_backtrack_chunk = (SRE_BACKTRACK_CHUNK*)PyMem_MALLOC(sizeof(SRE_BACKTRACK_CHUNK));
+        if (new_backtrack_chunk == NULL)
+            return SRE_ERROR_MEMORY;
 
-    case SRE_AT_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
+        new_backtrack_chunk->previous = context->backtrack_chunk;
+        new_backtrack_chunk->count = 0;
+        context->backtrack_chunk = new_backtrack_chunk;
+    }
 
-    case SRE_AT_LOC_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
+    backtrack_item = &context->backtrack_chunk->items[context->backtrack_chunk->count++]; /* claim the next free slot */
+    backtrack_item->op = op;
+    if (save_marks && context->marks_size > 0) {
+        backtrack_item->marks = PyMem_MALLOC(context->marks_size); /* buffer for the marks; nothing is copied into it here */
+        if (backtrack_item->marks == NULL)
+            return SRE_ERROR_MEMORY; /* the slot stays counted with marks == NULL, which SRE_CLEANUP skips */
+    } else
+        backtrack_item->marks = NULL;
+    context->backtrack_item = backtrack_item; /* expose the new item to the caller */
 
-    case SRE_AT_LOC_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
+    return 0;
+}
 
-#if defined(HAVE_UNICODE)
-    case SRE_AT_UNI_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
+LOCAL(void) SRE_DISCARD_BACKTRACK(SRE_CONTEXT* context) { /* Pop the newest backtrack item, freeing its marks and, if its chunk becomes empty and has a predecessor, the chunk too. */
+    SRE_BACKTRACK_CHUNK* chunk = context->backtrack_chunk;
+    SRE_BACKTRACK_ITEM* item = &chunk->items[--chunk->count]; /* NOTE(review): count is decremented without checking it is > 0 -- presumably callers guarantee a saved item exists; confirm */
 
-    case SRE_AT_UNI_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-#endif
+    if (item->marks != NULL)
+        PyMem_FREE(item->marks); /* item->marks is not reset and so dangles after this */
 
+    if (chunk->count == 0 && chunk->previous != NULL) { /* chunk is now empty and is not the oldest one */
+        SRE_BACKTRACK_CHUNK* previous = chunk->previous;
+        PyMem_FREE(chunk);
+        context->backtrack_chunk = previous;
     }
 
-    return 0;
+    context->backtrack_item = item; /* NOTE(review): if the chunk was freed above, item points into freed memory -- confirm callers never dereference it */
 }
 
-LOCAL(int)
-SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
-{
-    /* check if character is a member of the given set */
-
-    int ok = 1;
+LOCAL(void) SRE_DISCARD_UNTIL(SRE_CONTEXT* context, int op) { /* Pop backtrack items until the newest remaining one has the given op; that item is left in place. */
+    SRE_BACKTRACK_ITEM* item;
 
     for (;;) {
-        switch (*set++) {
-
-        case SRE_OP_FAILURE:
-            return !ok;
-
-        case SRE_OP_LITERAL:
-            /* <LITERAL> <code> */
-            if (ch == set[0])
-                return ok;
-            set++;
+        SRE_BACKTRACK_CHUNK* chunk = context->backtrack_chunk; /* re-read each pass: discarding may switch to the previous chunk */
+        item = &chunk->items[chunk->count - 1]; /* NOTE(review): reads items[-1] if the chunk is empty -- presumably a matching item always exists; confirm */
+        if (item->op == op)
             break;
+        SRE_DISCARD_BACKTRACK(context);
+    }
 
-        case SRE_OP_CATEGORY:
-            /* <CATEGORY> <code> */
-            if (sre_category(set[0], (int) ch))
-                return ok;
-            set += 1;
-            break;
+    context->backtrack_item = item; /* leave backtrack_item pointing at the matching item */
+}
 
-        case SRE_OP_CHARSET:
-            if (sizeof(SRE_CODE) == 2) {
-                /* <CHARSET> <bitmap> (16 bits per code word) */
-                if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
-                    return ok;
-                set += 16;
-            }
-            else {
-                /* <CHARSET> <bitmap> (32 bits per code word) */
-                if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
-                    return ok;
-                set += 8;
-            }
-            break;
+LOCAL(BOOL) SRE_AT_BOUNDARY(SRE_CONTEXT* context, SRE_STATE* state) { /* true when exactly one of the characters either side of text_ptr is a word character */
+    int word_behind = context->text_ptr > context->text_beginning && state->encoding->in_category(SRE_CAT_Word, context->text_ptr[-1]);
+    int word_ahead = context->text_ptr < context->text_end && state->encoding->in_category(SRE_CAT_Word, context->text_ptr[0]);
+    return word_ahead != word_behind;
+}
 
-        case SRE_OP_RANGE:
-            /* <RANGE> <lower> <upper> */
-            if (set[0] <= ch && ch <= set[1])
-                return ok;
-            set += 2;
-            break;
-
-        case SRE_OP_NEGATE:
-            ok = !ok;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
-        {
-            Py_ssize_t count, block;
-            count = *(set++);
-
-            if (sizeof(SRE_CODE) == 2) {
-                block = ((unsigned char*)set)[ch >> 8];
-                set += 128;
-                if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
-                    return ok;
-                set += count*16;
-            }
-            else {
-                /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
-                 * warnings when c's type supports only numbers < N+1 */
-                if (!(ch & ~65535))
-                    block = ((unsigned char*)set)[ch >> 8];
-                else
-                    block = -1;
-                set += 64;
-                if (block >=0 &&
-                    (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
-                    return ok;
-                set += count*8;
-            }
-            break;
-        }
+#define SRE_MARK_OP_SIZE 3
 
-        default:
-            /* internal error -- there's not much we can do about it
-               here, so let's just pretend it didn't match... */
-            return 0;
-        }
+// Cheap lookahead on the first opcode of tail (leading MARK ops are skipped). Returns FALSE only when that opcode cannot match at the current text position; opcodes not listed fall through to TRUE.
+LOCAL(BOOL) SRE_POSSIBLE_MATCH_AHEAD(SRE_CONTEXT* context, SRE_STATE* state, SRE_CODE* tail) {
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    switch (tail[0]) {
+    case SRE_OP_ANY:
+        return !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]);
+    case SRE_OP_ANY_ALL:
+        return TRUE;
+    case SRE_OP_ANY_ALL_REV:
+        return TRUE;
+    case SRE_OP_ANY_REV:
+        return !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]);
+    case SRE_OP_BOUNDARY:
+        return SRE_AT_BOUNDARY(context, state);
+    case SRE_OP_CATEGORY:
+        return state->encoding->in_category(tail[1], context->text_ptr[0]);
+    case SRE_OP_CATEGORY_REV:
+        return state->encoding->in_category(tail[1], context->text_ptr[-1]);
+    case SRE_OP_CHARSET:
+        return in_charset(tail + 2, context->text_ptr[0]);
+    case SRE_OP_CHARSET_IGNORE:
+        return in_charset_ignore(state, tail + 2, context->text_ptr[0]);
+    case SRE_OP_CHARSET_IGNORE_REV:
+        return in_charset_ignore(state, tail + 2, context->text_ptr[-1]);
+    case SRE_OP_CHARSET_REV:
+        return in_charset(tail + 2, context->text_ptr[-1]);
+    case SRE_OP_END_OF_LINE:
+        return context->text_ptr >= context->text_end || state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]);
+    case SRE_OP_END_OF_STRING:
+        return context->text_ptr >= context->text_end;
+    case SRE_OP_END_OF_STRING_LN:
+        return context->text_ptr >= context->text_end || context->text_ptr == context->final_linebreak;
+    case SRE_OP_LITERAL:
+        return context->text_ptr[0] == (SRE_CHAR)tail[1];
+    case SRE_OP_LITERAL_IGNORE:
+        return same_char_ignore(state, context->text_ptr[0], tail[1]);
+    case SRE_OP_LITERAL_IGNORE_REV:
+        return same_char_ignore(state, context->text_ptr[-1], tail[1]);
+    case SRE_OP_LITERAL_REV:
+        return context->text_ptr[-1] == (SRE_CHAR)tail[1];
+    case SRE_OP_LITERAL_STRING:
+        return context->text_ptr[0] == (SRE_CHAR)tail[2];
+    case SRE_OP_LITERAL_STRING_IGNORE:
+        return same_char_ignore(state, context->text_ptr[0], tail[2]);
+    case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        return same_char_ignore(state, context->text_ptr[-(int)tail[1]], tail[2]);
+    case SRE_OP_LITERAL_STRING_REV:
+        return context->text_ptr[-(int)tail[1]] == (SRE_CHAR)tail[2];
+    case SRE_OP_NOT_BOUNDARY:
+        return !SRE_AT_BOUNDARY(context, state);
+    case SRE_OP_NOT_CATEGORY:
+        return !state->encoding->in_category(tail[1], context->text_ptr[0]);
+    case SRE_OP_NOT_CATEGORY_REV:
+        return !state->encoding->in_category(tail[1], context->text_ptr[-1]);
+    case SRE_OP_NOT_CHARSET:
+        return !in_charset(tail + 2, context->text_ptr[0]);
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        return !in_charset_ignore(state, tail + 2, context->text_ptr[0]);
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        return !in_charset_ignore(state, tail + 2, context->text_ptr[-1]);
+    case SRE_OP_NOT_CHARSET_REV:
+        return !in_charset(tail + 2, context->text_ptr[-1]);
+    case SRE_OP_NOT_LITERAL:
+        return context->text_ptr[0] != (SRE_CHAR)tail[1];
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        return !same_char_ignore(state, context->text_ptr[0], tail[1]);
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        return !same_char_ignore(state, context->text_ptr[-1], tail[1]);
+    case SRE_OP_NOT_LITERAL_REV:
+        return context->text_ptr[-1] != (SRE_CHAR)tail[1];
+    case SRE_OP_NOT_SET:
+        return !in_set(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_NOT_SET_IGNORE:
+        return !in_set_ignore(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        return !in_set_ignore(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_NOT_SET_REV:
+        return !in_set(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_SET:
+        return in_set(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_SET_IGNORE:
+        return in_set_ignore(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_SET_IGNORE_REV:
+        return in_set_ignore(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_SET_REV:
+        return in_set(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_START_OF_LINE:
+        return context->text_ptr == context->text_beginning || state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]);
+    case SRE_OP_START_OF_STRING:
+        return context->text_ptr == context->text_beginning;
+    default: // opcode not analysed here: conservatively report that it might match
+        return TRUE;
     }
 }
 
-LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
-
-LOCAL(Py_ssize_t)
-SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
-{
-    SRE_CODE chr;
-    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t i;
-
-    /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
-        end = ptr + maxcount;
-
-    switch (pattern[0]) {
-
-    case SRE_OP_IN:
-        /* repeated set */
-        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
-        while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
-            ptr++;
+// Greedily move context->text_ptr over consecutive characters that match the single-character op at body, stopping at max_ptr. Forward ops advance up towards max_ptr; *_REV ops retreat down towards it.
+LOCAL(void) SRE_MATCH_MANY(SRE_CONTEXT* context, SRE_STATE* state, SRE_CHAR* max_ptr, SRE_CODE* body) {
+    switch (body[0]) {
+    case SRE_OP_ANY:
+        while (context->text_ptr < max_ptr && !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_ANY_ALL:
+        while (context->text_ptr < max_ptr)
+            context->text_ptr++;
+        break;
+    case SRE_OP_ANY_ALL_REV:
+        while (context->text_ptr > max_ptr)
+            context->text_ptr--;
+        break;
+    case SRE_OP_ANY_REV:
+        while (context->text_ptr > max_ptr && !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CATEGORY:
+        while (context->text_ptr < max_ptr && state->encoding->in_category(body[1], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CATEGORY_REV:
+        while (context->text_ptr > max_ptr && state->encoding->in_category(body[1], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET:
+        while (context->text_ptr < max_ptr && in_charset(body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        while (context->text_ptr < max_ptr && in_charset_ignore(state, body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        while (context->text_ptr > max_ptr && in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET_REV:
+        while (context->text_ptr > max_ptr && in_charset(body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL:
+        while (context->text_ptr < max_ptr && context->text_ptr[0] == (SRE_CHAR)body[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        while (context->text_ptr < max_ptr && same_char_ignore(state, context->text_ptr[0], body[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        while (context->text_ptr > max_ptr && same_char_ignore(state, context->text_ptr[-1], body[1])) // positive test: the negated form belongs to NOT_LITERAL_IGNORE_REV
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_REV:
+        while (context->text_ptr > max_ptr && context->text_ptr[-1] == (SRE_CHAR)body[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        while (context->text_ptr < max_ptr && !state->encoding->in_category(body[1], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CATEGORY_REV:
+        while (context->text_ptr > max_ptr && !state->encoding->in_category(body[1], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET:
+        while (context->text_ptr < max_ptr && !in_charset(body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        while (context->text_ptr < max_ptr && !in_charset_ignore(state, body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        while (context->text_ptr > max_ptr && !in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        while (context->text_ptr > max_ptr && !in_charset(body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL:
+        while (context->text_ptr < max_ptr && context->text_ptr[0] != (SRE_CHAR)body[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        while (context->text_ptr < max_ptr && !same_char_ignore(state, context->text_ptr[0], body[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        while (context->text_ptr > max_ptr && !same_char_ignore(state, context->text_ptr[-1], body[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        while (context->text_ptr > max_ptr && context->text_ptr[-1] != (SRE_CHAR)body[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE:
+        while (context->text_ptr < max_ptr && !in_range(context->text_ptr[0], body[1], body[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        while (context->text_ptr < max_ptr && !in_range_ignore(state, context->text_ptr[0], body[1], body[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        while (context->text_ptr > max_ptr && !in_range_ignore(state, context->text_ptr[-1], body[1], body[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        while (context->text_ptr > max_ptr && !in_range(context->text_ptr[-1], body[1], body[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET:
+        while (context->text_ptr < max_ptr && !in_set(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        while (context->text_ptr < max_ptr && !in_set_ignore(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        while (context->text_ptr > max_ptr && !in_set_ignore(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET_REV:
+        while (context->text_ptr > max_ptr && !in_set(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE:
+        while (context->text_ptr < max_ptr && in_range(context->text_ptr[0], body[1], body[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        while (context->text_ptr < max_ptr && in_range_ignore(state, context->text_ptr[0], body[1], body[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        while (context->text_ptr > max_ptr && in_range_ignore(state, context->text_ptr[-1], body[1], body[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE_REV:
+        while (context->text_ptr > max_ptr && in_range(context->text_ptr[-1], body[1], body[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET:
+        while (context->text_ptr < max_ptr && in_set(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
         break;
+    case SRE_OP_SET_IGNORE:
+        while (context->text_ptr < max_ptr && in_set_ignore(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        while (context->text_ptr > max_ptr && in_set_ignore(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET_REV:
+        while (context->text_ptr > max_ptr && in_set(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    }
+}
+
+// Step context->text_ptr backwards, no further than min_ptr, until the first opcode of tail (leading MARK ops skipped) could match there. Returns 0 if text_ptr dropped below min_ptr without finding such a position.
+LOCAL(int) SRE_UNMATCH_UNTIL_TAIL(SRE_CONTEXT* context, SRE_STATE* state, SRE_CHAR* min_ptr, SRE_CODE* tail) {
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
 
+    switch (tail[0]) {
     case SRE_OP_ANY:
-        /* repeated dot wildcard. */
-        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
-        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
-            ptr++;
+        while (context->text_ptr >= min_ptr && state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr--;
         break;
-
     case SRE_OP_ANY_ALL:
-        /* repeated dot wildcard.  skip to the end of the target
-           string, and backtrack from there */
-        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
-        ptr = end;
         break;
-
+    case SRE_OP_BOUNDARY:
+        while (context->text_ptr >= min_ptr && !SRE_AT_BOUNDARY(context, state))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CATEGORY:
+        while (context->text_ptr >= min_ptr && !state->encoding->in_category(tail[1], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET:
+        while (context->text_ptr >= min_ptr && !in_charset(tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        while (context->text_ptr >= min_ptr && !in_charset_ignore(state, tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_END_OF_LINE:
+        while (context->text_ptr >= min_ptr && context->text_ptr < context->text_end &&
+            !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_END_OF_STRING:
+        while (context->text_ptr >= min_ptr && context->text_ptr < context->text_end)
+            context->text_ptr--;
+        break;
+    case SRE_OP_END_OF_STRING_LN:
+        while (context->text_ptr >= min_ptr && context->text_ptr < context->text_end && context->text_ptr != context->final_linebreak)
+            context->text_ptr--;
+        break;
     case SRE_OP_LITERAL:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr == chr)
-            ptr++;
+        while (context->text_ptr >= min_ptr && context->text_ptr[0] != (SRE_CHAR)tail[1])
+            context->text_ptr--;
         break;
-
     case SRE_OP_LITERAL_IGNORE:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
-            ptr++;
+        while (context->text_ptr >= min_ptr && !same_char_ignore(state, context->text_ptr[0], tail[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_STRING:
+        while (context->text_ptr >= min_ptr && context->text_ptr[0] != (SRE_CHAR)tail[2])
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_STRING_IGNORE:
+        while (context->text_ptr >= min_ptr && !same_char_ignore(state, context->text_ptr[0], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_BOUNDARY:
+        while (context->text_ptr >= min_ptr && SRE_AT_BOUNDARY(context, state))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        while (context->text_ptr >= min_ptr && state->encoding->in_category(tail[1], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET:
+        while (context->text_ptr >= min_ptr && in_charset(tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        while (context->text_ptr >= min_ptr && in_charset_ignore(state, tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
         break;
-
     case SRE_OP_NOT_LITERAL:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr != chr)
-            ptr++;
+        while (context->text_ptr >= min_ptr && context->text_ptr[0] == (SRE_CHAR)tail[1])
+            context->text_ptr--;
         break;
-
     case SRE_OP_NOT_LITERAL_IGNORE:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
-            ptr++;
+        while (context->text_ptr >= min_ptr && same_char_ignore(state, context->text_ptr[0], tail[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE:
+        while (context->text_ptr >= min_ptr && in_range(context->text_ptr[0], tail[1], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        while (context->text_ptr >= min_ptr && in_range_ignore(state, context->text_ptr[0], tail[1], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET:
+        while (context->text_ptr >= min_ptr && in_set(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        while (context->text_ptr >= min_ptr && in_set_ignore(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE:
+        while (context->text_ptr >= min_ptr && !in_range(context->text_ptr[0], tail[1], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        while (context->text_ptr >= min_ptr && !in_range_ignore(state, context->text_ptr[0], tail[1], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET:
+        while (context->text_ptr >= min_ptr && !in_set(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET_IGNORE:
+        while (context->text_ptr >= min_ptr && !in_set_ignore(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_START_OF_LINE:
+        while (context->text_ptr >= min_ptr && context->text_ptr != context->text_beginning &&
+            !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1])) // keep stepping while NOT at a line start
+            context->text_ptr--;
+        break;
+    case SRE_OP_START_OF_STRING:
+        while (context->text_ptr >= min_ptr && context->text_ptr != context->text_beginning)
+            context->text_ptr--;
         break;
-
-    default:
-        /* repeated single character pattern */
-        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
-        while ((SRE_CHAR*) state->ptr < end) {
-            i = SRE_MATCH(state, pattern);
-            if (i < 0)
-                return i;
-            if (!i)
-                break;
-        }
-        TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
-               (SRE_CHAR*) state->ptr - ptr));
-        return (SRE_CHAR*) state->ptr - ptr;
     }
 
-    TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
-    return ptr - (SRE_CHAR*) state->ptr;
+    return context->text_ptr >= min_ptr;
 }
 
-#if 0 /* not used in this release */
-LOCAL(int)
-SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
-{
-    /* check if an SRE_OP_INFO block matches at the current position.
-       returns the number of SRE_CODE objects to skip if successful, 0
-       if no match */
-
-    SRE_CHAR* end = state->end;
-    SRE_CHAR* ptr = state->ptr;
-    Py_ssize_t i;
-
-    /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
-        return 0;
+// Reverse-direction unmatch: step context->text_ptr forwards, no further than min_ptr (the upper limit in this direction), until the first opcode of tail (leading MARK ops skipped) could match. Returns 0 if text_ptr went beyond min_ptr without finding such a position.
+LOCAL(int) SRE_UNMATCH_UNTIL_TAIL_REV(SRE_CONTEXT* context, SRE_STATE* state, SRE_CHAR* min_ptr, SRE_CODE* tail) {
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
 
-    /* check known prefix */
-    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
-        /* <length> <skip> <prefix data> <overlap data> */
-        for (i = 0; i < pattern[5]; i++)
-            if ((SRE_CODE) ptr[i] != pattern[7 + i])
-                return 0;
-        return pattern[0] + 2 * pattern[6];
+    switch (tail[0]) {
+    case SRE_OP_ANY_ALL_REV:
+        break;
+    case SRE_OP_ANY_REV:
+        while (context->text_ptr <= min_ptr && state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_BOUNDARY:
+        while (context->text_ptr <= min_ptr && !SRE_AT_BOUNDARY(context, state))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CATEGORY_REV:
+        while (context->text_ptr <= min_ptr && !state->encoding->in_category(tail[1], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && !in_charset_ignore(state, tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_REV:
+        while (context->text_ptr <= min_ptr && !in_charset(tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_LINE:
+        while (context->text_ptr <= min_ptr && context->text_ptr < context->text_end &&
+            !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0])) // zero-width test: end of line looks at the char AT text_ptr
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_STRING:
+        while (context->text_ptr <= min_ptr && context->text_ptr < context->text_end)
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_STRING_LN:
+        while (context->text_ptr <= min_ptr && context->text_ptr < context->text_end && context->text_ptr != context->final_linebreak)
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && !same_char_ignore(state, context->text_ptr[-1], tail[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_REV:
+        while (context->text_ptr <= min_ptr && context->text_ptr[-1] != (SRE_CHAR)tail[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && !same_char_ignore(state, context->text_ptr[-(int)tail[1]], tail[2])) // same comparison helper as the other IGNORE cases
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_STRING_REV:
+        while (context->text_ptr <= min_ptr && context->text_ptr[-(int)tail[1]] != (SRE_CHAR)tail[2])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_BOUNDARY:
+        while (context->text_ptr <= min_ptr && SRE_AT_BOUNDARY(context, state))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CATEGORY_REV:
+        while (context->text_ptr <= min_ptr && state->encoding->in_category(tail[1], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && in_charset_ignore(state, tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        while (context->text_ptr <= min_ptr && in_charset(tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && same_char_ignore(state, context->text_ptr[-1], tail[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        while (context->text_ptr <= min_ptr && context->text_ptr[-1] == (SRE_CHAR)tail[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && in_range_ignore(state, context->text_ptr[-1], tail[1], tail[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        while (context->text_ptr <= min_ptr && in_range(context->text_ptr[-1], tail[1], tail[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && in_set_ignore(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_REV:
+        while (context->text_ptr <= min_ptr && in_set(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && !in_range_ignore(state, context->text_ptr[-1], tail[1], tail[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_REV:
+        while (context->text_ptr <= min_ptr && !in_range(context->text_ptr[-1], tail[1], tail[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        while (context->text_ptr <= min_ptr && !in_set_ignore(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_REV:
+        while (context->text_ptr <= min_ptr && !in_set(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_START_OF_LINE:
+        while (context->text_ptr <= min_ptr && context->text_ptr != context->text_beginning &&
+            !state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1])) // keep stepping while NOT at a line start
+            context->text_ptr++;
+        break;
+    case SRE_OP_START_OF_STRING:
+        while (context->text_ptr <= min_ptr && context->text_ptr != context->text_beginning)
+            context->text_ptr++;
+        break;
     }
-    return pattern[0];
+
+    return context->text_ptr <= min_ptr;
 }
-#endif
 
-/* The macros below should be used to protect recursive SRE_MATCH()
- * calls that *failed* and do *not* return immediately (IOW, those
- * that will backtrack). Explaining:
- *
- * - Recursive SRE_MATCH() returned true: that's usually a success
- *   (besides atypical cases like ASSERT_NOT), therefore there's no
- *   reason to restore lastmark;
- *
- * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
- *   is returning to the caller: If the current SRE_MATCH() is the
- *   top function of the recursion, returning false will be a matching
- *   failure, and it doesn't matter where lastmark is pointing to.
- *   If it's *not* the top function, it will be a recursive SRE_MATCH()
- *   failure by itself, and the calling SRE_MATCH() will have to deal
- *   with the failure by the same rules explained here (it will restore
- *   lastmark by itself if necessary);
- *
- * - Recursive SRE_MATCH() returned false, and will continue the
- *   outside 'for' loop: must be protected when breaking, since the next
- *   OP could potentially depend on lastmark;
- *
- * - Recursive SRE_MATCH() returned false, and will be called again
- *   inside a local for/while loop: must be protected between each
- *   loop iteration, since the recursive SRE_MATCH() could do anything,
- *   and could potentially depend on lastmark.
- *
- * For more information, check the discussion at SF patch #712900.
- */
-#define LASTMARK_SAVE()     \
-    do { \
-        ctx->lastmark = state->lastmark; \
-        ctx->lastindex = state->lastindex; \
-    } while (0)
-#define LASTMARK_RESTORE()  \
-    do { \
-        state->lastmark = ctx->lastmark; \
-        state->lastindex = ctx->lastindex; \
-    } while (0)
-
-#define RETURN_ERROR(i) do { return i; } while(0)
-#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
-#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
-
-#define RETURN_ON_ERROR(i) \
-    do { if (i < 0) RETURN_ERROR(i); } while (0)
-#define RETURN_ON_SUCCESS(i) \
-    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
-#define RETURN_ON_FAILURE(i) \
-    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
-
-#define SFY(x) #x
-
-#define DATA_STACK_ALLOC(state, type, ptr) \
-do { \
-    alloc_pos = state->data_stack_base; \
-    TRACE(("allocating %s in %d (%d)\n", \
-           SFY(type), alloc_pos, sizeof(type))); \
-    if (state->data_stack_size < alloc_pos+sizeof(type)) { \
-        int j = data_stack_grow(state, sizeof(type)); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    ptr = (type*)(state->data_stack+alloc_pos); \
-    state->data_stack_base += sizeof(type); \
-} while (0)
-
-#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
-do { \
-    TRACE(("looking up %s at %d\n", SFY(type), pos)); \
-    ptr = (type*)(state->data_stack+pos); \
-} while (0)
-
-#define DATA_STACK_PUSH(state, data, size) \
-do { \
-    TRACE(("copy data in %p to %d (%d)\n", \
-           data, state->data_stack_base, size)); \
-    if (state->data_stack_size < state->data_stack_base+size) { \
-        int j = data_stack_grow(state, size); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    memcpy(state->data_stack+state->data_stack_base, data, size); \
-    state->data_stack_base += size; \
-} while (0)
-
-#define DATA_STACK_POP(state, data, size, discard) \
-do { \
-    TRACE(("copy data to %p from %d (%d)\n", \
-           data, state->data_stack_base-size, size)); \
-    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
-    if (discard) \
-        state->data_stack_base -= size; \
-} while (0)
-
-#define DATA_STACK_POP_DISCARD(state, size) \
-do { \
-    TRACE(("discard data from %d (%d)\n", \
-           state->data_stack_base-size, size)); \
-    state->data_stack_base -= size; \
-} while(0)
-
-#define DATA_PUSH(x) \
-    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
-#define DATA_POP(x) \
-    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
-#define DATA_POP_DISCARD(x) \
-    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
-#define DATA_ALLOC(t,p) \
-    DATA_STACK_ALLOC(state, t, p)
-#define DATA_LOOKUP_AT(t,p,pos) \
-    DATA_STACK_LOOKUP_AT(state,t,p,pos)
-
-#define MARK_PUSH(lastmark) \
-    do if (lastmark > 0) { \
-        i = lastmark; /* ctx->lastmark may change if reallocated */ \
-        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
-    } while (0)
-#define MARK_POP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
-    } while (0)
-#define MARK_POP_KEEP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
-    } while (0)
-#define MARK_POP_DISCARD(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
-    } while (0)
-
-#define JUMP_NONE            0
-#define JUMP_MAX_UNTIL_1     1
-#define JUMP_MAX_UNTIL_2     2
-#define JUMP_MAX_UNTIL_3     3
-#define JUMP_MIN_UNTIL_1     4
-#define JUMP_MIN_UNTIL_2     5
-#define JUMP_MIN_UNTIL_3     6
-#define JUMP_REPEAT          7
-#define JUMP_REPEAT_ONE_1    8
-#define JUMP_REPEAT_ONE_2    9
-#define JUMP_MIN_REPEAT_ONE  10
-#define JUMP_BRANCH          11
-#define JUMP_ASSERT          12
-#define JUMP_ASSERT_NOT      13
-
-#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
-    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
-    nextctx->last_ctx_pos = ctx_pos; \
-    nextctx->jump = jumpvalue; \
-    nextctx->pattern = nextpattern; \
-    ctx_pos = alloc_pos; \
-    ctx = nextctx; \
-    goto entrance; \
-    jumplabel: \
-    while (0) /* gcc doesn't like labels at end of scopes */ \
-
-typedef struct {
-    Py_ssize_t last_ctx_pos;
-    Py_ssize_t jump;
-    SRE_CHAR* ptr;
-    SRE_CODE* pattern;
-    Py_ssize_t count;
-    Py_ssize_t lastmark;
-    Py_ssize_t lastindex;
-    union {
-        SRE_CODE chr;
-        SRE_REPEAT* rep;
-    } u;
-} SRE_MATCH_CONTEXT;
+// Match up to the maximum until the tail could match. Returns 0 if max_ptr is reached but still no match.
+LOCAL(int) SRE_MATCH_UNTIL_TAIL(SRE_CONTEXT* context, SRE_STATE* state, SRE_CHAR* max_ptr, SRE_CODE* body, SRE_CODE* tail) {
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
 
-/* check if string matches the given pattern.  returns <0 for
-   error, 0 for failure, and 1 for success */
-LOCAL(Py_ssize_t)
-SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
-{
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t alloc_pos, ctx_pos = -1;
-    Py_ssize_t i, ret = 0;
-    Py_ssize_t jump;
-    unsigned int sigcount=0;
-
-    SRE_MATCH_CONTEXT* ctx;
-    SRE_MATCH_CONTEXT* nextctx;
-
-    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
-
-    DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
-    ctx->last_ctx_pos = -1;
-    ctx->jump = JUMP_NONE;
-    ctx->pattern = pattern;
-    ctx_pos = alloc_pos;
-
-entrance:
-
-    ctx->ptr = (SRE_CHAR *)state->ptr;
-
-    if (ctx->pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> ... */
-        if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
-            TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
-            RETURN_FAILURE;
+    switch (body[0]) {
+    case SRE_OP_ANY:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_ANY_ALL:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr)
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_ANY_ALL_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr)
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_ANY_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || state->encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
         }
-        ctx->pattern += ctx->pattern[1] + 1;
+        break;
+    case SRE_OP_CATEGORY:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !state->encoding->in_category(body[1], context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_charset(body + 2, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_CHARSET_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_charset(body + 2, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_LITERAL:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || context->text_ptr[0] != (SRE_CHAR)body[1])
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !same_char_ignore(state, context->text_ptr[0], body[1]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_LITERAL_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || context->text_ptr[-1] != (SRE_CHAR)body[1])
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || state->encoding->in_category(body[1], context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_charset(body + 2, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_charset(body + 2, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || context->text_ptr[0] == (SRE_CHAR)body[1])
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || same_char_ignore(state, context->text_ptr[0], body[1]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || context->text_ptr[-1] == (SRE_CHAR)body[1])
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_RANGE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_range(context->text_ptr[0], body[1], body[2]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_range_ignore(state, context->text_ptr[0], body[1], body[2]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_range_ignore(state, context->text_ptr[-1], body[1], body[2]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_range(context->text_ptr[-1], body[1], body[2]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_SET:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_set(state, body + 1, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_SET_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || in_set(state, body + 1, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_RANGE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_range(context->text_ptr[0], body[1], body[2]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_range_ignore(state, context->text_ptr[0], body[1], body[2]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_range_ignore(state, context->text_ptr[-1], body[1], body[2]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_RANGE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_range(context->text_ptr[-1], body[1], body[2]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_SET:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_set(state, body + 1, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_SET_IGNORE:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr >= max_ptr || !in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return 0;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_SET_REV:
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, state, tail)) {
+            if (context->text_ptr <= max_ptr || !in_set(state, body + 1, context->text_ptr[-1]))
+                return 0;
+            context->text_ptr--;
+        }
+        break;
     }
 
-    for (;;) {
-        ++sigcount;
-        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
-            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
-
-        switch (*ctx->pattern++) {
-
-        case SRE_OP_MARK:
-            /* set mark */
-            /* <MARK> <gid> */
-            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            if (i & 1)
-                state->lastindex = i/2 + 1;
-            if (i > state->lastmark) {
-                /* state->lastmark is the highest valid index in the
-                   state->mark array.  If it is increased by more than 1,
-                   the intervening marks must be set to NULL to signal
-                   that these marks have not been encountered. */
-                Py_ssize_t j = state->lastmark + 1;
-                while (j < i)
-                    state->mark[j++] = NULL;
-                state->lastmark = i;
-            }
-            state->mark[i] = ctx->ptr;
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_LITERAL:
-            /* match literal string */
-            /* <LITERAL> <code> */
-            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
+    return TRUE;
+}
 
-        case SRE_OP_NOT_LITERAL:
-            /* match anything that is not literal character */
-            /* <NOT_LITERAL> <code> */
-            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
+/* check if string matches the given pattern.  returns <0 for
+   error, 0 for failure, and 1 for success */
+LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern_ptr) {
+    SRE_CONTEXT context;
+    unsigned int repeat_min;
+    unsigned int repeat_max;
+    unsigned int repeat_counter;
+    SRE_CHAR* repeat_start;
+    SRE_BACKTRACK_ITEM* top_nested;
+    unsigned int sigcount = 0;
+    int result;
+
+    context.text_beginning = (SRE_CHAR *)state->beginning;
+    context.text_ptr = state->ptr;
+    context.text_start = (SRE_CHAR *)state->start;
+    context.text_end = (SRE_CHAR *)state->end;
+    context.pattern_ptr = pattern_ptr;
+    context.marks = (SRE_CHAR**)state->mark;
+    context.marks_size = (state->numbered_mark_count + state->named_mark_count) * sizeof(context.marks[0]);
+    context.backtrack_chunk = state->backtrack_chunk;
+
+    // Point to the trailing line break if the text ends with one; otherwise NULL.
+    context.final_linebreak = context.text_beginning < context.text_end &&
+        state->encoding->in_category(SRE_CAT_LineBreak, context.text_end[-1]) ? context.text_end - 1 : NULL;
+
+    TRACE(("|%p|%p|ENTER\n", context.pattern_ptr, context.text_ptr));
+    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_FAILURE, 0);
+    if (result != 0)
+        return SRE_CLEANUP(&context, state, result);
+    top_nested = context.backtrack_item;
 
-        case SRE_OP_SUCCESS:
-            /* end of pattern */
-            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
-            state->ptr = ctx->ptr;
-            RETURN_SUCCESS;
-
-        case SRE_OP_AT:
-            /* match at given position */
-            /* <AT> <code> */
-            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            break;
+    memset(context.marks, 0, context.marks_size);
 
-        case SRE_OP_CATEGORY:
-            /* match at given category */
-            /* <CATEGORY> <code> */
-            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
+advance:
+    for (;;) {
+        ++sigcount;
+        if ((0 == (sigcount & 0xFFF)) && PyErr_CheckSignals())
+            return SRE_CLEANUP(&context, state, SRE_ERROR_INTERRUPTED);
 
+        switch (context.pattern_ptr[0]) {
         case SRE_OP_ANY:
-            /* match anything (except a newline) */
-            /* <ANY> */
-            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->ptr++;
+            // Any character except a newline.
+            // <ANY>
+            TRACE(("|%p|%p|ANY\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || state->encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr++;
             break;
-
         case SRE_OP_ANY_ALL:
-            /* match anything */
-            /* <ANY_ALL> */
-            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end)
-                RETURN_FAILURE;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN:
-            /* match set member (or non_member) */
-            /* <IN> <skip> <set> */
-            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_LITERAL_IGNORE:
-            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
+            // Any character.
+            // <ANY_ALL>
+            TRACE(("|%p|%p|ANY_ALL\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end)
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ANY_ALL_REV:
+            // Any character.
+            // <ANY_ALL_REV>
+            TRACE(("|%p|%p|ANY_ALL_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start)
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ANY_REV:
+            // Any character except a newline.
+            // <ANY_REV>
+            TRACE(("|%p|%p|ANY_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || state->encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr++;
             break;
-
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN_IGNORE:
-            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end
-                || !SRE_CHARSET(ctx->pattern+1,
-                                (SRE_CODE)state->lower(*ctx->ptr)))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
+        case SRE_OP_ASSERT:
+            // Assert subpattern.
+            // <ASSERT> <skip to end> ... <END_ASSERT>
+            TRACE(("|%p|%p|ASSERT\n", context.pattern_ptr, context.text_ptr));
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            // If the subpattern succeeds then we'll discard the enclosed backtrack info,
+            // including any marks, so we need to save the marks here.
+            memmove(context.backtrack_item->marks, context.marks, context.marks_size);
+            context.backtrack_item->assert.text_start = context.text_start;
+            context.backtrack_item->assert.text_ptr = context.text_ptr;
+            // The assert can look at the text before the start position, if any.
+            context.text_start = state->beginning;
+            context.pattern_ptr += 2;
             break;
-
-        case SRE_OP_JUMP:
-        case SRE_OP_INFO:
-            /* jump forward */
-            /* <JUMP> <offset> */
-            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            ctx->pattern += ctx->pattern[0];
+        case SRE_OP_ASSERT_NOT:
+            // Assert not subpattern.
+            // <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
+            TRACE(("|%p|%p|ASSERT_NOT\n", context.pattern_ptr, context.text_ptr));
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT_NOT, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            // If the subpattern succeeds then we'll discard the enclosed backtrack info,
+            // including any marks, so we need to save the marks here.
+            memmove(context.backtrack_item->marks, context.marks, context.marks_size);
+            context.backtrack_item->assert.text_start = context.text_start;
+            context.backtrack_item->assert.text_ptr = context.text_ptr;
+            context.backtrack_item->assert.pattern_ptr = context.pattern_ptr;
+            // The assert can look at the text before the start position, if any.
+            context.text_start = state->beginning;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_ATOMIC:
+            // Atomic subpattern.
+            // <ATOMIC> ... <END_ATOMIC>
+            TRACE(("|%p|%p|ATOMIC\n", context.pattern_ptr, context.text_ptr));
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ATOMIC, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            // If the subpattern succeeds then we'll discard the enclosed backtrack info,
+            // including any marks, so we need to save the marks here.
+            memmove(context.backtrack_item->marks, context.marks, context.marks_size);
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_BOUNDARY:
+            // Boundary between word and non-word.
+            // <BOUNDARY>
+            TRACE(("|%p|%p|BOUNDARY\n", context.pattern_ptr, context.text_ptr));
+            if (!SRE_AT_BOUNDARY(&context, state))
+                goto backtrack;
+            context.pattern_ptr++;
             break;
-
         case SRE_OP_BRANCH:
-            /* alternation */
-            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
-            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
-            LASTMARK_SAVE();
-            ctx->u.rep = state->repeat;
-            if (ctx->u.rep)
-                MARK_PUSH(ctx->lastmark);
-            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
-                if (ctx->pattern[1] == SRE_OP_LITERAL &&
-                    (ctx->ptr >= end ||
-                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
-                    continue;
-                if (ctx->pattern[1] == SRE_OP_IN &&
-                    (ctx->ptr >= end ||
-                     !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
-                    continue;
-                state->ptr = ctx->ptr;
-                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
-                if (ret) {
-                    if (ctx->u.rep)
-                        MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                if (ctx->u.rep)
-                    MARK_POP_KEEP(ctx->lastmark);
-                LASTMARK_RESTORE();
-            }
-            if (ctx->u.rep)
-                MARK_POP_DISCARD(ctx->lastmark);
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT_ONE:
-            /* match repeated sequence (maximizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MAX_REPEAT operator */
-
-            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
-            RETURN_ON_ERROR(ret);
-            DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-            ctx->count = ret;
-            ctx->ptr += ctx->count;
-
-            /* when we arrive here, count contains the number of
-               matches, and ctx->ptr points to the tail of the target
-               string.  check if the rest of the pattern matches,
-               and backtrack if not. */
-
-            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                RETURN_FAILURE;
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
-            }
-
-            LASTMARK_SAVE();
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
-                /* tail starts with a literal. skip positions where
-                   the rest of the pattern cannot possibly match */
-                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
-                for (;;) {
-                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
-                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
-                        ctx->ptr--;
-                        ctx->count--;
-                    }
-                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                        break;
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-
-                    LASTMARK_RESTORE();
-
-                    ctx->ptr--;
-                    ctx->count--;
+        {
+            // Alternation.
+            // <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
+            SRE_CODE* skip_ptr = context.pattern_ptr + 1;
+            TRACE(("|%p|%p|BRANCH\n", context.pattern_ptr, context.text_ptr));
+            // Look ahead in the branch to avoid unnecessary backtracking.
+            while (! SRE_POSSIBLE_MATCH_AHEAD(&context, state, skip_ptr + 1)) {
+                skip_ptr += skip_ptr[0];
+                // Is there another branch?
+                if (skip_ptr[0] == 0)
+                    goto backtrack;
+            }
+            // Try this branch.
+            context.pattern_ptr = skip_ptr + 1;
+            // Save the next branch, if present.
+            skip_ptr += skip_ptr[0];
+            if (skip_ptr[0] != 0) {
+                result = SRE_SAVE_BACKTRACK(&context, SRE_OP_BRANCH, FALSE);
+                if (result != 0)
+                    return SRE_CLEANUP(&context, state, result);
+                context.backtrack_item->branch.text_ptr = context.text_ptr;
+                context.backtrack_item->branch.pattern_ptr = skip_ptr;
+            }
+            break;
+        }
+        case SRE_OP_CATEGORY:
+            // Character in category.
+            // <CATEGORY> <mask>
+            TRACE(("|%p|%p|CATEGORY 0x%X\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || !state->encoding->in_category(context.pattern_ptr[1], context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_CATEGORY_REV:
+            // Character in category.
+            // <CATEGORY_REV> <mask>
+            TRACE(("|%p|%p|CATEGORY_REV 0x%X\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || !state->encoding->in_category(context.pattern_ptr[1], context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_CHARSET:
+            // Character in set.
+            // <CHARSET> <skip> <set>
+            TRACE(("|%p|%p|CHARSET\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || !in_charset(context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_IGNORE:
+            // Character in set, ignoring case.
+            // <CHARSET_IGNORE> <skip> <set>
+            TRACE(("|%p|%p|CHARSET_IGNORE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || !in_charset_ignore(state, context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_IGNORE_REV:
+            // Character in set, ignoring case.
+            // <CHARSET_IGNORE_REV> <skip> <set>
+            TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || !in_charset_ignore(state, context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_REV:
+            // Character in set.
+            // <CHARSET_REV> <skip> <set>
+            TRACE(("|%p|%p|CHARSET_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || !in_charset(context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_END_ASSERT:
+            // Assert subpattern.
+            // <ASSERT> <skip to end> ... <END_ASSERT>
+            TRACE(("|%p|%p|END_ASSERT\n", context.pattern_ptr, context.text_ptr));
+            // Discard all backtrack info in the assertion.
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ASSERT);
+            // Restore the marks.
+            memmove(context.marks, context.backtrack_item->marks, context.marks_size);
+            context.text_start = context.backtrack_item->assert.text_start;
+            context.text_ptr = context.backtrack_item->assert.text_ptr;
+            SRE_DISCARD_BACKTRACK(&context);
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_ASSERT_NOT:
+            // Assert not subpattern.
+            // <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
+            TRACE(("|%p|%p|END_ASSERT_NOT\n", context.pattern_ptr, context.text_ptr));
+            // Discard all backtrack info in the assertion.
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ASSERT_NOT);
+            // Restore the marks.
+            memmove(context.marks, context.backtrack_item->marks, context.marks_size);
+            context.text_start = context.backtrack_item->assert.text_start;
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        case SRE_OP_END_ATOMIC:
+            // Atomic subpattern.
+            // <ATOMIC> <skip to end> ... <END_ATOMIC>
+            TRACE(("|%p|%p|END_ATOMIC\n", context.pattern_ptr, context.text_ptr));
+            // Discard all backtrack info in the atomic group.
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ATOMIC);
+            // Modify the backtrack info so that the marks will be restored if the tail
+            // of the pattern fails.
+            context.backtrack_item->op = SRE_OP_END_ATOMIC;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_LINE:
+            // End of line.
+            // <END_OF_LINE>
+            TRACE(("|%p|%p|END_OF_LINE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr < context.text_end && !state->encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[0]))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_STRING:
+            // End of string.
+            // <END_OF_STRING>
+            TRACE(("|%p|%p|END_OF_STRING\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr < context.text_end)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_STRING_LN:
+            // End of string, or at the final line break.
+            // <END_OF_STRING_LN>
+            TRACE(("|%p|%p|END_OF_STRING_LN\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr < context.text_end && context.text_ptr != context.final_linebreak)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_REPEAT_MAX:
+        {
+            // End of greedy repeat.
+            // <REPEAT_MAX> <skip to end> <min> <max> ... <END_REPEAT_MAX> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_MAX\n", context.pattern_ptr, context.text_ptr));
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
-
+                repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
             } else {
-                /* general case */
-                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    ctx->ptr--;
-                    ctx->count--;
-                    LASTMARK_RESTORE();
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_END_REPEAT_MAX_REV:
+        {
+            // End of greedy repeat.
+            // <REPEAT_MAX_REV> <skip to end> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_MAX_REV\n", context.pattern_ptr, context.text_ptr));
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
+                repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
             }
-            RETURN_FAILURE;
-
-        case SRE_OP_MIN_REPEAT_ONE:
-            /* match repeated sequence (minimizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MIN_REPEAT operator */
-
-            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            if (ctx->pattern[1] == 0)
-                ctx->count = 0;
-            else {
-                /* count using pattern min as the maximum */
-                ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
-                RETURN_ON_ERROR(ret);
-                DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                if (ret < (Py_ssize_t) ctx->pattern[1])
-                    /* didn't match minimum number of times */
-                    RETURN_FAILURE;
-                /* advance past minimum matches of repeat */
-                ctx->count = ret;
-                ctx->ptr += ctx->count;
-            }
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
-
+            break;
+        }
+        case SRE_OP_END_REPEAT_MIN:
+        {
+            // End of lazy repeat.
+            // <REPEAT_MIN> <skip to end> <min> <max> ... <END_REPEAT_MIN> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_MIN\n", context.pattern_ptr, context.text_ptr));
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_tail) {
+                if (try_again) {
+                    // Need to save the repeat info for the inner repeat in case the tail fails.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.repeat_min = repeat_min;
+                    context.backtrack_item->repeat.repeat_max = repeat_max;
+                    context.backtrack_item->repeat.repeat_counter = repeat_counter;
+                    context.backtrack_item->repeat.repeat_start = repeat_start;
+                    context.backtrack_item->repeat.pattern_ptr = body;
+                }
+                // Restore the repeat info for the outer repeat.
+                repeat_min = top_nested->repeat.repeat_min;
+                repeat_max = top_nested->repeat.repeat_max;
+                repeat_counter = top_nested->repeat.repeat_counter;
+                repeat_start = top_nested->repeat.repeat_start;
+                top_nested = top_nested->repeat.top_nested;
+                context.pattern_ptr = tail;
             } else {
-                /* general case */
-                LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
-                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    state->ptr = ctx->ptr;
-                    ret = SRE_COUNT(state, ctx->pattern+3, 1);
-                    RETURN_ON_ERROR(ret);
-                    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                    if (ret == 0)
-                        break;
-                    assert(ret == 1);
-                    ctx->ptr++;
-                    ctx->count++;
-                    LASTMARK_RESTORE();
+                if (try_again)
+                    context.pattern_ptr = body;
+                else
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_END_REPEAT_MIN_REV:
+        {
+            // End of lazy repeat (reverse direction).
+            // <REPEAT_MIN_REV> <skip to end> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_MIN_REV\n", context.pattern_ptr, context.text_ptr));
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_tail) {
+                if (try_again) {
+                    // Need to save the repeat info for the inner repeat in case the tail fails.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.repeat_min = repeat_min;
+                    context.backtrack_item->repeat.repeat_max = repeat_max;
+                    context.backtrack_item->repeat.repeat_counter = repeat_counter;
+                    context.backtrack_item->repeat.repeat_start = repeat_start;
+                    context.backtrack_item->repeat.pattern_ptr = body;
                 }
+                // Restore the repeat info for the outer repeat.
+                repeat_min = top_nested->repeat.repeat_min;
+                repeat_max = top_nested->repeat.repeat_max;
+                repeat_counter = top_nested->repeat.repeat_counter;
+                repeat_start = top_nested->repeat.repeat_start;
+                top_nested = top_nested->repeat.top_nested;
+                context.pattern_ptr = tail;
+            } else {
+                if (try_again)
+                    context.pattern_ptr = body;
+                else
+                    goto backtrack;
             }
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT:
-            /* create repeat context.  all the hard work is done
-               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
-            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
-            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            /* install new repeat context */
-            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
-            if (!ctx->u.rep) {
-                PyErr_NoMemory();
-                RETURN_FAILURE;
-            }
-            ctx->u.rep->count = -1;
-            ctx->u.rep->pattern = ctx->pattern;
-            ctx->u.rep->prev = state->repeat;
-            ctx->u.rep->last_ptr = NULL;
-            state->repeat = ctx->u.rep;
-
-            state->ptr = ctx->ptr;
-            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
-            state->repeat = ctx->u.rep->prev;
-            PyObject_FREE(ctx->u.rep);
-
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-            RETURN_FAILURE;
-
-        case SRE_OP_MAX_UNTIL:
-            /* maximizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
-
-            /* FIXME: we probably need to deal with zero-width
-               matches in here... */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
-                   ctx->ptr, ctx->count));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
+            break;
+        }
+        case SRE_OP_END_REPEAT_POSS:
+        {
+            // End of possessive repeat.
+            // <REPEAT_POSS> <skip to end> <min> <max> ... <END_REPEAT_POSS> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_POSS\n", context.pattern_ptr, context.text_ptr));
+            // Discard all backtrack info in the body of the possessive repeat.
+            SRE_DISCARD_UNTIL(&context, SRE_OP_REPEAT_POSS);
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
-            }
-
-            if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
-                state->ptr != ctx->u.rep->last_ptr) {
-                /* we may have enough matches, but if we can
-                   match another item, do so */
-                ctx->u.rep->count = ctx->count;
-                LASTMARK_SAVE();
-                MARK_PUSH(ctx->lastmark);
-                /* zero-width match protection */
-                DATA_PUSH(&ctx->u.rep->last_ptr);
-                ctx->u.rep->last_ptr = state->ptr;
-                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
-                        ctx->u.rep->pattern+3);
-                DATA_POP(&ctx->u.rep->last_ptr);
-                if (ret) {
-                    MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
+                repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_END_REPEAT_POSS_REV:
+        {
+            // End of possessive repeat (reverse direction).
+            // <REPEAT_POSS_REV> <skip to end> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
+            SRE_CODE* end_repeat_ptr = context.pattern_ptr;
+            SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|END_REPEAT_POSS_REV\n", context.pattern_ptr, context.text_ptr));
+            // Discard all backtrack info in the body of the possessive repeat.
+            SRE_DISCARD_UNTIL(&context, SRE_OP_REPEAT_POSS_REV);
+            // At this point the repeat info refers to the inner repeat.
+            ++repeat_counter;
+            repeat_max = sre_min(repeat_max, repeat_counter + available);
+            try_again = repeat_counter < repeat_max && context.text_ptr != repeat_start;
+            try_tail = repeat_counter >= repeat_min && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    // The backtrack info must refer to the outer repeat.
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
-                MARK_POP(ctx->lastmark);
-                LASTMARK_RESTORE();
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-            }
-
-            /* cannot match more repeated items here.  make sure the
-               tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
-            RETURN_ON_SUCCESS(ret);
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_MIN_UNTIL:
-            /* minimizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
-                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
+                repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_GROUPREF:
+        {
+            // Match capture group.
+            // <GROUPREF> <group_index>
+            Py_ssize_t group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            group = context.pattern_ptr[1]; // Zero-based index. Note that externally group 0 is the entire matched string.
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+            length = group_end - group_start;
+            if (length > context.text_end - context.text_ptr)
+                goto backtrack;
+            i = 0;
+            while (i < length) {
+                if (context.text_ptr[i] != group_start[i])
+                    goto backtrack;
+                i++;
+            }
+            context.text_ptr += length;
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_EXISTS:
+        {
+            // Whether capture group exists.
+            // <GROUPREF_EXISTS> <group_index> <skip> code_yes <JUMP> <skip> code_no
+            Py_ssize_t group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            TRACE(("|%p|%p|GROUPREF_EXISTS %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            group = context.pattern_ptr[1]; // Zero-based index. Note that externally group 0 is the entire matched string.
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                context.pattern_ptr += 1 + context.pattern_ptr[2];
+            else
+                context.pattern_ptr += 3;
+            break;
+        }
+        case SRE_OP_GROUPREF_IGNORE:
+        {
+            // Match capture group, ignoring case.
+            // <GROUPREF_IGNORE> <group_index>
+            Py_ssize_t group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF_IGNORE %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            group = context.pattern_ptr[1]; // Zero-based index. Note that externally group 0 is the entire matched string.
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+            length = group_end - group_start;
+            if (length > context.text_end - context.text_ptr)
+                goto backtrack;
+            i = 0;
+            while (i < length) {
+                if (!same_char_ignore(state, context.text_ptr[i], group_start[i]))
+                    goto backtrack;
+                i++;
+            }
+            context.text_ptr += length;
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_IGNORE_REV:
+        {
+            // Match capture group, ignoring case.
+            // <GROUPREF_IGNORE_REV> <group_index>
+            Py_ssize_t group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF_IGNORE_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            group = context.pattern_ptr[1]; // Zero-based index. Note that externally group 0 is the entire matched string.
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+            length = group_end - group_start;
+            if (length > context.text_ptr - context.text_start)
+                goto backtrack;
+            context.text_ptr -= length;
+            i = 0;
+            while (i < length) {
+                if (!same_char_ignore(state, context.text_ptr[i], group_start[i]))
+                    goto backtrack;
+                i++;
+            }
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_REV:
+        {
+            // Match capture group.
+            // <GROUPREF_REV> <group_index>
+            Py_ssize_t group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            group = context.pattern_ptr[1]; // Zero-based index. Note that externally group 0 is the entire matched string.
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+            length = group_end - group_start;
+            if (length > context.text_ptr - context.text_start)
+                goto backtrack;
+            context.text_ptr -= length;
+            i = 0;
+            while (i < length) {
+                if (context.text_ptr[i] != group_start[i])
+                    goto backtrack;
+                i++;
+            }
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_JUMP:
+            // Jump forward.
+            // <JUMP> <offset>
+            TRACE(("|%p|%p|JUMP %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_LITERAL:
+            // Character is a literal.
+            // <LITERAL> <character>
+            TRACE(("|%p|%p|LITERAL %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || context.text_ptr[0] != (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_IGNORE:
+            // Character is a literal, ignoring case.
+            // <LITERAL_IGNORE> <character>
+            TRACE(("|%p|%p|LITERAL_IGNORE %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || !same_char_ignore(state, context.text_ptr[0], context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_IGNORE_REV:
+            // Character is a literal, ignoring case.
+            // <LITERAL_IGNORE_REV> <character>
+            TRACE(("|%p|%p|LITERAL_IGNORE_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || !same_char_ignore(state, context.text_ptr[-1], context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_REV:
+            // Character is a literal.
+            // <LITERAL_REV> <character>
+            TRACE(("|%p|%p|LITERAL_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || context.text_ptr[-1] != (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_STRING:
+        {
+            // Literal string.
+            // <LITERAL_STRING> <length> ...
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|LITERAL_STRING %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (length > context.text_end - context.text_ptr)
+                goto backtrack;
+            i = 0;
+            do {
+                if (context.text_ptr[i] != (SRE_CHAR)literal[i])
+                    goto backtrack;
+                i++;
+            }
+            while (i < length);
+            context.text_ptr += length;
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_LITERAL_STRING_IGNORE:
+        {
+            // Literal string, ignoring case.
+            // <LITERAL_STRING_IGNORE> <length> ...
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|LITERAL_STRING_IGNORE %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (length > context.text_end - context.text_ptr)
+                goto backtrack;
+            i = 0;
+            do {
+                if (!same_char_ignore(state, context.text_ptr[i], literal[i]))
+                    goto backtrack;
+                i++;
+            }
+            while (i < length);
+            context.text_ptr += length;
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        {
+            // Literal string, ignoring case.
+            // <LITERAL_STRING_IGNORE_REV> <length> ...
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|LITERAL_STRING_IGNORE_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (length > context.text_ptr - context.text_start)
+                goto backtrack;
+            context.text_ptr -= length;
+            i = 0;
+            do {
+                if (!same_char_ignore(state, context.text_ptr[i], literal[i]))
+                    goto backtrack;
+                i++;
+            }
+            while (i < length);
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_LITERAL_STRING_REV:
+        {
+            // Literal string.
+            // <LITERAL_STRING_REV> <length> ...
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|LITERAL_STRING_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (length > context.text_ptr - context.text_start)
+                goto backtrack;
+            context.text_ptr -= length;
+            i = 0;
+            do {
+                if (context.text_ptr[i] != (SRE_CHAR)literal[i])
+                    goto backtrack;
+                i++;
+            }
+            while (i < length);
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_MARK:
+        {
+            // Set mark.
+            // <MARK> <numbered_index> <named_index>
+            int numbered_index = context.pattern_ptr[1];
+            int named_index = context.pattern_ptr[2];
+            TRACE(("|%p|%p|MARK %u %u\n", context.pattern_ptr, context.text_ptr, numbered_index, named_index));
+            // Save the current marks.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_MARK, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->mark.numbered_index = numbered_index;
+            context.backtrack_item->mark.numbered_mark_ptr = context.marks[numbered_index];
+            context.marks[numbered_index] = context.text_ptr;
+            DEBUG_TRACE(("saving mark %u as 0x%p\n", context.backtrack_item->mark.numbered_index, context.backtrack_item->mark.numbered_mark_ptr));
+            context.backtrack_item->mark.named_index = named_index;
+            context.backtrack_item->mark.named_mark_ptr = context.marks[named_index];
+            context.marks[named_index] = context.text_ptr;
+            DEBUG_TRACE(("saving mark %u as 0x%p\n", context.backtrack_item->mark.named_index, context.backtrack_item->mark.named_mark_ptr));
+            context.pattern_ptr += 3;
+            break;
+        }
+        case SRE_OP_NOT_BOUNDARY:
+            // Not boundary between word and non-word.
+            // <NOT_BOUNDARY>
+            TRACE(("|%p|%p|NOT_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
+            if (SRE_AT_BOUNDARY(&context, state))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_NOT_CATEGORY:
+            // Character not in category.
+            // <NOT_CATEGORY> <mask>
+            TRACE(("|%p|%p|NOT_CATEGORY 0x%X\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || state->encoding->in_category(context.pattern_ptr[1], context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CATEGORY_REV:
+            // Reverse match: the character before text_ptr must exist (hence "<= text_start" fails) and not be in the category.
+            // <NOT_CATEGORY_REV> <mask>
+            TRACE(("|%p|%p|NOT_CATEGORY_REV 0x%X\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || state->encoding->in_category(context.pattern_ptr[1], context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CHARSET:
+            // Character not in set.
+            // <NOT_CHARSET> <set>
+            TRACE(("|%p|%p|NOT_CHARSET\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || in_charset(context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_IGNORE:
+            // Character not in set, ignoring case.
+            // <NOT_CHARSET_IGNORE> <set>
+            TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || in_charset_ignore(state, context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_IGNORE_REV:
+            // Character not in set, ignoring case.
+            // <NOT_CHARSET_IGNORE_REV> <set>
+            TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || in_charset_ignore(state, context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_REV:
+            // Character not in set.
+            // <NOT_CHARSET_REV> <set>
+            TRACE(("|%p|%p|NOT_CHARSET_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || in_charset(context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_LITERAL:
+            // Character is not a literal.
+            // <NOT_LITERAL> <character>
+            TRACE(("|%p|%p|NOT_LITERAL %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || context.text_ptr[0] == (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_IGNORE:
+            // Character is not a literal, ignoring case.
+            // <NOT_LITERAL_IGNORE> <character>
+            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr >= context.text_end || same_char_ignore(state, context.text_ptr[0], context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_IGNORE_REV:
+            // Character is not a literal, ignoring case.
+            // <NOT_LITERAL_IGNORE_REV> <character>
+            TRACE(("|%p|%p|NOT_LITERAL_IGNORE_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || same_char_ignore(state, context.text_ptr[-1], context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_REV:
+            // Character is not a literal.
+            // <NOT_LITERAL_REV> <character>
+            TRACE(("|%p|%p|NOT_LITERAL_REV %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1]));
+            if (context.text_ptr <= context.text_start || context.text_ptr[-1] == (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_RANGE:
+            // Character not in range.
+            // <NOT_RANGE> <lower> <upper>
+            TRACE(("|%p|%p|NOT_RANGE %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr >= context.text_end || in_range(context.text_ptr[0], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_IGNORE:
+            // Character not in range, ignoring case.
+            // <NOT_RANGE_IGNORE> <lower> <upper>
+            TRACE(("|%p|%p|NOT_RANGE_IGNORE %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr >= context.text_end || in_range_ignore(state, context.text_ptr[0], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_IGNORE_REV:
+            // Character not in range, ignoring case.
+            // <NOT_RANGE_IGNORE_REV> <lower> <upper>
+            TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr <= context.text_start || in_range_ignore(state, context.text_ptr[-1], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_REV:
+            // Character not in range.
+            // <NOT_RANGE_REV> <lower> <upper>
+            TRACE(("|%p|%p|NOT_RANGE_REV %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr <= context.text_start || in_range(context.text_ptr[-1], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_SET:
+            // Character not in set.
+            // <NOT_SET> <set>
+            TRACE(("|%p|%p|NOT_SET\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || in_set(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_IGNORE:
+            // Character not in set, ignoring case.
+            // <NOT_SET_IGNORE> <set>
+            TRACE(("|%p|%p|NOT_SET_IGNORE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || in_set_ignore(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_IGNORE_REV:
+            // Character not in set, ignoring case.
+            // <NOT_SET_IGNORE_REV> <set>
+            TRACE(("|%p|%p|NOT_SET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || in_set_ignore(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_REV:
+            // Character not in set.
+            // <NOT_SET_REV> <set>
+            TRACE(("|%p|%p|NOT_SET_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || in_set(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_RANGE:
+            // Character in range.
+            // <RANGE> <lower> <upper>
+            TRACE(("|%p|%p|RANGE %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr >= context.text_end || !in_range(context.text_ptr[0], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_IGNORE:
+            // Character in range, ignoring case.
+            // <RANGE_IGNORE> <lower> <upper>
+            TRACE(("|%p|%p|RANGE_IGNORE %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr >= context.text_end || !in_range_ignore(state, context.text_ptr[0], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_IGNORE_REV:
+            // Character in range, ignoring case.
+            // <RANGE_IGNORE_REV> <lower> <upper>
+            TRACE(("|%p|%p|RANGE_IGNORE_REV %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr <= context.text_start || !in_range_ignore(state, context.text_ptr[-1], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_REV:
+            // Character in range.
+            // <RANGE_REV> <lower> <upper>
+            TRACE(("|%p|%p|RANGE_REV %u %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[1], context.pattern_ptr[2]));
+            if (context.text_ptr <= context.text_start || !in_range(context.text_ptr[-1], context.pattern_ptr[1], context.pattern_ptr[2]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_SET:
+            // Character in set.
+            // <SET> <set>
+            TRACE(("|%p|%p|SET\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || !in_set(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_IGNORE:
+            // Character in set, ignoring case.
+            // <SET_IGNORE> <set>
+            TRACE(("|%p|%p|SET_IGNORE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr >= context.text_end || !in_set_ignore(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_IGNORE_REV:
+            // Character in set, ignoring case.
+            // <SET_IGNORE_REV> <set>
+            TRACE(("|%p|%p|SET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || !in_set_ignore(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_REV:
+            // Character in set.
+            // <SET_REV> <set>
+            TRACE(("|%p|%p|SET_REV\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr <= context.text_start || !in_set(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_REPEAT_MAX:
+        {
+            // Greedy repeat.
+            // <REPEAT_MAX> <skip to end> <min> <max> ... <END_REPEAT_MAX> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_MAX %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MAX, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
             }
-
-            LASTMARK_SAVE();
-
-            /* see if the tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
+            break;
+        }
+        case SRE_OP_REPEAT_MAX_REV:
+        {
+            // Greedy repeat.
+            // <REPEAT_MAX_REV> <skip to end> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_MAX_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MAX_REV, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
+                }
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
             }
-
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-
-            LASTMARK_RESTORE();
-
-            if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
-                RETURN_FAILURE;
-
-            ctx->u.rep->count = ctx->count;
-            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
-                    ctx->u.rep->pattern+3);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
+            break;
+        }
+        case SRE_OP_REPEAT_MIN:
+        {
+            // Lazy repeat.
+            // <REPEAT_MIN> <skip to end> <min> <max> ... <END_REPEAT_MIN> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_MIN %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MIN, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_tail) {
+                if (try_again) {
+                    // Need to save the repeat info for the inner repeat in case the tail fails.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+                    context.backtrack_item->repeat.repeat_min = repeat_min;
+                    context.backtrack_item->repeat.repeat_max = repeat_max;
+                    context.backtrack_item->repeat.repeat_counter = repeat_counter;
+                    context.backtrack_item->repeat.repeat_start = repeat_start;
+                    context.backtrack_item->repeat.pattern_ptr = body;
+                }
+                // Restore the repeat info for the outer repeat.
+                repeat_min = top_nested->repeat.repeat_min;
+                repeat_max = top_nested->repeat.repeat_max;
+                repeat_counter = top_nested->repeat.repeat_counter;
+                repeat_start = top_nested->repeat.repeat_start;
+                top_nested = top_nested->repeat.top_nested;
+                context.pattern_ptr = tail;
+            } else {
+                if (try_again)
+                    context.pattern_ptr = body;
+                else
+                    goto backtrack;
             }
-            ctx->u.rep->count = ctx->count-1;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_GROUPREF:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end || *ctx->ptr != *p)
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
-                    }
+            break;
+        }
+        case SRE_OP_REPEAT_MIN_REV:
+        {
+            // Lazy repeat.
+            // <REPEAT_MIN_REV> <skip to end> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_MIN_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MIN_REV, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_tail) {
+                if (try_again) {
+                    // Need to save the repeat info for the inner repeat in case the tail fails.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested now refers to this (inner) repeat's backtrack item, which holds the saved outer repeat info.
+                    context.backtrack_item->repeat.repeat_min = repeat_min;
+                    context.backtrack_item->repeat.repeat_max = repeat_max;
+                    context.backtrack_item->repeat.repeat_counter = repeat_counter;
+                    context.backtrack_item->repeat.repeat_start = repeat_start;
+                    context.backtrack_item->repeat.pattern_ptr = body;
                 }
+                // Restore the repeat info for the outer repeat.
+                repeat_min = top_nested->repeat.repeat_min;
+                repeat_max = top_nested->repeat.repeat_max;
+                repeat_counter = top_nested->repeat.repeat_counter;
+                repeat_start = top_nested->repeat.repeat_start;
+                top_nested = top_nested->repeat.top_nested;
+                context.pattern_ptr = tail;
+            } else {
+                if (try_again)
+                    context.pattern_ptr = body;
+                else
+                    goto backtrack;
             }
-            ctx->pattern++;
             break;
-
-        case SRE_OP_GROUPREF_IGNORE:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end ||
-                            state->lower(*ctx->ptr) != state->lower(*p))
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
-                    }
+        }
+        case SRE_OP_REPEAT_ONE_MAX:
+        {
+            // Greedy repeat.
+            // <REPEAT_ONE_MAX> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_end - context.text_ptr;
+            SRE_CHAR* start_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            SRE_CHAR* min_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_MAX %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the maximum.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr + rep_max;
+            SRE_MATCH_MANY(&context, state, max_ptr, body);
+            // Unmatch down to the minimum until the tail could match.
+            min_ptr = start_ptr + repeat_ptr[2];
+            if (!SRE_UNMATCH_UNTIL_TAIL(&context, state, min_ptr, tail))
+                // Reached the minimum and the tail still couldn't match.
+                goto backtrack;
+            // Save the repeat info for the inner repeat.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MAX, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_ptr[2];
+            context.backtrack_item->repeat.repeat_max = rep_max;
+            context.backtrack_item->repeat.repeat_counter = context.text_ptr - start_ptr;
+            context.backtrack_item->repeat.pattern_ptr = context.pattern_ptr;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_MAX_REV:
+        {
+            // Greedy repeat.
+            // <REPEAT_ONE_MAX_REV> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_ptr - context.text_start;
+            SRE_CHAR* start_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            SRE_CHAR* min_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_MAX_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the maximum.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr - rep_max;
+            SRE_MATCH_MANY(&context, state, max_ptr, body);
+            // Unmatch down to the minimum until the tail could match.
+            min_ptr = start_ptr - repeat_ptr[2];
+            if (!SRE_UNMATCH_UNTIL_TAIL_REV(&context, state, min_ptr, tail))
+                // Reached the minimum and the tail still couldn't match.
+                goto backtrack;
+            // Save the repeat info for the inner repeat.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MAX_REV, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_ptr[2];
+            context.backtrack_item->repeat.repeat_max = rep_max;
+            context.backtrack_item->repeat.repeat_counter = start_ptr - context.text_ptr;
+            context.backtrack_item->repeat.pattern_ptr = context.pattern_ptr;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_MIN:
+        {
+            // Lazy repeat.
+            // <REPEAT_ONE_MIN> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_end - context.text_ptr;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_MIN %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the minimum.
+            min_ptr = start_ptr + repeat_ptr[2];
+            SRE_MATCH_MANY(&context, state, min_ptr, body);
+            // Matched at least the minimum?
+            if (context.text_ptr < min_ptr)
+                goto backtrack;
+            // Match up to the maximum until the tail could match.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr + rep_max;
+            if(!SRE_MATCH_UNTIL_TAIL(&context, state, max_ptr, body, tail))
+                // Reached the maximum and the tail still couldn't match.
+                goto backtrack;
+            // Save the repeat info for the inner repeat.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MIN, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_ptr[2];
+            context.backtrack_item->repeat.repeat_max = rep_max;
+            context.backtrack_item->repeat.repeat_counter = context.text_ptr - start_ptr;
+            context.backtrack_item->repeat.pattern_ptr = context.pattern_ptr;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_MIN_REV:
+        {
+            // Lazy repeat.
+            // <REPEAT_ONE_MIN_REV> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_ptr - context.text_start;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_MIN_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the minimum.
+            min_ptr = start_ptr - repeat_ptr[2];
+            SRE_MATCH_MANY(&context, state, min_ptr, body);
+            // Matched at least the minimum?
+            if (context.text_ptr > min_ptr)
+                goto backtrack;
+            // Match up to the maximum until the tail could match.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr - rep_max;
+            if(!SRE_MATCH_UNTIL_TAIL(&context, state, max_ptr, body, tail)) // NOTE(review): same helper as the forward REPEAT_ONE_MIN, whereas REPEAT_ONE_MAX_REV uses a _REV helper - confirm this one handles reverse matching.
+                // Reached the maximum and the tail still couldn't match.
+                goto backtrack;
+            // Save the repeat info for the inner repeat.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MIN_REV, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_ptr[2];
+            context.backtrack_item->repeat.repeat_max = rep_max;
+            context.backtrack_item->repeat.repeat_counter = start_ptr - context.text_ptr;
+            context.backtrack_item->repeat.pattern_ptr = context.pattern_ptr;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_POSS:
+        {
+            // Possessive repeat.
+            // <REPEAT_ONE_POSS> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_end - context.text_ptr;
+            SRE_CHAR* start_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            SRE_CHAR* min_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_POSS %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the maximum.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr + rep_max;
+            SRE_MATCH_MANY(&context, state, max_ptr, body);
+            // Matched at least the minimum?
+            min_ptr = start_ptr + repeat_ptr[2];
+            if (context.text_ptr < min_ptr)
+                goto backtrack;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_POSS_REV:
+        {
+            // Possessive repeat.
+            // <REPEAT_ONE_POSS_REV> <skip to end> <min> <max> ...
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+            unsigned int available = context.text_ptr - context.text_start;
+            SRE_CHAR* start_ptr;
+            unsigned int rep_max;
+            SRE_CHAR* max_ptr;
+            SRE_CHAR* min_ptr;
+            TRACE(("|%p|%p|REPEAT_ONE_POSS_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            start_ptr = context.text_ptr;
+            // Match up to the maximum.
+            rep_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            max_ptr = start_ptr - rep_max;
+            SRE_MATCH_MANY(&context, state, max_ptr, body);
+            // Matched at least the minimum?
+            min_ptr = start_ptr - repeat_ptr[2];
+            if (context.text_ptr > min_ptr)
+                goto backtrack;
+            // Now match the tail.
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_POSS:
+        {
+            // Possessive repeat.
+            // <REPEAT_POSS> <skip to end> <min> <max> ... <END_REPEAT_POSS> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_end - context.text_ptr;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_POSS %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_POSS, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            // If the subpattern succeeds then we'll discard the enclosed backtrack info,
+            // including any marks, so we need to save the marks here.
+            memmove(context.backtrack_item->marks, context.marks, context.marks_size);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested now refers to this (inner) repeat's backtrack item, which holds the saved outer repeat info.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
             }
-            ctx->pattern++;
             break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    ctx->pattern += ctx->pattern[1];
-                    break;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p) {
-                        ctx->pattern += ctx->pattern[1];
-                        break;
-                    }
+        }
+        case SRE_OP_REPEAT_POSS_REV:
+        {
+            // Possessive repeat.
+            // <REPEAT_POSS_REV> <skip to end> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
+            SRE_CODE* repeat_ptr = context.pattern_ptr;
+            SRE_CODE* end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            SRE_CODE* body = repeat_ptr + 4;
+            SRE_CODE* tail = end_repeat_ptr + 2;
+            unsigned int available = context.text_ptr - context.text_start;
+            BOOL try_again;
+            BOOL try_tail;
+            TRACE(("|%p|%p|REPEAT_POSS_REV %u %u\n", context.pattern_ptr, context.text_ptr, repeat_ptr[2], repeat_ptr[3]));
+            // Are there enough characters available for the repeat? (We're assuming at least one per iteration, up to the minimum.)
+            if (repeat_ptr[2] > available)
+                goto backtrack;
+            // At this point the repeat info refers to the outer repeat, so save it.
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_POSS_REV, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, state, result);
+            // If the subpattern succeeds then we'll discard the enclosed backtrack info,
+            // including any marks, so we need to save the marks here.
+            memmove(context.backtrack_item->marks, context.marks, context.marks_size);
+            context.backtrack_item->repeat.top_nested = top_nested; // top_nested currently refers to the outer repeat.
+            context.backtrack_item->repeat.text_ptr = context.text_ptr;
+            context.backtrack_item->repeat.repeat_min = repeat_min;
+            context.backtrack_item->repeat.repeat_max = repeat_max;
+            context.backtrack_item->repeat.repeat_counter = repeat_counter;
+            context.backtrack_item->repeat.repeat_start = repeat_start;
+            // Initialise the repeat info for the inner repeat.
+            top_nested = context.backtrack_item;
+            repeat_min = repeat_ptr[2];
+            repeat_max = repeat_ptr[3] == SRE_UNLIMITED_REPEATS ? available : repeat_ptr[3];
+            repeat_counter = 0;
+            repeat_start = context.text_ptr;
+            try_again = available > 0;
+            try_tail = repeat_min == 0 && SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail);
+            if (try_again) {
+                if (try_tail) {
+                    // Save this position for possible match of the tail.
+                    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS_REV, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, state, result);
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.top_nested = top_nested; // top_nested now refers to this (inner) repeat's backtrack item, which holds the saved outer repeat info.
+                    context.backtrack_item->repeat.pattern_ptr = tail;
                 }
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    // Restore the repeat info for the outer repeat.
+                    repeat_min = top_nested->repeat.repeat_min;
+                    repeat_max = top_nested->repeat.repeat_max;
+                    repeat_counter = top_nested->repeat.repeat_counter;
+                    repeat_start = top_nested->repeat.repeat_start;
+                    top_nested = top_nested->repeat.top_nested;
+                    context.pattern_ptr = tail;
+                } else
+                    goto backtrack;
             }
-            ctx->pattern += 2;
             break;
-
-        case SRE_OP_ASSERT:
-            /* assert subpattern */
-            /* <ASSERT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr < state->beginning)
-                RETURN_FAILURE;
-            DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
-            RETURN_ON_FAILURE(ret);
-            ctx->pattern += ctx->pattern[0];
+        }
+        case SRE_OP_START_OF_LINE:
+            // Start of line.
+            // <START_OF_LINE>
+            TRACE(("|%p|%p|START_OF_LINE\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr > context.text_beginning && !state->encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[-1]))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_START_OF_STRING:
+            // Start of string.
+            // <START_OF_STRING>
+            TRACE(("|%p|%p|START_OF_STRING\n", context.pattern_ptr, context.text_ptr));
+            if (context.text_ptr > context.text_beginning)
+                goto backtrack;
+            context.pattern_ptr++;
             break;
-
-        case SRE_OP_ASSERT_NOT:
-            /* assert not subpattern */
-            /* <ASSERT_NOT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr >= state->beginning) {
-                DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_FAILURE;
+        case SRE_OP_SUCCESS:
+        {
+            // End of pattern.
+            // <SUCCESS>
+            int zero_width;
+            int m;
+            SRE_CHAR* end_ptr;
+            TRACE(("|%p|%p|SUCCESS\n", context.pattern_ptr, context.text_ptr));
+            // Is the entire matched portion zero-width?
+            zero_width = context.text_ptr == context.text_start;
+            // Reject the match if it's zero-width and we aren't allowed to return it.
+            if (zero_width && state->reject_zero_width)
+                goto backtrack;
+
+            // Find the numbered mark which matched the furthest to the right.
+            end_ptr = NULL;
+            for (m = 1; m < state->numbered_mark_count; m += 2) {
+                TRACE(("context.marks[%u] = 0x%p, context.marks[%u] = 0x%p\n", m - 1, context.marks[m - 1], m, context.marks[m]));
+                if (context.marks[m - 1] != NULL && context.marks[m] >= context.marks[m - 1]) {
+                    state->lastmark = m;
+                    if (end_ptr < context.marks[m]) {
+                        state->lastindex = 1 + m / 2;
+                        end_ptr = context.marks[m];
+                    }
                 }
             }
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_FAILURE:
-            /* immediate failure */
-            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
-            RETURN_FAILURE;
 
+            // Find the named mark which matched the furthest to the right.
+            end_ptr = NULL;
+            for (m = state->numbered_mark_count + 1; m < state->numbered_mark_count + state->named_mark_count; m += 2) {
+                TRACE(("context.marks[%u] = 0x%p, context.marks[%u] = 0x%p\n", m - 1, context.marks[m - 1], m, context.marks[m]));
+                if (context.marks[m - 1] != NULL && context.marks[m] >= context.marks[m - 1]) {
+                    if (end_ptr < context.marks[m]) {
+                        state->last_named_index = 1 + m / 2;
+                        end_ptr = context.marks[m];
+                    }
+                }
+            }
+            state->ptr = context.text_ptr;
+            return SRE_CLEANUP(&context, state, 1);
+        }
         default:
-            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[-1]));
-            RETURN_ERROR(SRE_ERROR_ILLEGAL);
-        }
-    }
-
-exit:
-    ctx_pos = ctx->last_ctx_pos;
-    jump = ctx->jump;
-    DATA_POP_DISCARD(ctx);
-    if (ctx_pos == -1)
-        return ret;
-    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-
-    switch (jump) {
-        case JUMP_MAX_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_2;
-        case JUMP_MAX_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_3;
-        case JUMP_MIN_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_2;
-        case JUMP_MIN_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_3;
-        case JUMP_BRANCH:
-            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
-            goto jump_branch;
-        case JUMP_MAX_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_1;
-        case JUMP_MIN_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_1;
-        case JUMP_REPEAT:
-            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat;
-        case JUMP_REPEAT_ONE_1:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_1;
-        case JUMP_REPEAT_ONE_2:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_2;
-        case JUMP_MIN_REPEAT_ONE:
-            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
-            goto jump_min_repeat_one;
-        case JUMP_ASSERT:
-            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert;
-        case JUMP_ASSERT_NOT:
-            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert_not;
-        case JUMP_NONE:
-            TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
-            break;
+            TRACE(("|%p|%p|UNKNOWN %u\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
+            return SRE_CLEANUP(&context, state, SRE_ERROR_ILLEGAL);
+        }
+    }
+
+backtrack:
+    TRACE(("|%p|%p|BACKTRACK ", context.pattern_ptr, context.text_ptr));
+    context.backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
+    switch (context.backtrack_item->op) {
+    case SRE_OP_ASSERT:
+        // Assert subpattern.
+        // <ASSERT> <skip to end> ... <END_ASSERT>
+        TRACE(("ASSERT\n"));
+        // The subpattern has failed, so the marks have already backtracked and been restored.
+        context.text_start = context.backtrack_item->assert.text_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_ASSERT_NOT:
+        // Assert not subpattern.
+        // <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
+        TRACE(("ASSERT_NOT\n"));
+        // The subpattern has failed, so the marks have already backtracked and been restored.
+        context.text_start = context.backtrack_item->assert.text_start;
+        context.text_ptr = context.backtrack_item->assert.text_ptr;
+        context.pattern_ptr = context.backtrack_item->assert.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        context.pattern_ptr += 1 + context.pattern_ptr[1];
+        goto advance;
+    case SRE_OP_ATOMIC:
+        // Atomic subpattern.
+        // <ATOMIC> ... <END_ATOMIC>
+        TRACE(("ATOMIC\n"));
+        // The subpattern has failed, so the marks have already backtracked and been restored.
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_BRANCH:
+    {
+        // Alternation.
+        // <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
+        SRE_CODE* skip_ptr = context.backtrack_item->branch.pattern_ptr;
+        TRACE(("BRANCH\n"));
+        context.text_ptr = context.backtrack_item->branch.text_ptr;
+        // Look ahead in the branch to avoid unnecessary backtracking.
+        while (! SRE_POSSIBLE_MATCH_AHEAD(&context, state, skip_ptr + 1)) {
+            skip_ptr += skip_ptr[0];
+            // Is there another branch?
+            if (skip_ptr[0] == 0) {
+                // No more branches, so backtrack.
+                SRE_DISCARD_BACKTRACK(&context);
+                goto backtrack;
+            }
+        }
+        // Try this branch.
+        context.pattern_ptr = skip_ptr + 1;
+        // Is there another branch?
+        skip_ptr += skip_ptr[0];
+        if (skip_ptr[0] == 0)
+            // No more branches after this one.
+            SRE_DISCARD_BACKTRACK(&context);
+        else
+            // Save the next branch for backtracking.
+            context.backtrack_item->branch.pattern_ptr = skip_ptr;
+        goto advance;
+    }
+    case SRE_OP_END_ATOMIC:
+        // Atomic subpattern.
+        // <ATOMIC> <skip to end> ... <END_ATOMIC>
+        TRACE(("END_ATOMIC\n"));
+        // Restore the marks.
+        memmove(context.marks, context.backtrack_item->marks, context.marks_size);
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_END_REPEAT_MAX:
+        // End of greedy repeat.
+        // <REPEAT_MAX> <skip to end> <min> <max> ... <END_REPEAT_MAX> <skip to start>
+        TRACE(("END_REPEAT_MAX\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        repeat_min = top_nested->repeat.repeat_min;
+        repeat_max = top_nested->repeat.repeat_max;
+        repeat_counter = top_nested->repeat.repeat_counter;
+        repeat_start = top_nested->repeat.repeat_start;
+        goto advance;
+    case SRE_OP_END_REPEAT_MAX_REV:
+        // End of greedy repeat.
+        // <REPEAT_MAX_REV> <skip to end> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
+        TRACE(("END_REPEAT_MAX_REV\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        repeat_min = top_nested->repeat.repeat_min;
+        repeat_max = top_nested->repeat.repeat_max;
+        repeat_counter = top_nested->repeat.repeat_counter;
+        repeat_start = top_nested->repeat.repeat_start;
+        goto advance;
+    case SRE_OP_END_REPEAT_MIN:
+        // Lazy repeat.
+        // <REPEAT_MIN> <skip to end> <min> <max> ... <END_REPEAT_MIN> <skip to start>
+        TRACE(("END_REPEAT_MIN\n"));
+        // Restore the repeat info for the inner repeat.
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto advance;
+    case SRE_OP_END_REPEAT_MIN_REV:
+        // Lazy repeat.
+        // <REPEAT_MIN_REV> <skip to end> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
+        TRACE(("END_REPEAT_MIN_REV\n"));
+        // Restore the repeat info for the inner repeat.
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto advance;
+    case SRE_OP_END_REPEAT_POSS:
+        // End of greedy repeat.
+        // <REPEAT_POSS> <skip to end> <min> <max> ... <END_REPEAT_POSS> <skip to start>
+        TRACE(("END_REPEAT_POSS\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        repeat_min = top_nested->repeat.repeat_min;
+        repeat_max = top_nested->repeat.repeat_max;
+        repeat_counter = top_nested->repeat.repeat_counter;
+        repeat_start = top_nested->repeat.repeat_start;
+        goto advance;
+    case SRE_OP_END_REPEAT_POSS_REV:
+        // End of greedy repeat.
+        // <REPEAT_POSS_REV> <skip to end> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
+        TRACE(("END_REPEAT_POSS_REV\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+        repeat_min = top_nested->repeat.repeat_min;
+        repeat_max = top_nested->repeat.repeat_max;
+        repeat_counter = top_nested->repeat.repeat_counter;
+        repeat_start = top_nested->repeat.repeat_start;
+        goto advance;
+    case SRE_OP_FAILURE:
+        // Failed to match.
+        TRACE(("FAILURE\n"));
+        state->reject_zero_width = 0;
+        return SRE_CLEANUP(&context, state, 0);
+    case SRE_OP_MARK:
+        // Set mark.
+        // <MARK> <numbered_index> <named_index>
+        TRACE(("MARK\n"));
+        // The numbered and named marks need to be restored in the opposite order to which they were saved.
+        context.marks[context.backtrack_item->mark.named_index] = context.backtrack_item->mark.named_mark_ptr;
+        DEBUG_TRACE(("restoring mark %u to 0x%p\n", context.backtrack_item->mark.named_index, context.backtrack_item->mark.named_mark_ptr));
+        context.marks[context.backtrack_item->mark.numbered_index] = context.backtrack_item->mark.numbered_mark_ptr;
+        DEBUG_TRACE(("restoring mark %u to 0x%p\n", context.backtrack_item->mark.numbered_index, context.backtrack_item->mark.numbered_mark_ptr));
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_MAX:
+        // Greedy repeat.
+        // <REPEAT_MAX> <skip to end> <min> <max> ... <END_REPEAT_MAX> <skip to start>
+        TRACE(("REPEAT_MAX\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_MAX_REV:
+        // Greedy repeat.
+        // <REPEAT_MAX_REV> <skip to end> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
+        TRACE(("REPEAT_MAX_REV\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_MIN:
+        // Lazy repeat.
+        // <REPEAT_MIN> <skip to end> <min> <max> ... <END_REPEAT_MIN> <skip to start>
+        TRACE(("REPEAT_MIN\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_MIN_REV:
+        // Lazy repeat.
+        // <REPEAT_MIN_REV> <skip to end> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
+        TRACE(("REPEAT_MIN_REV\n"));
+        // Restore the repeat info for the outer repeat.
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_ONE_MAX:
+    {
+        // Greedy repeat.
+        // <REPEAT_ONE_MAX> <skip to end> <min> <max> ...
+        SRE_CODE* repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* min_ptr;
+        TRACE(("REPEAT_ONE_MAX\n"));
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        start_ptr = context.text_ptr - context.backtrack_item->repeat.repeat_counter;
+        // Match down to the minimum until the tail could match.
+        min_ptr = start_ptr + context.backtrack_item->repeat.repeat_min;
+        // Release a character.
+        context.text_ptr--;
+        if(!SRE_UNMATCH_UNTIL_TAIL(&context, state, min_ptr, tail)) {
+            // Reached the minimum and the tail still couldn't match.
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+        context.backtrack_item->repeat.text_ptr = context.text_ptr;
+        context.backtrack_item->repeat.repeat_counter = context.text_ptr - start_ptr;
+        // Now match the tail.
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_ONE_MAX_REV:
+    {
+        // Greedy repeat.
+        // <REPEAT_ONE_MAX_REV> <skip to end> <min> <max> ...
+        SRE_CODE* repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* min_ptr;
+        TRACE(("REPEAT_ONE_MAX_REV\n"));
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        start_ptr = context.text_ptr + context.backtrack_item->repeat.repeat_counter;
+        // Match down to the minimum until the tail could match.
+        min_ptr = start_ptr - context.backtrack_item->repeat.repeat_min;
+        // Release a character.
+        context.text_ptr++;
+        if(!SRE_UNMATCH_UNTIL_TAIL_REV(&context, state, min_ptr, tail)) {
+            // Reached the minimum and the tail still couldn't match.
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+        context.backtrack_item->repeat.text_ptr = context.text_ptr;
+        context.backtrack_item->repeat.repeat_counter = start_ptr - context.text_ptr;
+        // Now match the tail.
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_ONE_MIN:
+    {
+        // Lazy repeat.
+        // <REPEAT_ONE_MIN> <skip to end> <min> <max> ...
+        SRE_CODE* repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_CODE* body = repeat_ptr + 4;
+        SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+        unsigned int available;
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* max_ptr;
+        TRACE(("REPEAT_ONE_MIN\n"));
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        available = context.text_end - context.text_ptr;
+        start_ptr = context.text_ptr - context.backtrack_item->repeat.repeat_counter;
+        // Consume a character.
+        context.text_ptr++;
+        // Match up to the maximum until the tail could match.
+        max_ptr = start_ptr + context.backtrack_item->repeat.repeat_max;
+        if(context.text_ptr > max_ptr || !SRE_MATCH_UNTIL_TAIL(&context, state, max_ptr, body, tail)) {
+            // Reached the maximum and the tail still couldn't match.
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+        // Now match the tail.
+        context.backtrack_item->repeat.text_ptr = context.text_ptr;
+        context.backtrack_item->repeat.repeat_counter = context.text_ptr - start_ptr;
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_ONE_MIN_REV:
+    {
+        // Lazy repeat.
+        // <REPEAT_ONE_MIN_REV> <skip to end> <min> <max> ...
+        SRE_CODE* repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+        SRE_CODE* body = repeat_ptr + 4;
+        SRE_CODE* tail = repeat_ptr + 1 + repeat_ptr[1];
+        unsigned int available;
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* max_ptr;
+        TRACE(("REPEAT_ONE_MIN_REV\n"));
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        available = context.text_ptr - context.text_start;
+        start_ptr = context.text_ptr + context.backtrack_item->repeat.repeat_counter;
+        // Consume a character.
+        context.text_ptr--;
+        // Match up to the maximum until the tail could match.
+        max_ptr = start_ptr - context.backtrack_item->repeat.repeat_max;
+        if(context.text_ptr < max_ptr || !SRE_MATCH_UNTIL_TAIL(&context, state, max_ptr, body, tail)) {
+            // Reached the maximum and the tail still couldn't match.
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+        // Now match the tail.
+        context.backtrack_item->repeat.text_ptr = context.text_ptr;
+        context.backtrack_item->repeat.repeat_counter = start_ptr - context.text_ptr;
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_POSS:
+        // Possessive repeat.
+        // <REPEAT_POSS> <skip to end> <min> <max> ... <END_REPEAT_POSS> <skip to start>
+        TRACE(("REPEAT_POSS\n"));
+        // Restore the repeat info for the outer repeat.
+        memmove(context.marks, context.backtrack_item->marks, context.marks_size);
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_POSS_REV:
+        // Possessive repeat.
+        // <REPEAT_POSS_REV> <skip to end> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
+        TRACE(("REPEAT_POSS_REV\n"));
+        // Restore the repeat info for the outer repeat.
+        memmove(context.marks, context.backtrack_item->marks, context.marks_size);
+        top_nested = context.backtrack_item->repeat.top_nested;
+        repeat_min = context.backtrack_item->repeat.repeat_min;
+        repeat_max = context.backtrack_item->repeat.repeat_max;
+        repeat_counter = context.backtrack_item->repeat.repeat_counter;
+        repeat_start = context.backtrack_item->repeat.repeat_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    default:
+        TRACE(("UNKNOWN %u\n", context.backtrack_item->op));
+        return SRE_CLEANUP(&context, state, SRE_ERROR_ILLEGAL);
     }
 
-    return ret; /* should never get here */
+    return 0;
 }
 
-LOCAL(Py_ssize_t)
-SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
-{
-    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
+LOCAL(Py_ssize_t) SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) {
+    SRE_CODE* tail; // Pattern code after any leading MARK ops; used only for the look-ahead test.
+    SRE_CONTEXT context;
     Py_ssize_t status = 0;
-    Py_ssize_t prefix_len = 0;
-    Py_ssize_t prefix_skip = 0;
-    SRE_CODE* prefix = NULL;
-    SRE_CODE* charset = NULL;
-    SRE_CODE* overlap = NULL;
-    int flags = 0;
 
-    if (pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
-
-        flags = pattern[2];
-
-        if (pattern[3] > 1) {
-            /* adjust end point (but make sure we leave at least one
-               character in there, so literal search will work) */
-            end -= pattern[3]-1;
-            if (end <= ptr)
-                end = ptr+1;
-        }
-
-        if (flags & SRE_INFO_PREFIX) {
-            /* pattern starts with a known prefix */
-            /* <length> <skip> <prefix data> <overlap data> */
-            prefix_len = pattern[5];
-            prefix_skip = pattern[6];
-            prefix = pattern + 7;
-            overlap = prefix + prefix_len - 1;
-        } else if (flags & SRE_INFO_CHARSET)
-            /* pattern starts with a character from a known set */
-            /* <charset> */
-            charset = pattern + 5;
-
-        pattern += 1 + pattern[1];
-    }
-
-    TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
-    TRACE(("charset = %p\n", charset));
-
-#if defined(USE_FAST_SEARCH)
-    if (prefix_len > 1) {
-        /* pattern starts with a known prefix.  use the overlap
-           table to skip forward as fast as we possibly can */
-        Py_ssize_t i = 0;
-        end = (SRE_CHAR *)state->end;
-        while (ptr < end) {
-            for (;;) {
-                if ((SRE_CODE) ptr[0] != prefix[i]) {
-                    if (!i)
-                        break;
-                    else
-                        i = overlap[i];
-                } else {
-                    if (++i == prefix_len) {
-                        /* found a potential match */
-                        TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
-                        state->start = ptr + 1 - prefix_len;
-                        state->ptr = ptr + 1 - prefix_len + prefix_skip;
-                        if (flags & SRE_INFO_LITERAL)
-                            return 1; /* we got all of it */
-                        status = SRE_MATCH(state, pattern + 2*prefix_skip);
-                        if (status != 0)
-                            return status;
-                        /* close but no cigar -- try again */
-                        i = overlap[i];
-                    }
+    tail = pattern;
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    context.text_beginning = (SRE_CHAR *)state->beginning;
+    context.text_start = (SRE_CHAR *)state->start;
+    context.text_end = (SRE_CHAR *)state->end;
+
+    // Point to the last character if there is text before text_end and that character is in the LineBreak category, else NULL.
+    context.final_linebreak = context.text_beginning < context.text_end &&
+        state->encoding->in_category(SRE_CAT_LineBreak, context.text_end[-1]) ? context.text_end - 1 : NULL;
+
+    // state->reject_zero_width might initially be set to reject an initial zero-width match.
+    // If there's no match initially then state->reject_zero_width will be cleared to allow any kind of match subsequently.
+    if (state->reverse) {
+        context.text_ptr = (SRE_CHAR *)state->end; // Reverse search: start at the end and step backwards.
+
+        while (context.text_ptr >= context.text_start) {
+            TRACE(("|%p|%p|SEARCH\n", pattern, context.text_ptr));
+            if (SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail)) {
+                state->end = state->ptr = context.text_ptr;
+                status = SRE_MATCH(state, state->pattern_code); // NOTE(review): uses state->pattern_code, not 'pattern' -- presumably the same code; confirm.
+                if (status != 0)
                     break;
-                }
             }
-            ptr++;
+            context.text_ptr--; // No result at this position: try the previous one.
+            state->reject_zero_width = 0;
         }
-        return 0;
-    }
-#endif
+    } else {
+        context.text_ptr = (SRE_CHAR *)state->start; // Forward search: start at the start and step forwards.
 
-    if (pattern[0] == SRE_OP_LITERAL) {
-        /* pattern starts with a literal character.  this is used
-           for short prefixes, and if fast search is disabled */
-        SRE_CODE chr = pattern[1];
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && (SRE_CODE) ptr[0] != chr)
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ++ptr;
-            if (flags & SRE_INFO_LITERAL)
-                return 1; /* we got all of it */
-            status = SRE_MATCH(state, pattern + 2);
-            if (status != 0)
-                break;
-        }
-    } else if (charset) {
-        /* pattern starts with a character from a known set */
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ptr;
-            status = SRE_MATCH(state, pattern);
-            if (status != 0)
-                break;
-            ptr++;
-        }
-    } else
-        /* general case */
-        while (ptr <= end) {
-            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
-            state->start = state->ptr = ptr++;
-            status = SRE_MATCH(state, pattern);
-            if (status != 0)
-                break;
+        while (context.text_ptr <= context.text_end) {
+            TRACE(("|%p|%p|SEARCH\n", pattern, context.text_ptr));
+            if (SRE_POSSIBLE_MATCH_AHEAD(&context, state, tail)) {
+                state->start = state->ptr = context.text_ptr;
+                status = SRE_MATCH(state, state->pattern_code); // NOTE(review): uses state->pattern_code, not 'pattern' -- presumably the same code; confirm.
+                if (status != 0)
+                    break;
+            }
+            context.text_ptr++; // No result at this position: try the next one.
+            state->reject_zero_width = 0;
         }
+    }
 
     return status;
 }
 
-LOCAL(int)
-SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
-{
+LOCAL(int) SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len) { /* scans len characters starting at ptr */
     /* check if given string is a literal template (i.e. no escapes) */
     while (len-- > 0)
         if (*ptr++ == '\\')
             return 0;
-    return 1;
+    return TRUE; /* none of the len characters was a backslash */
 }
 
 #if !defined(SRE_RECURSIVE)
@@ -1630,49 +3555,65 @@
 /* factories and destructors */
 
 /* see sre.h for object declarations */
-static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
-static PyObject*pattern_scanner(PatternObject*, PyObject*);
+static PyObject* pattern_new_match(PatternObject*, SRE_STATE*, int);
+static PyObject* pattern_scanner(PatternObject*, PyObject*);
 
-static PyObject *
-sre_codesize(PyObject* self, PyObject *unused)
-{
+static PyObject* sre_codesize(PyObject* self, PyObject *unused) { /* returns sizeof(SRE_CODE) as a Python integer; NOTE(review): "l" reads a long but sizeof yields a size_t -- confirm this is safe on every target */
     return Py_BuildValue("l", sizeof(SRE_CODE));
 }
 
-static PyObject *
-sre_getlower(PyObject* self, PyObject* args)
-{
+static PyObject* sre_getlower(PyObject* self, PyObject* args) { /* args: (character, flags) as two ints; returns the int produced by the lower() helper that flags select, or NULL if parsing fails */
     int character, flags;
     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
         return NULL;
     if (flags & SRE_FLAG_LOCALE)
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", loc_lower(character)); /* LOCALE is tested first, so it takes priority over UNICODE */
     if (flags & SRE_FLAG_UNICODE)
 #if defined(HAVE_UNICODE)
-        return Py_BuildValue("i", sre_lower_unicode(character));
+        return Py_BuildValue("i", uni_lower(character));
 #else
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", loc_lower(character)); /* HAVE_UNICODE not defined: use the locale helper instead */
 #endif
-    return Py_BuildValue("i", sre_lower(character));
+    return Py_BuildValue("i", ascii_lower(character)); /* neither LOCALE nor UNICODE set */
 }
 
-LOCAL(void)
-state_reset(SRE_STATE* state)
-{
-    /* FIXME: dynamic! */
-    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
+static PyObject* sre_getupper(PyObject* self, PyObject* args) { /* args: (character, flags) as two ints; returns the int produced by the upper() helper that flags select, or NULL if parsing fails */
+    int character, flags;
+    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
+        return NULL;
+    if (flags & SRE_FLAG_LOCALE)
+        return Py_BuildValue("i", loc_upper(character)); /* LOCALE is tested first, so it takes priority over UNICODE */
+    if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
+        return Py_BuildValue("i", uni_upper(character));
+#else
+        return Py_BuildValue("i", loc_upper(character)); /* HAVE_UNICODE not defined: use the locale helper instead */
+#endif
+    return Py_BuildValue("i", ascii_upper(character)); /* neither LOCALE nor UNICODE set */
+}
+
+static PyObject* sre_gettitle(PyObject* self, PyObject* args) { /* args: (character, flags) as two ints; only the Unicode path calls a title helper, the other paths call the upper() helpers */
+    int character, flags;
+    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
+        return NULL;
+    if (flags & SRE_FLAG_LOCALE)
+        return Py_BuildValue("i", loc_upper(character)); /* LOCALE is tested first; it uses loc_upper (no locale title helper is called) */
+    if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
+        return Py_BuildValue("i", uni_title(character));
+#else
+        return Py_BuildValue("i", loc_upper(character)); /* HAVE_UNICODE not defined: use the locale upper() helper instead */
+#endif
+    return Py_BuildValue("i", ascii_upper(character)); /* neither LOCALE nor UNICODE set */
+}
 
+LOCAL(void) state_reset(SRE_STATE* state) { /* sets lastmark, lastindex and last_named_index to -1; nothing else in state is touched */
     state->lastmark = -1;
     state->lastindex = -1;
-
-    state->repeat = NULL;
-
-    data_stack_dealloc(state);
+    state->last_named_index = -1;
 }
 
-static void*
-getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
-{
+static void* getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize) {
     /* given a python object, return a data pointer, a length (in
        characters), and a character size.  return NULL if the object
        is not a string (or not compatible) */
@@ -1694,7 +3635,7 @@
 #endif
 
     /* get pointer to string buffer */
-    buffer = Py_TYPE(string)->tp_as_buffer;
+    buffer = string->ob_type->tp_as_buffer;
     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
         buffer->bf_getsegcount(string, NULL) != 1) {
         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
@@ -1736,10 +3677,8 @@
     return ptr;
 }
 
-LOCAL(PyObject*)
-state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
-           Py_ssize_t start, Py_ssize_t end)
-{
+LOCAL(PyObject*) state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
+           Py_ssize_t start, Py_ssize_t end, SRE_CODE* pattern_code) { /* returns string on success, NULL on failure */
     /* prepare state object */
 
     Py_ssize_t length;
@@ -1748,12 +3687,25 @@
 
     memset(state, 0, sizeof(SRE_STATE));
 
+    state->pattern_code = pattern_code; /* kept so that the search code can call SRE_MATCH with it */
+
+    state->backtrack_chunk = (SRE_BACKTRACK_CHUNK*)PyMem_MALLOC(sizeof(SRE_BACKTRACK_CHUNK));
+    if (state->backtrack_chunk == NULL)
+        return PyErr_NoMemory(); /* PyMem_MALLOC sets no exception itself; nothing has been allocated yet, so there is nothing to free */
+
+    state->backtrack_chunk->previous = NULL;
+    state->backtrack_chunk->count = 0;
+
+    state->numbered_mark_count = 2 * pattern->groups; /* two marks per group -- presumably start and end; confirm */
+    state->named_mark_count = 2 * (pattern->internal_groups - pattern->groups);
+
     state->lastmark = -1;
     state->lastindex = -1;
+    state->last_named_index = -1;
 
     ptr = getstring(string, &length, &charsize);
     if (!ptr)
-        return NULL;
+        goto error; /* the backtrack chunk allocated above must be released */
 
     /* adjust boundaries */
     if (start < 0)
@@ -1773,44 +3725,49 @@
     state->start = (void*) ((char*) ptr + start * state->charsize);
     state->end = (void*) ((char*) ptr + end * state->charsize);
 
+    state->reject_zero_width = 0;
+
     Py_INCREF(string);
     state->string = string;
     state->pos = start;
     state->endpos = end;
 
-    if (pattern->flags & SRE_FLAG_LOCALE)
-        state->lower = sre_lower_locale;
-    else if (pattern->flags & SRE_FLAG_UNICODE)
-#if defined(HAVE_UNICODE)
-        state->lower = sre_lower_unicode;
-#else
-        state->lower = sre_lower_locale;
-#endif
+    if ((pattern->flags & SRE_FLAG_UNICODE) || state->charsize == 2) /* UNICODE flag or charsize 2 takes priority over LOCALE */
+        state->encoding = &sre_unicode_encoding;
+    else if (pattern->flags & SRE_FLAG_LOCALE)
+        state->encoding = &locale_encoding;
     else
-        state->lower = sre_lower;
+        state->encoding = &ascii_encoding;
+
+    state->reverse = pattern->flags & SRE_FLAG_REVERSE; /* non-zero selects the reverse search/match paths */
 
     return string;
+
+error: /* reached only after the backtrack chunk has been allocated */
+    PyMem_FREE(state->backtrack_chunk);
+    return NULL;
 }
 
-LOCAL(void)
-state_fini(SRE_STATE* state)
-{
+LOCAL(void) state_fini(SRE_STATE* state) { /* frees state->backtrack_chunk and drops the reference to state->string */
+    /* There are actually 2 versions of backtrack_chunk, 8-bit and Unicode.
+       This shouldn't be a problem because they have the same format
+       and contain pointers and an int, which are always the same size. */
+    PyMem_FREE(state->backtrack_chunk); /* NOTE(review): frees this one chunk only; chunks linked via ->previous are presumably released elsewhere -- confirm */
+    state->backtrack_chunk = NULL;
+
     Py_XDECREF(state->string);
-    data_stack_dealloc(state);
 }
 
 /* calculate offset from start of string */
-#define STATE_OFFSET(state, member)\
+#define STATE_OFFSET(state, member) \
     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
 
-LOCAL(PyObject*)
-state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
-{
+LOCAL(PyObject*) state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) {
     Py_ssize_t i, j;
 
     index = (index - 1) * 2;
 
-    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
+    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index + 1]) {
         if (empty)
             /* want empty string */
             i = j = 0;
@@ -1820,15 +3777,13 @@
         }
     } else {
         i = STATE_OFFSET(state, state->mark[index]);
-        j = STATE_OFFSET(state, state->mark[index+1]);
+        j = STATE_OFFSET(state, state->mark[index + 1]);
     }
 
     return PySequence_GetSlice(string, i, j);
 }
 
-static void
-pattern_error(int status)
-{
+static void pattern_error(int status) {
     switch (status) {
     case SRE_ERROR_RECURSION_LIMIT:
         PyErr_SetString(
@@ -1851,23 +3806,19 @@
     }
 }
 
-static void
-pattern_dealloc(PatternObject* self)
-{
+static void pattern_dealloc(PatternObject* self) { /* clears weak references if any, drops the pattern, groupindex and indexgroup references, then frees self */
     if (self->weakreflist != NULL)
-        PyObject_ClearWeakRefs((PyObject *) self);
+        PyObject_ClearWeakRefs((PyObject*)self);
     Py_XDECREF(self->pattern);
     Py_XDECREF(self->groupindex);
     Py_XDECREF(self->indexgroup);
     PyObject_DEL(self);
 }
 
-static PyObject*
-pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject* kw) {
     SRE_STATE state;
     int status;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -1876,23 +3827,25 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
-    state.ptr = state.start;
+    state.ptr = state.reverse ? state.end : state.start;
 
-    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|MATCH\n", pattern_code, state.ptr));
 
     if (state.charsize == 1) {
-        status = sre_match(&state, PatternObject_GetCode(self));
+        status = sre_bmatch(&state, state.pattern_code);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_umatch(&state, PatternObject_GetCode(self));
+        status = sre_umatch(&state, state.pattern_code);
 #endif
     }
 
-    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|END\n", pattern_code, state.ptr));
     if (PyErr_Occurred())
         return NULL;
 
@@ -1901,12 +3854,10 @@
     return pattern_new_match(self, &state, status);
 }
 
-static PyObject*
-pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* kw) {
     SRE_STATE state;
     int status;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -1915,17 +3866,19 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
-    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|SEARCH\n", pattern_code, state.ptr));
 
     if (state.charsize == 1) {
-        status = sre_search(&state, PatternObject_GetCode(self));
+        status = sre_bsearch(&state, state.pattern_code);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_usearch(&state, PatternObject_GetCode(self));
+        status = sre_usearch(&state, state.pattern_code);
 #endif
     }
 
@@ -1939,9 +3892,7 @@
     return pattern_new_match(self, &state, status);
 }
 
-static PyObject*
-call(char* module, char* function, PyObject* args)
-{
+static PyObject* call(char* module, char* function, PyObject* args) {
     PyObject* name;
     PyObject* mod;
     PyObject* func;
@@ -1967,9 +3918,7 @@
 }
 
 #ifdef USE_BUILTIN_COPY
-static int
-deepcopy(PyObject** object, PyObject* memo)
-{
+static int deepcopy(PyObject** object, PyObject* memo) {
     PyObject* copy;
 
     copy = call(
@@ -1986,9 +3935,7 @@
 }
 #endif
 
-static PyObject*
-join_list(PyObject* list, PyObject* string)
-{
+static PyObject* join_list(PyObject* list, PyObject* string) {
     /* join list elements */
 
     PyObject* joiner;
@@ -2034,14 +3981,12 @@
     return result;
 }
 
-static PyObject*
-pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) {
     SRE_STATE state;
     PyObject* list;
     int status;
     Py_ssize_t i, b, e;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -2050,7 +3995,9 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
@@ -2066,18 +4013,18 @@
 
         state_reset(&state);
 
-        state.ptr = state.start;
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state, state.pattern_code);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state, state.pattern_code);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2089,8 +4036,13 @@
         /* don't bother to build a match object */
         switch (self->groups) {
         case 0:
-            b = STATE_OFFSET(&state, state.start);
-            e = STATE_OFFSET(&state, state.ptr);
+            if (state.reverse) {
+                b = STATE_OFFSET(&state, state.ptr);
+                e = STATE_OFFSET(&state, state.end);
+            } else {
+                b = STATE_OFFSET(&state, state.start);
+                e = STATE_OFFSET(&state, state.ptr);
+            }
             item = PySequence_GetSlice(string, b, e);
             if (!item)
                 goto error;
@@ -2105,7 +4057,7 @@
             if (!item)
                 goto error;
             for (i = 0; i < self->groups; i++) {
-                PyObject* o = state_getslice(&state, i+1, string, 1);
+                PyObject* o = state_getslice(&state, i + 1, string, 1);
                 if (!o) {
                     Py_DECREF(item);
                     goto error;
@@ -2120,11 +4072,12 @@
         if (status < 0)
             goto error;
 
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+        // Continue search from where we left off, but reject an initial zero-width match.
+        if (state.reverse)
+            state.end = state.ptr;
         else
             state.start = state.ptr;
-
+        state.reject_zero_width = 1;
     }
 
     state_fini(&state);
@@ -2138,9 +4091,7 @@
 }
 
 #if PY_VERSION_HEX >= 0x02020000
-static PyObject*
-pattern_finditer(PatternObject* pattern, PyObject* args)
-{
+static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args) {
     PyObject* scanner;
     PyObject* search;
     PyObject* iterator;
@@ -2161,16 +4112,16 @@
 }
 #endif
 
-static PyObject*
-pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* kw) {
     SRE_STATE state;
     PyObject* list;
     PyObject* item;
     int status;
+    SRE_CODE* pattern_code;
     Py_ssize_t n;
     Py_ssize_t i;
     void* last;
+    int zero_width;
 
     PyObject* string;
     Py_ssize_t maxsplit = 0;
@@ -2179,7 +4130,9 @@
                                      &string, &maxsplit))
         return NULL;
 
-    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX, pattern_code);
     if (!string)
         return NULL;
 
@@ -2189,25 +4142,27 @@
         return NULL;
     }
 
+    zero_width = self->flags & SRE_FLAG_ZEROWIDTH ? 1 : 0;
+
     n = 0;
-    last = state.start;
+    last = state.reverse ? state.end : state.start;
 
     while (!maxsplit || n < maxsplit) {
 
         state_reset(&state);
 
-        state.ptr = state.start;
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state, state.pattern_code);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state, state.pattern_code);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2216,19 +4171,37 @@
             goto error;
         }
 
-        if (state.start == state.ptr) {
-            if (last == state.end)
-                break;
-            /* skip one character */
-            state.start = (void*) ((char*) state.ptr + state.charsize);
-            continue;
-        }
+        if (state.reverse) {
+            // Zero-width match?
+            if (state.ptr == state.end) {
+                // Are we permitted to split on zero-width?
+                if (!zero_width) {
+                    state.end = (void*) ((char*) state.ptr - state.charsize);
+                    continue;
+                }
+            }
 
-        /* get segment before this match */
-        item = PySequence_GetSlice(
-            string, STATE_OFFSET(&state, last),
-            STATE_OFFSET(&state, state.start)
-            );
+            /* get segment before this match */
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, state.end),
+                STATE_OFFSET(&state, last)
+                );
+        } else {
+            // Zero-width match?
+            if (state.ptr == state.start) {
+                // Are we permitted to split on zero-width?
+                if (!zero_width) {
+                    state.start = (void*) ((char*) state.ptr + state.charsize);
+                    continue;
+                }
+            }
+
+            /* get segment before this match */
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, last),
+                STATE_OFFSET(&state, state.start)
+                );
+        }
         if (!item)
             goto error;
         status = PyList_Append(list, item);
@@ -2238,7 +4211,7 @@
 
         /* add groups (if any) */
         for (i = 0; i < self->groups; i++) {
-            item = state_getslice(&state, i+1, string, 0);
+            item = state_getslice(&state, i + 1, string, 0);
             if (!item)
                 goto error;
             status = PyList_Append(list, item);
@@ -2249,14 +4222,42 @@
 
         n = n + 1;
 
-        last = state.start = state.ptr;
+        last = state.ptr;
 
+        // Continue search from where we left off, but reject an initial zero-width match.
+        if (state.reverse) {
+            if (zero_width) {
+                state.end = state.ptr;
+                state.reject_zero_width = 1;
+            } else {
+                if (state.ptr == state.end)
+                    state.end = (void*) ((char*) state.ptr - state.charsize);
+                else
+                    state.end = state.ptr;
+            }
+        } else {
+            if (zero_width) {
+                state.start = state.ptr;
+                state.reject_zero_width = 1;
+            } else {
+                if(state.ptr == state.start)
+                    state.start = (void*) ((char*) state.ptr + state.charsize);
+                else
+                    state.start = state.ptr;
+            }
+        }
     }
 
     /* get segment following last match (even if empty) */
-    item = PySequence_GetSlice(
-        string, STATE_OFFSET(&state, last), state.endpos
-        );
+    if (state.reverse)
+        item = PySequence_GetSlice(
+            string, state.pos, STATE_OFFSET(&state, last)
+            );
+    else
+        item = PySequence_GetSlice(
+            string, STATE_OFFSET(&state, last), state.endpos
+            );
+
     if (!item)
         goto error;
     status = PyList_Append(list, item);
@@ -2275,9 +4276,7 @@
 }
 
 static PyObject*
-pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
-             Py_ssize_t count, Py_ssize_t subn)
-{
+pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, Py_ssize_t count, Py_ssize_t subn) {
     SRE_STATE state;
     PyObject* list;
     PyObject* item;
@@ -2287,9 +4286,11 @@
     void* ptr;
     int status;
     Py_ssize_t n;
-    Py_ssize_t i, b, e;
+    Py_ssize_t b;
     int bint;
     int filter_is_callable;
+    SRE_CODE* pattern_code;
+    void* last;
 
     if (PyCallable_Check(ptemplate)) {
         /* sub/subn takes either a function or a template */
@@ -2303,10 +4304,10 @@
         b = bint;
         if (ptr) {
             if (b == 1) {
-		    literal = sre_literal_template((unsigned char *)ptr, n);
+                literal = sre_bliteral_template((unsigned char*)ptr, n);
             } else {
 #if defined(HAVE_UNICODE)
-		    literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
+                literal = sre_uliteral_template((Py_UNICODE*)ptr, n);
 #endif
             }
         } else {
@@ -2329,7 +4330,9 @@
         }
     }
 
-    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX, pattern_code);
     if (!string) {
         Py_DECREF(filter);
         return NULL;
@@ -2342,24 +4345,25 @@
         return NULL;
     }
 
-    n = i = 0;
+    n = 0;
+    last = state.reverse ? state.end : state.start;
 
     while (!count || n < count) {
 
         state_reset(&state);
 
-        state.ptr = state.start;
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state, state.pattern_code);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state, state.pattern_code);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2368,22 +4372,24 @@
             goto error;
         }
 
-        b = STATE_OFFSET(&state, state.start);
-        e = STATE_OFFSET(&state, state.ptr);
-
-        if (i < b) {
-            /* get segment before this match */
-            item = PySequence_GetSlice(string, i, b);
-            if (!item)
-                goto error;
-            status = PyList_Append(list, item);
-            Py_DECREF(item);
-            if (status < 0)
-                goto error;
-
-        } else if (i == b && i == e && n > 0)
-            /* ignore empty match on latest position */
-            goto next;
+        /* get segment before this match */
+        if (state.reverse) {
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, state.end),
+                STATE_OFFSET(&state, last)
+                );
+        } else {
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, last),
+                STATE_OFFSET(&state, state.start)
+                );
+        }
+        if (!item)
+            goto error;
+        status = PyList_Append(list, item);
+        Py_DECREF(item);
+        if (status < 0)
+            goto error;
 
         if (filter_is_callable) {
             /* pass match object through filter */
@@ -2414,29 +4420,31 @@
                 goto error;
         }
 
-        i = e;
         n = n + 1;
 
-next:
         /* move on */
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
-        else
-            state.start = state.ptr;
-
-    }
+        last = state.ptr;
 
-    /* get segment following last match */
-    if (i < state.endpos) {
-        item = PySequence_GetSlice(string, i, state.endpos);
-        if (!item)
-            goto error;
-        status = PyList_Append(list, item);
-        Py_DECREF(item);
-        if (status < 0)
-            goto error;
+        // Continue search from where we left off, but reject an initial zero-width match.
+        if (state.reverse)
+            state.end = state.ptr;
+        else
+            state.start = state.ptr;
+        state.reject_zero_width = 1;
     }
 
+    /* get segment following last match */
+    if (state.reverse)
+        item = PySequence_GetSlice(string, state.pos, STATE_OFFSET(&state, last));
+    else
+        item = PySequence_GetSlice(string, STATE_OFFSET(&state, last), state.endpos);
+    if (!item)
+        goto error;
+    status = PyList_Append(list, item);
+    Py_DECREF(item);
+    if (status < 0)
+        goto error;
+
     state_fini(&state);
 
     Py_DECREF(filter);
@@ -2460,9 +4468,7 @@
 
 }
 
-static PyObject*
-pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* kw) {
     PyObject* ptemplate;
     PyObject* string;
     Py_ssize_t count = 0;
@@ -2474,9 +4480,7 @@
     return pattern_subx(self, ptemplate, string, count, 0);
 }
 
-static PyObject*
-pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* kw) {
     PyObject* ptemplate;
     PyObject* string;
     Py_ssize_t count = 0;
@@ -2488,9 +4492,7 @@
     return pattern_subx(self, ptemplate, string, count, 1);
 }
 
-static PyObject*
-pattern_copy(PatternObject* self, PyObject *unused)
-{
+static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
 #ifdef USE_BUILTIN_COPY
     PatternObject* copy;
     int offset;
@@ -2516,9 +4518,7 @@
 #endif
 }
 
-static PyObject*
-pattern_deepcopy(PatternObject* self, PyObject* memo)
-{
+static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) {
 #ifdef USE_BUILTIN_COPY
     PatternObject* copy;
 
@@ -2577,20 +4577,20 @@
 
 static PyMethodDef pattern_methods[] = {
     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
-	pattern_match_doc},
+     pattern_match_doc},
     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
-	pattern_search_doc},
+     pattern_search_doc},
     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
-	pattern_sub_doc},
+     pattern_sub_doc},
     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
-	pattern_subn_doc},
+     pattern_subn_doc},
     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
-	pattern_split_doc},
+     pattern_split_doc},
     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
-	pattern_findall_doc},
+     pattern_findall_doc},
 #if PY_VERSION_HEX >= 0x02020000
     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
-	pattern_finditer_doc},
+     pattern_finditer_doc},
 #endif
     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
@@ -2598,9 +4598,7 @@
     {NULL, NULL}
 };
 
-static PyObject*
-pattern_getattr(PatternObject* self, char* name)
-{
+static PyObject* pattern_getattr(PatternObject* self, char* name) {
     PyObject* res;
 
     res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
@@ -2636,33 +4634,31 @@
     0, "_" SRE_MODULE ".SRE_Pattern",
     sizeof(PatternObject), sizeof(SRE_CODE),
     (destructor)pattern_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
+    0,                  /*tp_print*/
     (getattrfunc)pattern_getattr, /*tp_getattr*/
-    0,					/* tp_setattr */
-    0,					/* tp_compare */
-    0,					/* tp_repr */
-    0,					/* tp_as_number */
-    0,					/* tp_as_sequence */
-    0,					/* tp_as_mapping */
-    0,					/* tp_hash */
-    0,					/* tp_call */
-    0,					/* tp_str */
-    0,					/* tp_getattro */
-    0,					/* tp_setattro */
-    0,					/* tp_as_buffer */
-    Py_TPFLAGS_HAVE_WEAKREFS,		/* tp_flags */
-    pattern_doc,			/* tp_doc */
-    0,					/* tp_traverse */
-    0,					/* tp_clear */
-    0,					/* tp_richcompare */
-    offsetof(PatternObject, weakreflist),	/* tp_weaklistoffset */
+    0,                  /* tp_setattr */
+    0,                  /* tp_compare */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_HAVE_WEAKREFS,       /* tp_flags */
+    pattern_doc,        /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    offsetof(PatternObject, weakreflist),    /* tp_weaklistoffset */
 };
 
 static int _validate(PatternObject *self); /* Forward */
 
-static PyObject *
-_compile(PyObject* self_, PyObject* args)
-{
+static PyObject* _compile(PyObject* self_, PyObject* args) {
     /* "compile" pattern descriptor to pattern object */
 
     PatternObject* self;
@@ -2763,454 +4759,404 @@
 #define VTRACE(v)
 #endif
 
-/* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
+typedef struct SRE_Validation { /* validation bookkeeping: validate_subpattern takes a pointer to one and hands it on to nested calls. NOTE(review): the code that reads/writes these fields is outside this excerpt - confirm field meanings there */
+    unsigned int numbered_mark_count;
+    unsigned int named_mark_count;
+    unsigned int min_numbered_mark;
+    unsigned int max_numbered_mark;
+    unsigned int min_named_mark;
+    unsigned int max_named_mark;
+    unsigned int group_ref_count;
+    unsigned int max_group_ref;
+} SRE_Validation;
+
+/*
+    Validates a charset laid out as: charset[0] (whose value / 256 is the last slot of the index table), a table of 16-bit chunk indexes packed two per code word, then (highest index + 1) chunks of 256 / SRE_BITS_PER_CODE code words each.
+    Returns the computed end of the charset (a pointer to the following op) if valid or NULL if invalid. NOTE(review): no end pointer is passed in, so the reads below are not bounds-checked here - confirm the callers guarantee them.
+*/
+static SRE_CODE* validate_charset(SRE_CODE* charset) {
+    int hi_byte = charset[0] / 256; /* index table slots run from 0 to hi_byte inclusive */
+    int max_index = 0; /* highest chunk index seen so far */
+    int index;
+    SRE_CODE* end_charset;
+    for (index = 0; index <= hi_byte; index ++) {
+        int i = (charset[1 + index / 2] >> ((index % 2) * 16)) & 0xFFFF; /* even slots use the low 16 bits of the word, odd slots the next 16 */
+        if (i > max_index + 1) /* an index may exceed the running maximum by at most 1, i.e. no gaps */
+            return NULL;
+        if (i > max_index)
+            max_index = i;
+    }
+    end_charset = charset + 1 + hi_byte / 2 + 1 + (max_index + 1) * (256 / SRE_BITS_PER_CODE); /* header word + index table words + chunks */
+    return end_charset;
+}
 
-/* Extract opcode, argument, or skip count from code array */
-#define GET_OP                                          \
-    do {                                                \
-        VTRACE(("%p: ", code));                         \
-        if (code >= end) FAIL;                          \
-        op = *code++;                                   \
-        VTRACE(("%lu (op)\n", (unsigned long)op));      \
-    } while (0)
-#define GET_ARG                                         \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        arg = *code++;                                  \
-        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
-    } while (0)
-#define GET_SKIP_ADJ(adj)                               \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        skip = *code;                                   \
-        VTRACE(("%lu (skip to %p)\n",                   \
-               (unsigned long)skip, code+skip));        \
-        if (code+skip-adj < code || code+skip-adj > end)\
-            FAIL;                                       \
-        code++;                                         \
-    } while (0)
-#define GET_SKIP GET_SKIP_ADJ(0)
+/*
+    Validates a set: pattern[0] is the skip to the end of the set and is followed by members (category, charset, literal or range ops), which must end exactly at that point.
+    Returns a pointer to the following op if valid or NULL if invalid. NOTE(review): member operands (pattern[1], pattern[2]) are read before any check against the set end - confirm this cannot read past the code.
+*/
+static SRE_CODE* validate_set(SRE_CODE* pattern, SRE_CODE* end_ptr) {
+    SRE_OpInfo* info_ptr;
+    SRE_CODE* charset_end = pattern + pattern[0]; /* end of the set, measured from the skip word itself */
+    if (pattern[0] < 1 || charset_end > end_ptr)
+        return NULL;
 
-static int
-_validate_charset(SRE_CODE *code, SRE_CODE *end)
-{
-    /* Some variables are manipulated by the macros above */
-    SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE offset;
-    int i;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
+    pattern++; /* step over the skip word to the first member */
 
-        case SRE_OP_NEGATE:
+    do { /* runs at least once, so a set with no members fails the final check below */
+        if (pattern[0] > SRE_MAX_OP)
+            return NULL;
+        DEBUG_TRACE(("op is %u\n", pattern[0]));
+        info_ptr = &op_info[pattern[0]];
+        switch (info_ptr->type) {
+        case SRE_TYPE_CATEGORY: // <category> category
+            pattern += 2;
             break;
-
-        case SRE_OP_LITERAL:
-            GET_ARG;
+        case SRE_TYPE_CHARSET: // <charset> skip charset
+        {
+            SRE_CODE* end_charset = pattern + 1 + pattern[1]; /* end claimed by the skip word */
+            if (end_charset > end_ptr)
+                return NULL;
+            pattern = validate_charset(pattern + 2);
+            if (pattern != end_charset) /* also rejects a NULL result; computed end must agree with the skip */
+                return NULL;
             break;
-
-        case SRE_OP_RANGE:
-            GET_ARG;
-            GET_ARG;
+        }
+        case SRE_TYPE_LITERAL: // <literal> code
+            pattern += 2;
             break;
-
-        case SRE_OP_CHARSET:
-            offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            GET_ARG; /* Number of blocks */
-            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            /* Make sure that each byte points to a valid block */
-            for (i = 0; i < 256; i++) {
-                if (((unsigned char *)code)[i] >= arg)
-                    FAIL;
-            }
-            code += offset;
-            offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
+        case SRE_TYPE_RANGE: // <range> min max
+            if (pattern[1] > pattern[2])
+                return NULL;
+            pattern += 3;
             break;
+        default:
+            return NULL;
+        }
+    } while (pattern < charset_end);
 
-        case SRE_OP_CATEGORY:
-            GET_ARG;
-            switch (arg) {
-            case SRE_CATEGORY_DIGIT:
-            case SRE_CATEGORY_NOT_DIGIT:
-            case SRE_CATEGORY_SPACE:
-            case SRE_CATEGORY_NOT_SPACE:
-            case SRE_CATEGORY_WORD:
-            case SRE_CATEGORY_NOT_WORD:
-            case SRE_CATEGORY_LINEBREAK:
-            case SRE_CATEGORY_NOT_LINEBREAK:
-            case SRE_CATEGORY_LOC_WORD:
-            case SRE_CATEGORY_LOC_NOT_WORD:
-            case SRE_CATEGORY_UNI_DIGIT:
-            case SRE_CATEGORY_UNI_NOT_DIGIT:
-            case SRE_CATEGORY_UNI_SPACE:
-            case SRE_CATEGORY_UNI_NOT_SPACE:
-            case SRE_CATEGORY_UNI_WORD:
-            case SRE_CATEGORY_UNI_NOT_WORD:
-            case SRE_CATEGORY_UNI_LINEBREAK:
-            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-                break;
-            default:
-                FAIL;
-            }
-            break;
+    return pattern > charset_end ? NULL : pattern; /* a member that overruns the declared end makes the set invalid */
+}
 
-        default:
-            FAIL;
+/*
+    Validates a single-character op (category, charset, literal, range, set or simple category). Returns a pointer to the following op if valid
+    or NULL if invalid. *direction is in/out: 0 on entry accepts any op, otherwise the op's direction must equal it; on success it is set to the op's direction.
+*/
+static SRE_CODE* validate_one_pattern(SRE_CODE* pattern, SRE_CODE* end_ptr, int* direction) {
+    SRE_OpInfo* info_ptr;
 
-        }
+    if (pattern[0] > SRE_MAX_OP)
+        return NULL;
+
+    info_ptr = &op_info[pattern[0]];
+    DEBUG_TRACE(("op is %u\n", pattern[0]));
+    if (*direction != 0 && *direction != info_ptr->direction) /* 0 means the caller imposes no direction */
+        return NULL;
+
+    switch (info_ptr->type) {
+    case SRE_TYPE_CATEGORY: // <category> category
+        pattern += 2;
+        break;
+    case SRE_TYPE_CHARSET: // <charset> skip charset
+    {
+        SRE_CODE* end_charset = pattern + 1 + pattern[1]; /* end claimed by the skip word */
+        if (end_charset > end_ptr)
+            return NULL;
+        pattern = validate_charset(pattern + 2);
+        if (pattern != end_charset) /* also rejects a NULL result; computed end must agree with the skip */
+            return NULL;
+        break;
+    }
+    case SRE_TYPE_LITERAL: // <literal> code
+        pattern += 2;
+        break;
+    case SRE_TYPE_RANGE: // <range> min max
+        if (pattern[1] > pattern[2])
+            return NULL;
+        pattern += 3;
+        break;
+    case SRE_TYPE_SET: // <set> set
+        pattern = validate_set(pattern + 1, end_ptr);
+        if (pattern == NULL)
+            return NULL;
+        break;
+    case SRE_TYPE_SIMPLE_CATEGORY: // <category>
+        pattern++;
+        break;
+    default:
+        return NULL;
     }
 
-    return 1;
+    if (pattern > end_ptr) /* the op and its operands must not run past the end of the code */
+        return NULL;
+
+    *direction = info_ptr->direction; /* report this op's direction back to the caller */
+
+    return pattern;
 }
 
-static int
-_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    /* Some variables are manipulated by the macros above */
-    SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE skip;
-
-    VTRACE(("code=%p, end=%p\n", code, end));
-
-    if (code > end)
-        FAIL;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
+static SRE_CODE* validate_subpattern(SRE_CODE* pattern, SRE_CODE* end_ptr, int* direction, SRE_Validation* validation) {
+    int dir = *direction;
 
-        case SRE_OP_MARK:
-            /* We don't check whether marks are properly nested; the
-               sre_match() code is robust even if they don't, and the worst
-               you can get is nonsensical match results. */
-            GET_ARG;
-            if (arg > 2*groups+1) {
-                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
-                FAIL;
-            }
-            break;
+    while (pattern < end_ptr) {
+        SRE_OpInfo* info_ptr;
 
-        case SRE_OP_LITERAL:
-        case SRE_OP_NOT_LITERAL:
-        case SRE_OP_LITERAL_IGNORE:
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            GET_ARG;
-            /* The arg is just a character, nothing to check */
-            break;
+        DEBUG_TRACE(("op is %u\n", pattern[0]));
+        if (pattern[0] > SRE_MAX_OP)
+            return NULL;
 
-        case SRE_OP_SUCCESS:
-        case SRE_OP_FAILURE:
-            /* Nothing to check; these normally end the matching process */
-            break;
+        info_ptr = &op_info[pattern[0]];
+        if (dir != 0 && info_ptr->direction != 0 && dir != info_ptr->direction)
+            return NULL;
 
-        case SRE_OP_AT:
-            GET_ARG;
-            switch (arg) {
-            case SRE_AT_BEGINNING:
-            case SRE_AT_BEGINNING_STRING:
-            case SRE_AT_BEGINNING_LINE:
-            case SRE_AT_END:
-            case SRE_AT_END_LINE:
-            case SRE_AT_END_STRING:
-            case SRE_AT_BOUNDARY:
-            case SRE_AT_NON_BOUNDARY:
-            case SRE_AT_LOC_BOUNDARY:
-            case SRE_AT_LOC_NON_BOUNDARY:
-            case SRE_AT_UNI_BOUNDARY:
-            case SRE_AT_UNI_NON_BOUNDARY:
-                break;
-            default:
-                FAIL;
-            }
+        switch (info_ptr->type) {
+        case SRE_TYPE_ASSERT: // <assert> <skip to end> ... <end_assert>
+        {
+            SRE_CODE* tail_ptr = pattern + 1 + pattern[1];
+            int subdir = 0;
+            if (pattern[1] < 2 || tail_ptr > end_ptr || tail_ptr[-1] != info_ptr->end_marker)
+                return NULL;
+            if (validate_subpattern(pattern + 2, tail_ptr - 1, &subdir, validation) != tail_ptr - 1)
+                return NULL;
+            pattern = tail_ptr;
             break;
-
-        case SRE_OP_ANY:
-        case SRE_OP_ANY_ALL:
-            /* These have no operands */
+        }
+        case SRE_TYPE_ATOMIC: // <ATOMIC> ... <END_ATOMIC>
+        {
+            // The call should return a pointer to the END_ATOMIC, which it doesn't understand.
+            SRE_CODE* ptr = validate_subpattern(pattern + 1, end_ptr, &dir, validation);
+            if (ptr == NULL || ptr >= end_ptr || ptr[0] != info_ptr->end_marker)
+                return NULL;
+            pattern = ptr + 1;
             break;
-
-        case SRE_OP_IN:
-        case SRE_OP_IN_IGNORE:
-            GET_SKIP;
-            /* Stop 1 before the end; we check the FAILURE below */
-            if (!_validate_charset(code, code+skip-2))
-                FAIL;
-            if (code[skip-2] != SRE_OP_FAILURE)
-                FAIL;
-            code += skip-1;
-            break;
-
-        case SRE_OP_INFO:
-            {
-                /* A minimal info field is
-                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
-                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
-                   more follows. */
-                SRE_CODE flags, min, max, i;
-                SRE_CODE *newcode;
-                GET_SKIP;
-                newcode = code+skip-1;
-                GET_ARG; flags = arg;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                /* Check that only valid flags are present */
-                if ((flags & ~(SRE_INFO_PREFIX |
-                               SRE_INFO_LITERAL |
-                               SRE_INFO_CHARSET)) != 0)
-                    FAIL;
-                /* PREFIX and CHARSET are mutually exclusive */
-                if ((flags & SRE_INFO_PREFIX) &&
-                    (flags & SRE_INFO_CHARSET))
-                    FAIL;
-                /* LITERAL implies PREFIX */
-                if ((flags & SRE_INFO_LITERAL) &&
-                    !(flags & SRE_INFO_PREFIX))
-                    FAIL;
-                /* Validate the prefix */
-                if (flags & SRE_INFO_PREFIX) {
-                    SRE_CODE prefix_len, prefix_skip;
-                    GET_ARG; prefix_len = arg;
-                    GET_ARG; prefix_skip = arg;
-                    /* Here comes the prefix string */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    code += prefix_len;
-                    /* And here comes the overlap table */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    /* Each overlap value should be < prefix_len */
-                    for (i = 0; i < prefix_len; i++) {
-                        if (code[i] >= prefix_len)
-                            FAIL;
-                    }
-                    code += prefix_len;
-                }
-                /* Validate the charset */
-                if (flags & SRE_INFO_CHARSET) {
-                    if (!_validate_charset(code, newcode-1))
-                        FAIL;
-                    if (newcode[-1] != SRE_OP_FAILURE)
-                        FAIL;
-                    code = newcode;
-                }
-                else if (code != newcode) {
-                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
-                    FAIL;
-                }
-            }
+        }
+        case SRE_TYPE_BRANCH: // <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
+        {
+            // All the jumps should end in the same place.
+            SRE_CODE* skip_end_ptr = NULL;
+            pattern++;
+            do {
+                SRE_CODE* next_ptr = pattern + pattern[0];
+                SRE_CODE* ptr;
+                // The offset to the next alternative's offset.
+                if (pattern[0] < 3 || next_ptr >= end_ptr)
+                    return NULL;
+                // Validate this alternative, which stops at the jump.
+                ptr = validate_subpattern(pattern + 1, next_ptr - 2, &dir, validation);
+                if (ptr != next_ptr - 2 || ptr[0] != SRE_OP_JUMP || ptr[1] < 1)
+                    return NULL;
+                // The jump to the end.
+                ptr += 1 + ptr[1];
+                if (skip_end_ptr == NULL)
+                    skip_end_ptr = ptr;
+                else if (ptr != skip_end_ptr)
+                    return NULL;
+                pattern = next_ptr;
+            } while (pattern[0] != 0);
+            pattern++;
             break;
-
-        case SRE_OP_BRANCH:
-            {
-                SRE_CODE *target = NULL;
-                for (;;) {
-                    GET_SKIP;
-                    if (skip == 0)
-                        break;
-                    /* Stop 2 before the end; we check the JUMP below */
-                    if (!_validate_inner(code, code+skip-3, groups))
-                        FAIL;
-                    code += skip-3;
-                    /* Check that it ends with a JUMP, and that each JUMP
-                       has the same target */
-                    GET_OP;
-                    if (op != SRE_OP_JUMP)
-                        FAIL;
-                    GET_SKIP;
-                    if (target == NULL)
-                        target = code+skip-1;
-                    else if (code+skip-1 != target)
-                        FAIL;
-                }
-            }
+        }
+        case SRE_TYPE_CATEGORY: // <category> category
+            pattern += 2;
             break;
-
-        case SRE_OP_REPEAT_ONE:
-        case SRE_OP_MIN_REPEAT_ONE:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-4, groups))
-                    FAIL;
-                code += skip-4;
-                GET_OP;
-                if (op != SRE_OP_SUCCESS)
-                    FAIL;
-            }
+        case SRE_TYPE_CHARSET: // <charset> skip charset
+        {
+            SRE_CODE* end_charset = pattern + 1 + pattern[1];
+            if (end_charset > end_ptr)
+                return NULL;
+            pattern = validate_charset(pattern + 2);
+            if (pattern != end_charset)
+                return NULL;
             break;
-
-        case SRE_OP_REPEAT:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-3, groups))
-                    FAIL;
-                code += skip-3;
-                GET_OP;
-                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
-                    FAIL;
-            }
+        }
+        case SRE_TYPE_GROUPREF: // <groupref> group_id
+            DEBUG_TRACE(("GROUPREF %u\n", pattern[1]));
+            validation->group_ref_count++;
+            validation->max_group_ref = sre_max(validation->max_group_ref, pattern[1]);
+            pattern += 2;
+            DEBUG_TRACE(("group_ref_count is %u\n", validation->group_ref_count));
+            DEBUG_TRACE(("max_group_ref is %u\n", validation->max_group_ref));
             break;
-
-        case SRE_OP_GROUPREF:
-        case SRE_OP_GROUPREF_IGNORE:
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
+        case SRE_TYPE_GROUPREF_EXISTS: // <GROUPREF_EXISTS> group_id <skip to code_no> code_yes <JUMP> <skip to end> code_no
+        {
+            SRE_CODE* skip_ptr = pattern + 1 + pattern[2];
+            SRE_CODE* ptr;
+            // Locate code_no.
+            if (pattern[2] < 2 || skip_ptr > end_ptr)
+                return NULL;
+            // code_yes lies between the skip and code_no.
+            ptr = validate_subpattern(pattern + 3, skip_ptr, &dir, validation);
+            // 'ptr' will point after code_yes and at the jump, if present.
+            // (The jump will have been rejected by the call.)
+            // Validate code_no, if present.
+            if (ptr == skip_ptr - 2) {
+                if (ptr[0] != SRE_OP_JUMP || ptr[1] < 1)
+                    return NULL;
+                skip_ptr = ptr + 1 + ptr[1];
+                if (skip_ptr > end_ptr)
+                    return NULL;
+                ptr = validate_subpattern(ptr + 2, skip_ptr, &dir, validation);
+                if (ptr < skip_ptr)
+                    return NULL;
+            } else if (ptr != skip_ptr)
+                return NULL;
+            validation->group_ref_count++;
+            validation->max_group_ref = sre_max(validation->max_group_ref, pattern[1]);
+            pattern = skip_ptr;
             break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            /* The regex syntax for this is: '(?(group)then|else)', where
-               'group' is either an integer group number or a group name,
-               'then' and 'else' are sub-regexes, and 'else' is optional. */
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
-            GET_SKIP_ADJ(1);
-            code--; /* The skip is relative to the first arg! */
-            /* There are two possibilities here: if there is both a 'then'
-               part and an 'else' part, the generated code looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skipyes>
-               ...then part...
-               JUMP
-               <skipno>
-               (<skipyes> jumps here)
-               ...else part...
-               (<skipno> jumps here)
-
-               If there is only a 'then' part, it looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skip>
-               ...then part...
-               (<skip> jumps here)
-
-               There is no direct way to decide which it is, and we don't want
-               to allow arbitrary jumps anywhere in the code; so we just look
-               for a JUMP opcode preceding our skip target.
-            */
-            if (skip >= 3 && code+skip-3 >= code &&
-                code[skip-3] == SRE_OP_JUMP)
-            {
-                VTRACE(("both then and else parts present\n"));
-                if (!_validate_inner(code+1, code+skip-3, groups))
-                    FAIL;
-                code += skip-2; /* Position after JUMP, at <skipno> */
-                GET_SKIP;
-                if (!_validate_inner(code, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            else {
-                VTRACE(("only a then part present\n"));
-                if (!_validate_inner(code+1, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
+        }
+        case SRE_TYPE_LITERAL: // <literal> code
+            pattern += 2;
+            break;
+        case SRE_TYPE_LITERAL_STRING: // <literal_string> length ...
+            if (pattern[1] == 0)
+                return NULL;
+            pattern += 2 + pattern[1];
+            break;
+        case SRE_TYPE_MARK: // <MARK> <numbered_id> <named_id>
+            // All named marks are also numbered.
+            // The named marks all have higher ids than the numbered ones.
+            DEBUG_TRACE(("mark %u %u at 0x%p\n", pattern[1], pattern[2], pattern));
+            if (pattern[1] > pattern[2])
+                return NULL;
+            validation->numbered_mark_count++;
+            validation->min_numbered_mark = sre_min(validation->min_numbered_mark, pattern[1]);
+            validation->max_numbered_mark = sre_max(validation->max_numbered_mark, pattern[1]);
+            if (pattern[2] > pattern[1]) {
+                validation->named_mark_count++;
+                validation->min_named_mark = sre_min(validation->min_named_mark, pattern[2]);
+                validation->max_named_mark = sre_max(validation->max_named_mark, pattern[2]);
             }
+            pattern += 3;
+            DEBUG_TRACE(("numbered_mark_count is %u\n", validation->numbered_mark_count));
+            DEBUG_TRACE(("min_numbered_mark is %u\n", validation->min_numbered_mark));
+            DEBUG_TRACE(("max_numbered_mark is %u\n", validation->max_numbered_mark));
+            DEBUG_TRACE(("named_mark_count is %u\n", validation->named_mark_count));
+            DEBUG_TRACE(("min_named_mark is %u\n", validation->min_named_mark));
+            DEBUG_TRACE(("max_named_mark is %u\n", validation->max_named_mark));
             break;
-
-        case SRE_OP_ASSERT:
-        case SRE_OP_ASSERT_NOT:
-            GET_SKIP;
-            GET_ARG; /* 0 for lookahead, width for lookbehind */
-            code--; /* Back up over arg to simplify math below */
-            if (arg & 0x80000000)
-                FAIL; /* Width too large */
-            /* Stop 1 before the end; we check the SUCCESS below */
-            if (!_validate_inner(code+1, code+skip-2, groups))
-                FAIL;
-            code += skip-2;
-            GET_OP;
-            if (op != SRE_OP_SUCCESS)
-                FAIL;
+        case SRE_TYPE_POSITION: // <position>
+            pattern++;
+            break;
+        case SRE_TYPE_RANGE: // <range> min max
+            if (pattern[1] > pattern[2])
+                return NULL;
+            pattern += 3;
+            break;
+        case SRE_TYPE_REPEAT: // <repeat> <skip to end> <min> <max> ... <end_repeat> <skip to start>
+        {
+            SRE_CODE* skip_end_ptr;
+            if (pattern[1] < 4 || pattern[2] > pattern[3])
+                return NULL;
+            skip_end_ptr = pattern + pattern[1];
+            if (skip_end_ptr + 2 > end_ptr || skip_end_ptr[0] != info_ptr->end_marker || skip_end_ptr[1] != pattern[1])
+                return NULL;
+            if (validate_subpattern(pattern + 4, skip_end_ptr, &dir, validation) != skip_end_ptr)
+                return NULL;
+            pattern = skip_end_ptr + 2;
+            break;
+        }
+        case SRE_TYPE_REPEAT_ONE: // <repeat_one> <skip to end> <min> <max> ...
+        {
+            SRE_CODE* tail_ptr;
+            DEBUG_TRACE(("skip is %u, min is %u, max is %u\n", pattern[1], pattern[2], pattern[3]));
+            if (pattern[1] < 4 || pattern[2] > pattern[3])
+                return NULL;
+            tail_ptr = pattern + 1 + pattern[1];
+            if (tail_ptr > end_ptr)
+                return NULL;
+            if (validate_one_pattern(pattern + 4, tail_ptr, &dir) != tail_ptr)
+                return NULL;
+            pattern = tail_ptr;
+            break;
+        }
+        case SRE_TYPE_SET: // <set> set
+            pattern = validate_set(pattern + 1, end_ptr);
+            if (pattern == NULL)
+                return NULL;
+            break;
+        case SRE_TYPE_SIMPLE_CATEGORY: // <category>
+            pattern++;
             break;
-
         default:
-            FAIL;
-
+            // Anything else might be meaningful to the caller.
+            *direction = dir;
+            return pattern;
         }
+
+        if (info_ptr->direction != 0)
+            dir = info_ptr->direction;
     }
 
-    VTRACE(("okay\n"));
-    return 1;
-}
+    *direction = dir;
 
-static int
-_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
-        FAIL;
-    if (groups == 0)  /* fix for simplejson */
-        groups = 100; /* 100 groups should always be safe */
-    return _validate_inner(code, end-1, groups);
+    return pattern > end_ptr ? NULL : pattern;
 }
 
-static int
-_validate(PatternObject *self)
-{
-    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
-    {
-        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
-        return 0;
-    }
-    else
-        VTRACE(("Success!\n"));
+static int _validate(PatternObject* self) { /* Checks self->code; returns 1 and sets self->groups / self->internal_groups on success, else sets RuntimeError and returns 0. */
+    SRE_Validation validation;
+    int direction = 0;
+    SRE_CODE* end_ptr = self->code + self->codesize;
+
+    validation.numbered_mark_count = 0;
+    validation.min_numbered_mark = ~(unsigned int)0; /* all bits set, so the sre_min() updates during validation can only lower it */
+    validation.max_numbered_mark = 0;
+    validation.named_mark_count = 0;
+    validation.min_named_mark = ~(unsigned int)0; /* all bits set, same reason as min_numbered_mark */
+    validation.max_named_mark = 0;
+    validation.group_ref_count = 0;
+    validation.max_group_ref = 0;
+
+    /* validate_subpattern will return a pointer to the first op it doesn't understand
+       or NULL if the pattern is invalid. It doesn't understand SRE_OP_SUCCESS (which
+       occurs only at the end of the pattern), so the result should be a pointer to that. */
+    if (self->codesize < 1 || end_ptr[-1] != SRE_OP_SUCCESS)
+        goto error;
+    if (validate_subpattern(self->code, end_ptr, &direction, &validation) != end_ptr - 1)
+        goto error;
+
+    // There should be an even number of marks (start and end of a group).
+    if (validation.numbered_mark_count % 2 != 0 || validation.named_mark_count % 2 != 0)
+        goto error;
+    // The numbered marks should be in the range 0 .. numbered_mark_count - 1.
+    // (We're not checking for duplicates.)
+    if (validation.numbered_mark_count > 0 && (validation.min_numbered_mark != 0 ||
+        validation.min_numbered_mark + validation.numbered_mark_count - 1 != validation.max_numbered_mark))
+        goto error;
+    // All the named marks should be in the range numbered_mark_count .. numbered_mark_count + named_mark_count - 1.
+    // (We're not checking for duplicates.)
+    // We can guarantee that named_mark_count <= numbered_mark_count.
+    if (validation.named_mark_count > 0 && (validation.min_named_mark != validation.numbered_mark_count ||
+        validation.min_named_mark + validation.named_mark_count - 1 != validation.max_named_mark))
+        goto error;
+    // All the group refs should be in the range 0 .. (numbered_mark_count + named_mark_count) / 2 - 1 (two marks per group).
+    if (validation.group_ref_count > 0 && validation.max_group_ref * 2 >= validation.numbered_mark_count + validation.named_mark_count)
+        goto error;
+
+    self->groups = validation.numbered_mark_count / 2;
+    self->internal_groups = (validation.numbered_mark_count + validation.named_mark_count) / 2;
+
+    VTRACE(("Success!\n"));
     return 1;
+
+error:
+    PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
+    return 0;
 }
 
 /* -------------------------------------------------------------------- */
 /* match methods */
 
-static void
-match_dealloc(MatchObject* self)
-{
+static void match_dealloc(MatchObject* self) {
     Py_XDECREF(self->regs);
     Py_XDECREF(self->string);
     Py_DECREF(self->pattern);
     PyObject_DEL(self);
 }
 
-static PyObject*
-match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
-{
-    if (index < 0 || index >= self->groups) {
+static PyObject* match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def, BOOL include_internal) {
+    int groups = include_internal ? self->internal_groups : self->groups;
+    DEBUG_TRACE(("match_getslice_by_index: include_internal is %d, internal_groups is %d, groups is %d, index is %d\n", include_internal, self->internal_groups, self->groups, index));
+    if (index < 0 || index >= groups) {
         /* raise IndexError if we were given a bad group number */
         PyErr_SetString(
             PyExc_IndexError,
@@ -3228,17 +5174,19 @@
     }
 
     return PySequence_GetSlice(
-        self->string, self->mark[index], self->mark[index+1]
+        self->string, self->mark[index], self->mark[index + 1]
         );
 }
 
-static Py_ssize_t
-match_getindex(MatchObject* self, PyObject* index)
-{
+static Py_ssize_t match_getindex(MatchObject* self, PyObject* index, BOOL include_internal) {
     Py_ssize_t i;
 
     if (PyInt_Check(index))
-        return PyInt_AsSsize_t(index);
+    { /* Integer index: map anything past the last valid group to -1 so callers report "no such group". */
+        Py_ssize_t groups = include_internal ? self->internal_groups : self->groups;
+        i = PyInt_AsSsize_t(index);
+        return i >= groups ? -1 : i; /* valid indexes are 0 .. groups - 1; negative values pass through for the caller's own < 0 check */
+    }
 
     i = -1;
 
@@ -3255,15 +5203,11 @@
     return i;
 }
 
-static PyObject*
-match_getslice(MatchObject* self, PyObject* index, PyObject* def)
-{
-    return match_getslice_by_index(self, match_getindex(self, index), def);
+static PyObject* match_getslice(MatchObject* self, PyObject* index, PyObject* def, BOOL include_internal) { /* Resolves 'index' via match_getindex, then returns that group's slice. */
+    return match_getslice_by_index(self, match_getindex(self, index, include_internal), def, TRUE); /* NOTE(review): the slice bound is always internal_groups (TRUE); only match_getindex honours include_internal -- confirm intended */
 }
 
-static PyObject*
-match_expand(MatchObject* self, PyObject* ptemplate)
-{
+static PyObject* match_expand(MatchObject* self, PyObject* ptemplate) {
     /* delegate to Python code */
     return call(
         SRE_PY_MODULE, "_expand",
@@ -3271,9 +5215,7 @@
         );
 }
 
-static PyObject*
-match_group(MatchObject* self, PyObject* args)
-{
+static PyObject* sre_get_match_group(MatchObject* self, PyObject* args, BOOL include_internal) {
     PyObject* result;
     Py_ssize_t i, size;
 
@@ -3281,10 +5223,10 @@
 
     switch (size) {
     case 0:
-        result = match_getslice(self, Py_False, Py_None);
+        result = match_getslice(self, Py_False, Py_None, include_internal);
         break;
     case 1:
-        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
+        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None, include_internal);
         break;
     default:
         /* fetch multiple items */
@@ -3293,7 +5235,7 @@
             return NULL;
         for (i = 0; i < size; i++) {
             PyObject* item = match_getslice(
-                self, PyTuple_GET_ITEM(args, i), Py_None
+                self, PyTuple_GET_ITEM(args, i), Py_None, include_internal
                 );
             if (!item) {
                 Py_DECREF(result);
@@ -3306,9 +5248,22 @@
     return result;
 }
 
-static PyObject*
-match_groups(MatchObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* match_group(MatchObject* self, PyObject* args) { /* Group lookup with include_internal off. */
+    return sre_get_match_group(self, args, FALSE); /* FALSE: match_getindex bounds integer indexes by self->groups */
+}
+
+static PyObject* match_internal_group(MatchObject* self, PyObject* args) { /* Exposed in match_methods as "_internal_group". */
+    return sre_get_match_group(self, args, TRUE); /* TRUE: match_getindex bounds integer indexes by self->internal_groups */
+}
+
+static PyObject* match_subscript(MatchObject* self, register PyObject* group) {
+    /* Registered with METH_O, so 'group' is the subscript object itself; a 1-tuple wrapper is still unwrapped for compatibility. */
+    if (PyTuple_Check(group) && PyTuple_GET_SIZE(group) == 1)
+        group = PyTuple_GET_ITEM(group, 0);
+    return match_getslice(self, group, Py_None, FALSE); /* a bad index or name is reported by the lookup itself */
+}
+
+static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* kw) {
     PyObject* result;
     Py_ssize_t index;
 
@@ -3317,26 +5272,24 @@
     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
         return NULL;
 
-    result = PyTuple_New(self->groups-1);
+    result = PyTuple_New(self->groups - 1);
     if (!result)
         return NULL;
 
     for (index = 1; index < self->groups; index++) {
         PyObject* item;
-        item = match_getslice_by_index(self, index, def);
+        item = match_getslice_by_index(self, index, def, FALSE);
         if (!item) {
             Py_DECREF(result);
             return NULL;
         }
-        PyTuple_SET_ITEM(result, index-1, item);
+        PyTuple_SET_ITEM(result, index - 1, item);
     }
 
     return result;
 }
 
-static PyObject*
-match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject* kw) {
     PyObject* result;
     PyObject* keys;
     Py_ssize_t index;
@@ -3361,7 +5314,7 @@
         key = PyList_GET_ITEM(keys, index);
         if (!key)
             goto failed;
-        value = match_getslice(self, key, def);
+        value = match_getslice(self, key, def, FALSE);
         if (!value) {
             Py_DECREF(key);
             goto failed;
@@ -3382,18 +5335,16 @@
     return NULL;
 }
 
-static PyObject*
-match_start(MatchObject* self, PyObject* args)
-{
+static PyObject* match_start(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
+    index = match_getindex(self, index_, FALSE);
 
-    if (index < 0 || index >= self->groups) {
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3402,21 +5353,19 @@
     }
 
     /* mark is -1 if group is undefined */
-    return Py_BuildValue("i", self->mark[index*2]);
+    return Py_BuildValue("i", self->mark[index * 2]);
 }
 
-static PyObject*
-match_end(MatchObject* self, PyObject* args)
-{
+static PyObject* match_end(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
+    index = match_getindex(self, index_, FALSE);
 
-    if (index < 0 || index >= self->groups) {
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3425,12 +5374,10 @@
     }
 
     /* mark is -1 if group is undefined */
-    return Py_BuildValue("i", self->mark[index*2+1]);
+    return Py_BuildValue("i", self->mark[index * 2 + 1]);
 }
 
-LOCAL(PyObject*)
-_pair(Py_ssize_t i1, Py_ssize_t i2)
-{
+LOCAL(PyObject*) _pair(Py_ssize_t i1, Py_ssize_t i2) {
     PyObject* pair;
     PyObject* item;
 
@@ -3450,23 +5397,21 @@
 
     return pair;
 
-  error:
+error:
     Py_DECREF(pair);
     return NULL;
 }
 
-static PyObject*
-match_span(MatchObject* self, PyObject* args)
-{
+static PyObject* match_span(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
+    index = match_getindex(self, index_, FALSE);
 
-    if (index < 0 || index >= self->groups) {
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3475,12 +5420,10 @@
     }
 
     /* marks are -1 if group is undefined */
-    return _pair(self->mark[index*2], self->mark[index*2+1]);
+    return _pair(self->mark[index * 2], self->mark[index * 2 + 1]);
 }
 
-static PyObject*
-match_regs(MatchObject* self)
-{
+static PyObject* match_regs(MatchObject* self) {
     PyObject* regs;
     PyObject* item;
     Py_ssize_t index;
@@ -3490,7 +5433,7 @@
         return NULL;
 
     for (index = 0; index < self->groups; index++) {
-        item = _pair(self->mark[index*2], self->mark[index*2+1]);
+        item = _pair(self->mark[index * 2], self->mark[index * 2 + 1]);
         if (!item) {
             Py_DECREF(regs);
             return NULL;
@@ -3504,14 +5447,12 @@
     return regs;
 }
 
-static PyObject*
-match_copy(MatchObject* self, PyObject *unused)
-{
+static PyObject* match_copy(MatchObject* self, PyObject* unused) {
 #ifdef USE_BUILTIN_COPY
     MatchObject* copy;
     Py_ssize_t slots, offset;
 
-    slots = 2 * (self->pattern->groups+1);
+    slots = 2 * (self->pattern->groups + 1);
 
     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
     if (!copy)
@@ -3525,23 +5466,21 @@
     Py_XINCREF(self->string);
     Py_XINCREF(self->regs);
 
-    memcpy((char*) copy + offset, (char*) self + offset,
+    memcpy((char*)copy + offset, (char*)self + offset,
            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
 
-    return (PyObject*) copy;
+    return (PyObject*)copy;
 #else
     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
     return NULL;
 #endif
 }
 
-static PyObject*
-match_deepcopy(MatchObject* self, PyObject* memo)
-{
+static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) {
 #ifdef USE_BUILTIN_COPY
     MatchObject* copy;
 
-    copy = (MatchObject*) match_copy(self);
+    copy = (MatchObject*)match_copy(self);
     if (!copy)
         return NULL;
 
@@ -3568,15 +5507,15 @@
     {"expand", (PyCFunction) match_expand, METH_O},
     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
+    {"__getitem__", (PyCFunction) match_subscript, METH_O|METH_COEXIST},
+    {"_internal_group", (PyCFunction) match_internal_group, METH_VARARGS},
     {NULL, NULL}
 };
 
-static PyObject*
-match_getattr(MatchObject* self, char* name)
-{
+static PyObject* match_getattr(MatchObject* self, char* name) {
     PyObject* res;
 
-    res = Py_FindMethod(match_methods, (PyObject*) self, name);
+    res = Py_FindMethod(match_methods, (PyObject*)self, name);
     if (res)
         return res;
 
@@ -3590,9 +5529,9 @@
     }
 
     if (!strcmp(name, "lastgroup")) {
-        if (self->pattern->indexgroup && self->lastindex >= 0) {
+        if (self->pattern->indexgroup && self->last_named_index >= 0) {
             PyObject* result = PySequence_GetItem(
-                self->pattern->indexgroup, self->lastindex
+                self->pattern->indexgroup, self->last_named_index
                 );
             if (result)
                 return result;
@@ -3642,27 +5581,43 @@
     PyObject_HEAD_INIT(NULL)
     0, "_" SRE_MODULE ".SRE_Match",
     sizeof(MatchObject), sizeof(Py_ssize_t),
-    (destructor)match_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
-    (getattrfunc)match_getattr /*tp_getattr*/
+    (destructor)match_dealloc,  /*tp_dealloc*/
+    0,                  /*tp_print*/
+    (getattrfunc)match_getattr, /*tp_getattr*/
+    0,                  /* tp_setattr */
+    0,                  /* tp_compare */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_HAVE_INDEX,  /* tp_flags */
+    0,                  /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    0,                  /* tp_weaklistoffset */
+    0,                  /* tp_iter */
+    0,                  /* tp_iternext */
+    match_methods,      /* tp_methods */
 };
 
-static PyObject*
-pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
-{
+static PyObject* pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status) {
     /* create match object (from state object) */
-
-    MatchObject* match;
-    Py_ssize_t i, j;
-    char* base;
-    int n;
-
     if (status > 0) {
+        MatchObject* match;
+        char* base = (char*) state->beginning;
+        Py_ssize_t mark_index;
+        int charsize = state->charsize;
 
         /* create match object (with room for extra group marks) */
         /* coverity[ampersand_in_size] */
-        match = PyObject_NEW_VAR(MatchObject, &Match_Type,
-                                 2*(pattern->groups+1));
+        match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2 * (pattern->internal_groups + 1));
         if (!match)
             return NULL;
 
@@ -3673,36 +5628,37 @@
         match->string = state->string;
 
         match->regs = NULL;
-        match->groups = pattern->groups+1;
+        match->groups = pattern->groups + 1;
+        match->internal_groups = pattern->internal_groups + 1;
 
         /* fill in group slices */
+        if (state->reverse) {
+            match->mark[0] = ((char*) state->ptr - base) / charsize;
+            match->mark[1] = ((char*) state->end - base) / charsize;
+        } else {
+            match->mark[0] = ((char*) state->start - base) / charsize;
+            match->mark[1] = ((char*) state->ptr - base) / charsize;
+        }
 
-        base = (char*) state->beginning;
-        n = state->charsize;
-
-        match->mark[0] = ((char*) state->start - base) / n;
-        match->mark[1] = ((char*) state->ptr - base) / n;
-
-        for (i = j = 0; i < pattern->groups; i++, j+=2)
-            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
-                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
-                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
+        for (mark_index = 0; mark_index < pattern->internal_groups * 2; mark_index += 2) {
+            if (state->mark[mark_index] != NULL && state->mark[mark_index] <= state->mark[mark_index + 1]) {
+                match->mark[mark_index + 2] = ((char*) state->mark[mark_index] - base) / charsize;
+                match->mark[mark_index + 3] = ((char*) state->mark[mark_index + 1] - base) / charsize;
             } else
-                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
+                match->mark[mark_index + 2] = match->mark[mark_index + 3] = -1; /* unmatched */
+        }
 
         match->pos = state->pos;
         match->endpos = state->endpos;
 
         match->lastindex = state->lastindex;
+        match->last_named_index = state->last_named_index;
 
-        return (PyObject*) match;
-
+        return (PyObject*)match;
     } else if (status == 0) {
-
         /* no match */
         Py_INCREF(Py_None);
         return Py_None;
-
     }
 
     /* internal error */
@@ -3714,30 +5670,27 @@
 /* -------------------------------------------------------------------- */
 /* scanner methods (experimental) */
 
-static void
-scanner_dealloc(ScannerObject* self)
-{
+static void scanner_dealloc(ScannerObject* self) {
     state_fini(&self->state);
     Py_DECREF(self->pattern);
     PyObject_DEL(self);
 }
 
-static PyObject*
-scanner_match(ScannerObject* self, PyObject *unused)
-{
+static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
     SRE_STATE* state = &self->state;
     PyObject* match;
     int status;
 
     state_reset(state);
 
-    state->ptr = state->start;
+    state->ptr = state->reverse ? state->end : state->start;
+    memset(state->mark, 0, state->pattern_code[0] * sizeof(SRE_CHAR*));
 
     if (state->charsize == 1) {
-        status = sre_match(state, PatternObject_GetCode(self->pattern));
+        status = sre_bmatch(state, state->pattern_code);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_umatch(state, PatternObject_GetCode(self->pattern));
+        status = sre_umatch(state, state->pattern_code);
 #endif
     }
     if (PyErr_Occurred())
@@ -3746,31 +5699,36 @@
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
-    else
-        state->start = state->ptr;
+    if (state->reverse) {
+        if (status == 0 || state->ptr == state->end)
+            state->end = (void*) ((char*) state->ptr - state->charsize);
+        else
+            state->end = state->ptr;
+    } else {
+        if (status == 0 || state->ptr == state->start)
+            state->start = (void*) ((char*) state->ptr + state->charsize);
+        else
+            state->start = state->ptr;
+    }
 
     return match;
 }
 
 
-static PyObject*
-scanner_search(ScannerObject* self, PyObject *unused)
-{
+static PyObject* scanner_search(ScannerObject* self, PyObject* unused) {
     SRE_STATE* state = &self->state;
     PyObject* match;
     int status;
 
     state_reset(state);
 
-    state->ptr = state->start;
+    state->ptr = state->reverse ? state->end : state->start;
 
     if (state->charsize == 1) {
-        status = sre_search(state, PatternObject_GetCode(self->pattern));
+        status = sre_bsearch(state, state->pattern_code);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_usearch(state, PatternObject_GetCode(self->pattern));
+        status = sre_usearch(state, state->pattern_code);
 #endif
     }
     if (PyErr_Occurred())
@@ -3779,10 +5737,17 @@
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
-    else
-        state->start = state->ptr;
+    if (state->reverse) {
+        if (status == 0 || state->ptr == state->end)
+            state->end = (void*) ((char*) state->ptr - state->charsize);
+        else
+            state->end = state->ptr;
+    } else {
+        if (status == 0 || state->ptr == state->start)
+            state->start = (void*) ((char*) state->ptr + state->charsize);
+        else
+            state->start = state->ptr;
+    }
 
     return match;
 }
@@ -3793,12 +5758,10 @@
     {NULL, NULL}
 };
 
-static PyObject*
-scanner_getattr(ScannerObject* self, char* name)
-{
+static PyObject* scanner_getattr(ScannerObject* self, char* name) {
     PyObject* res;
 
-    res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
+    res = Py_FindMethod(scanner_methods, (PyObject*)self, name);
     if (res)
         return res;
 
@@ -3823,9 +5786,7 @@
     (getattrfunc)scanner_getattr, /*tp_getattr*/
 };
 
-static PyObject*
-pattern_scanner(PatternObject* pattern, PyObject* args)
-{
+static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args) {
     /* create search state object */
 
     ScannerObject* self;
@@ -3833,6 +5794,8 @@
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
+    SRE_CODE* pattern_code;
+
     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
         return NULL;
 
@@ -3841,22 +5804,26 @@
     if (!self)
         return NULL;
 
-    string = state_init(&self->state, pattern, string, start, end);
+    pattern_code = PatternObject_GetCode(pattern);
+
+    string = state_init(&self->state, pattern, string, start, end, pattern_code);
     if (!string) {
         PyObject_DEL(self);
         return NULL;
     }
 
     Py_INCREF(pattern);
-    self->pattern = (PyObject*) pattern;
+    self->pattern = (PyObject*)pattern;
 
-    return (PyObject*) self;
+    return (PyObject*)self;
 }
 
 static PyMethodDef _functions[] = {
     {"compile", _compile, METH_VARARGS},
     {"getcodesize", sre_codesize, METH_NOARGS},
     {"getlower", sre_getlower, METH_VARARGS},
+    {"getupper", sre_getupper, METH_VARARGS},
+    {"gettitle", sre_gettitle, METH_VARARGS},
     {NULL, NULL}
 };
 
@@ -3876,7 +5843,7 @@
 
     m = Py_InitModule("_" SRE_MODULE, _functions);
     if (m == NULL)
-    	return;
+        return;
     d = PyModule_GetDict(m);
 
     x = PyInt_FromLong(SRE_MAGIC);