=== modified file Lib/sre_constants.py
--- Lib/sre_constants.py 2004-08-25 02:22:30 +0000
+++ Lib/sre_constants.py 2009-03-04 15:26:20 +0000
@@ -13,11 +13,22 @@
 
 # update when constants are added or removed
 
-MAGIC = 20031017
-
-# max code word in this release
-
-MAXREPEAT = 65535
+MAGIC = 20081218
+
+import operator
+import unicodedata
+from collections import defaultdict
+
+# size of a code word in this release, in bytes and in bits
+BYTES_PER_CODE = 4
+BITS_PER_CODE = BYTES_PER_CODE * 8
+# the largest value of a code word; MAXREPEAT is the same value
+MAXCODE = MAXREPEAT = (1 << BITS_PER_CODE) - 1
+
+# sets of the decimal, octal and hexadecimal digit characters
+DIGITS = set("0123456789")
+OCTDIGITS = set("01234567")
+HEXDIGITS = set("0123456789abcdefABCDEF")
 
 # SRE standard exception (access as sre.error)
 # should this really be here?
@@ -25,181 +36,175 @@
 class error(Exception):
     pass
 
-# operators
-
-FAILURE = "failure"
-SUCCESS = "success"
-
-ANY = "any"
-ANY_ALL = "any_all"
-ASSERT = "assert"
-ASSERT_NOT = "assert_not"
-AT = "at"
-BIGCHARSET = "bigcharset"
-BRANCH = "branch"
-CALL = "call"
-CATEGORY = "category"
-CHARSET = "charset"
-GROUPREF = "groupref"
-GROUPREF_IGNORE = "groupref_ignore"
-GROUPREF_EXISTS = "groupref_exists"
-IN = "in"
-IN_IGNORE = "in_ignore"
-INFO = "info"
-JUMP = "jump"
-LITERAL = "literal"
-LITERAL_IGNORE = "literal_ignore"
-MARK = "mark"
-MAX_REPEAT = "max_repeat"
-MAX_UNTIL = "max_until"
-MIN_REPEAT = "min_repeat"
-MIN_UNTIL = "min_until"
-NEGATE = "negate"
-NOT_LITERAL = "not_literal"
-NOT_LITERAL_IGNORE = "not_literal_ignore"
-RANGE = "range"
-REPEAT = "repeat"
-REPEAT_ONE = "repeat_one"
-SUBPATTERN = "subpattern"
-MIN_REPEAT_ONE = "min_repeat_one"
-
-# positions
-AT_BEGINNING = "at_beginning"
-AT_BEGINNING_LINE = "at_beginning_line"
-AT_BEGINNING_STRING = "at_beginning_string"
-AT_BOUNDARY = "at_boundary"
-AT_NON_BOUNDARY = "at_non_boundary"
-AT_END = "at_end"
-AT_END_LINE = "at_end_line"
-AT_END_STRING = "at_end_string"
-AT_LOC_BOUNDARY = "at_loc_boundary"
-AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
-AT_UNI_BOUNDARY = "at_uni_boundary"
-AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
-
-# categories
-CATEGORY_DIGIT = "category_digit"
-CATEGORY_NOT_DIGIT = "category_not_digit"
-CATEGORY_SPACE = "category_space"
-CATEGORY_NOT_SPACE = "category_not_space"
-CATEGORY_WORD = "category_word"
-CATEGORY_NOT_WORD = "category_not_word"
-CATEGORY_LINEBREAK = "category_linebreak"
-CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
-CATEGORY_LOC_WORD = "category_loc_word"
-CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
-CATEGORY_UNI_DIGIT = "category_uni_digit"
-CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
-CATEGORY_UNI_SPACE = "category_uni_space"
-CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
-CATEGORY_UNI_WORD = "category_uni_word"
-CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
-CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
-CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
-
-OPCODES = [
-
-    # failure=0 success=1 (just because it looks better that way :-)
-    FAILURE, SUCCESS,
-
-    ANY, ANY_ALL,
-    ASSERT, ASSERT_NOT,
-    AT,
-    BRANCH,
-    CALL,
-    CATEGORY,
-    CHARSET, BIGCHARSET,
-    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
-    IN, IN_IGNORE,
-    INFO,
-    JUMP,
-    LITERAL, LITERAL_IGNORE,
-    MARK,
-    MAX_UNTIL,
-    MIN_UNTIL,
-    NOT_LITERAL, NOT_LITERAL_IGNORE,
-    NEGATE,
-    RANGE,
-    REPEAT,
-    REPEAT_ONE,
-    SUBPATTERN,
-    MIN_REPEAT_ONE
-
-]
-
-ATCODES = [
-    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
-    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
-    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
-    AT_UNI_NON_BOUNDARY
-]
-
-CHCODES = [
-    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
-    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
-    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
-    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
-    CATEGORY_UNI_NOT_LINEBREAK
-]
-
-def makedict(list):
-    d = {}
-    i = 0
-    for item in list:
-        d[item] = i
-        i = i + 1
-    return d
-
-OPCODES = makedict(OPCODES)
-ATCODES = makedict(ATCODES)
-CHCODES = makedict(CHCODES)
-
-# replacement operations for "ignore case" mode
-OP_IGNORE = {
-    GROUPREF: GROUPREF_IGNORE,
-    IN: IN_IGNORE,
-    LITERAL: LITERAL_IGNORE,
-    NOT_LITERAL: NOT_LITERAL_IGNORE
-}
-
-AT_MULTILINE = {
-    AT_BEGINNING: AT_BEGINNING_LINE,
-    AT_END: AT_END_LINE
-}
-
-AT_LOCALE = {
-    AT_BOUNDARY: AT_LOC_BOUNDARY,
-    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
-}
-
-AT_UNICODE = {
-    AT_BOUNDARY: AT_UNI_BOUNDARY,
-    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
-}
-
-CH_LOCALE = {
-    CATEGORY_DIGIT: CATEGORY_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_LOC_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
-}
-
-CH_UNICODE = {
-    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_UNI_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
-}
+# list of all the operators
+# the fields are: name, op_type, negative, directional, end_marker
+# those with a negative form start with NOT_
+# those with a reverse directional form end with _REV
+_OPERATOR_LIST = """
+FAILURE               INVALID         N N -
+SUCCESS               INVALID         N N -
+ANY                   SIMPLE_CATEGORY N Y -
+ANY_ALL               SIMPLE_CATEGORY N Y -
+ASSERT                ASSERT          N N END_ASSERT
+ASSERT_NOT            ASSERT          N N END_ASSERT_NOT
+ATOMIC                ATOMIC          N N END_ATOMIC
+BOUNDARY              POSITION        Y N -
+BRANCH                BRANCH          N N -
+CATEGORY              CATEGORY        Y Y -
+CHARSET               CHARSET         Y Y -
+CHARSET_IGNORE        CHARSET         Y Y -
+END_OF_LINE           POSITION        N N -
+END_OF_STRING         POSITION        N N -
+END_OF_STRING_LN      POSITION        N N -
+GROUPREF              GROUPREF        N Y -
+GROUPREF_EXISTS       GROUPREF_EXISTS N N -
+GROUPREF_IGNORE       GROUPREF        N Y -
+JUMP                  INVALID         N N -
+LITERAL               LITERAL         Y Y -
+LITERAL_IGNORE        LITERAL         Y Y -
+LITERAL_STRING        LITERAL_STRING  N Y -
+LITERAL_STRING_IGNORE LITERAL_STRING  N Y -
+MARK                  MARK            N N -
+RANGE                 RANGE           Y Y -
+RANGE_IGNORE          RANGE           Y Y -
+REPEAT_MAX            REPEAT          N Y END_REPEAT_MAX
+REPEAT_MIN            REPEAT          N Y END_REPEAT_MIN
+REPEAT_ONE_MAX        REPEAT_ONE      N Y -
+REPEAT_ONE_MIN        REPEAT_ONE      N Y -
+REPEAT_ONE_POSS       REPEAT_ONE      N Y -
+REPEAT_POSS           REPEAT          N Y END_REPEAT_POSS
+SET                   SET             Y Y -
+SET_IGNORE            SET             Y Y -
+START_OF_LINE         POSITION        N N -
+START_OF_SEARCH       POSITION        N N -
+START_OF_STRING       POSITION        N N -
+SUBPATTERN            INVALID         N N -
+"""
+
+# expands the operator table text into (name, op_type, direction, end_marker)
+def _build_operator_list(OPERATOR_LIST):
+    # table flag -> name prefixes / (direction, name suffix) pairs to generate
+    neg_prefix = {"N": [""], "Y": ["", "NOT_"]}
+    dir_suffix = {"N": [(0, "")], "Y": [(1, ""), (-1, "_REV")]}
+    operators = []
+    # read the table passed in by the caller rather than the module global
+    for line in OPERATOR_LIST.splitlines():
+        fields = line.split()
+        if not fields:
+            continue  # skip blank lines
+        name, op_type, negative, directional, end_marker = fields
+        # some opcodes have a negative "NOT_x" form
+        for n in neg_prefix[negative]:
+            # some opcodes are directional; they have a reverse "x_REV" form
+            for d, r in dir_suffix[directional]:
+                operators.append((n + name + r, op_type, d, end_marker))
+                # an end marker becomes an extra opcode of type INVALID
+                if end_marker != "-":
+                    operators.append((n + end_marker + r, "INVALID", d, "-"))
+    return operators
+
+_operator_list = _build_operator_list(_OPERATOR_LIST)
+
+# builds a dict mapping each "NOT_x" opcode name to "x" and vice versa
+def _build_not_opcodes(operator_list):
+    not_opcodes = {}
+    # use the argument: the module-level list is later rebound to 5-tuples
+    for name, op_type, direction, end_marker in operator_list:
+        if name.startswith("NOT_"):
+            short_name = name[len("NOT_"):]
+            not_opcodes[name], not_opcodes[short_name] = short_name, name
+    return not_opcodes
+
+_not_opcodes = _build_not_opcodes(_operator_list)
+
+# swaps the opcode name in op[0] for its positive/negative partner
+def not_op(op):
+    return (_not_opcodes[op[0]], op[1])
+
+# pairs every "x_IGNORE" opcode name with "x", in both directions
+def _build_ignore_opcodes(operator_list):
+    suffix = "_IGNORE"
+    mapping = {}
+    for full_name, _op_type, _direction, _end_marker in operator_list:
+        if full_name.endswith(suffix):
+            base_name = full_name[:-len(suffix)]
+            mapping[full_name], mapping[base_name] = base_name, full_name
+    # the category opcodes are their own ignore-case form
+    mapping.update({"CATEGORY": "CATEGORY", "NOT_CATEGORY": "NOT_CATEGORY"})
+    return mapping
+
+_ignore_opcodes = _build_ignore_opcodes(_operator_list)
+
+# swaps the opcode name in op[0] for its normal_case/ignore_case partner
+def ignore_op(op):
+    return (_ignore_opcodes[op[0]], op[1])
+
+# orders the operators and tags each one with its opcode number
+def _sorted_operators(operator_list):
+    # the first two entries (FAILURE, SUCCESS) keep their place; the rest sort
+    head, tail = operator_list[:2], operator_list[2:]
+    ordered = head + sorted(tail)
+    return [(name, number, op_type, direction, end_marker) for number,
+      (name, op_type, direction, end_marker) in enumerate(ordered)]
+
+_operator_list = _sorted_operators(_operator_list)
+
+# build the OPCODES dict (operator name -> opcode number)
+OPCODES = dict((_entry[0], _entry[1]) for _entry in _operator_list)
+
+# collect the distinct op_types
+_op_types = set(_entry[2] for _entry in _operator_list)
+
+# create an attribute in OP for each operator; the value of each attribute
+# is the operator's own name
+class _Record(object):
+    pass
+
+OP = _Record()
+
+for _name in OPCODES:
+    setattr(OP, _name, _name)
+
+# unicode codepoint categories (property "\p{Lu}", etc)
+# (these entries must have certain fixed values)
+_UNI_CATEGORY_LIST = """- Lu Ll Lt Mn Mc Me Nd Nl No Zs Zl Zp Cc Cf Cs
+  Co - Lm Lo Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So -"""
+
+# additional unicode categories (property "\p{Alpha}", etc)
+_COMMON_CATEGORY_LIST = """Alpha Alnum ASCII Blank Cntrl Digit Graph LineBreak
+  Lower Print Punct Space Upper Word XDigit"""
+
+# builds the name -> number dict for the unicode and common categories
+def _build_categories(UNI_CATEGORY_LIST, COMMON_CATEGORY_LIST):
+    uni_names = UNI_CATEGORY_LIST.split()
+    categories = {}
+    all_categories = 0
+    # a name's number is its position in the list; "-" only reserves a slot
+    for number, name in enumerate(uni_names):
+        if name == "-":
+            continue
+        categories[name] = number
+        all_categories |= 1 << number
+
+    # the slots must stay below 0x20, where the supercategories start
+    assert len(uni_names) <= 0x20
+
+    # add the unicode supercategories (properties "\p{L}", "\p{L&}", etc)
+    next_number = 0x20
+    for name in uni_names:
+        initial = name[0]
+        if name != "-" and initial not in categories:
+            categories[initial] = categories[initial + "&"] = next_number
+            next_number += 1
+
+    # the common categories are numbered straight after the supercategories
+    common_category_start = next_number
+    for offset, name in enumerate(COMMON_CATEGORY_LIST.split()):
+        categories[name] = common_category_start + offset
+    return categories, common_category_start, all_categories
+
+CATEGORIES, COMMON_CATEGORY_START, _ALL_CAT = _build_categories(
+  _UNI_CATEGORY_LIST, _COMMON_CATEGORY_LIST)
 
 # flags
 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
@@ -210,6 +215,8 @@
 SRE_FLAG_UNICODE = 32 # use unicode locale
 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
 SRE_FLAG_DEBUG = 128 # debugging
+SRE_FLAG_REVERSE = 256 # search backwards
+SRE_FLAG_ZEROWIDTH = 512 # permit split on zero-width
 
 # flags for INFO primitive
 SRE_INFO_PREFIX = 1 # has prefix
@@ -217,12 +224,10 @@
 SRE_INFO_CHARSET = 4 # pattern starts with character from given set
 
 if __name__ == "__main__":
-    def dump(f, d, prefix):
-        items = d.items()
-        items.sort(key=lambda a: a[1])
-        for k, v in items:
-            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
-    f = open("sre_constants.h", "w")
+    # generate the sre_constants.h header file
+    f = open("sre_constants.h", "wb")
+
+    # the title comment
     f.write("""\
 /*
  * Secret Labs' Regular Expression Engine
@@ -239,23 +244,121 @@
 
 """)
 
+    # the magic value
     f.write("#define SRE_MAGIC %d\n" % MAGIC)
 
-    dump(f, OPCODES, "SRE_OP")
-    dump(f, ATCODES, "SRE")
-    dump(f, CHCODES, "SRE")
-
-    f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
-    f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
-    f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
-    f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
-    f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
-    f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
-    f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
-
-    f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
-    f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
-    f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
+    # the codeword definition
+    f.write("\n")
+    f.write("/* size of a code word (must be unsigned short or larger, and\n")
+    f.write("   large enough to hold a Py_UNICODE character) */\n")
+    if BYTES_PER_CODE == 4:
+        f.write("typedef unsigned int SRE_CODE;\n")
+    else:
+        f.write("typedef unsigned short SRE_CODE;\n")
+
+    # the codeword size
+    f.write("\n")
+    f.write("#define SRE_BYTES_PER_CODE %d\n" % BYTES_PER_CODE)
+    f.write("#define SRE_BITS_PER_CODE %d\n" % BITS_PER_CODE)
+
+    # the constant for unlimited repeats
+    f.write("#define SRE_UNLIMITED_REPEATS 0x%X\n" % MAXREPEAT)
+
+    # the opcodes
+    f.write("\n")
+    for name, number, op_type, direction, end_marker in _operator_list:
+        f.write("#define SRE_OP_%s %d\n" % (name, number))
+    f.write("#define SRE_MAX_OP %d\n" % (len(_operator_list) - 1))
+
+    # the regex flags
+    f.write("\n")
+    f.write("#define SRE_FLAG_TEMPLATE 0x%X\n" % SRE_FLAG_TEMPLATE)
+    f.write("#define SRE_FLAG_IGNORECASE 0x%X\n" % SRE_FLAG_IGNORECASE)
+    f.write("#define SRE_FLAG_LOCALE 0x%X\n" % SRE_FLAG_LOCALE)
+    f.write("#define SRE_FLAG_MULTILINE 0x%X\n" % SRE_FLAG_MULTILINE)
+    f.write("#define SRE_FLAG_DOTALL 0x%X\n" % SRE_FLAG_DOTALL)
+    f.write("#define SRE_FLAG_UNICODE 0x%X\n" % SRE_FLAG_UNICODE)
+    f.write("#define SRE_FLAG_VERBOSE 0x%X\n" % SRE_FLAG_VERBOSE)
+    f.write("#define SRE_FLAG_REVERSE 0x%X\n" % SRE_FLAG_REVERSE)
+    f.write("#define SRE_FLAG_ZEROWIDTH 0x%X\n" % SRE_FLAG_ZEROWIDTH)
+
+    # the info constants
+    f.write("\n")
+    f.write("#define SRE_INFO_PREFIX 0x%X\n" % SRE_INFO_PREFIX)
+    f.write("#define SRE_INFO_LITERAL 0x%X\n" % SRE_INFO_LITERAL)
+    f.write("#define SRE_INFO_CHARSET 0x%X\n" % SRE_INFO_CHARSET)
+
+    # the unicode categories and supercategories
+    f.write("\n")
+    categories = sorted(CATEGORIES.items(), key=operator.itemgetter(1))
+    for name, value in categories:
+        # include "L" but exclude "L&" when making the names
+        if value < COMMON_CATEGORY_START and name.isalnum():
+            f.write("#define SRE_UNI_CAT_%s 0x%X\n" % (name, value))
+
+    # the common categories
+    f.write("\n")
+    for name, value in categories:
+        if value >= COMMON_CATEGORY_START:
+            f.write("#define SRE_CAT_%s 0x%X\n" % (name, value))
+
+    # build the supercategories ("L&")
+    f.write("\n")
+    groups = defaultdict(int)
+    for name, value in categories:
+        if value < COMMON_CATEGORY_START and len(name) == 2 and name.isalpha():
+            groups[name[ : 1]] |= 1 << value
+
+    # the supercategories
+    for name, value in sorted(groups.items()):
+        f.write("#define SRE_CAT_MASK_%s 0x%08X\n" % (name, value))
+
+    f.write("\n")
+    f.write("#define SRE_CAT_MASK_Alnum 0x%08X\n" % (groups["L"] |
+      (1 << CATEGORIES["Nd"])))
+    f.write("#define SRE_CAT_MASK_Alpha 0x%08X\n" % groups["L"])
+    f.write("#define SRE_CAT_MASK_Graph 0x%08X\n" % ((groups["Z"] | groups["C"])
+      ^ _ALL_CAT))
+    f.write("#define SRE_CAT_MASK_Print 0x%08X\n" % (groups["C"] ^ _ALL_CAT))
+    f.write("#define SRE_CAT_MASK_Punct 0x%08X\n" % (groups["P"] | groups["S"]))
+    f.write("#define SRE_CAT_MASK_Word 0x%08X\n" % (groups["L"] | groups["N"] |
+      groups["M"] | (1 << CATEGORIES["Pc"])))
+
+    # the opcode type info
+    f.write("""
+// info for operator validation
+typedef struct SRE_OpInfo {
+    char* name;
+    int type;
+    int direction;
+    int end_marker;
+} SRE_OpInfo;
+
+""")
+
+    # sort the op_types (putting "INVALID" first) and assign numbers
+    _op_types = sorted(_op_types, key=lambda name:
+      ("" if name == "INVALID" else name))
+    _op_types = [(name, number) for number, name in enumerate(_op_types)]
+    for name, number in _op_types:
+        f.write("#define SRE_TYPE_%s %d\n" % (name, number))
+
+    # the opcode type info
+    _op_types = dict(_op_types)
+    f.write("""
+static SRE_OpInfo sre_op_info[] = {
+""")
+    for name, number, op_type, direction, end_marker in _operator_list:
+        if end_marker == "-":
+            end_marker = "0"
+        else:
+            end_marker = "SRE_OP_%s" % end_marker
+        f.write("    {\"%s\", %s, %s, %s},\n" % (name, _op_types[op_type],
+          direction, end_marker))
+    f.write("};\n")
 
     f.close()
     print "done"
+else:
+    # make all the names lowercase so we can be case-insensitive when parsing
+    CATEGORIES = dict((n.lower(), v) for n, v in CATEGORIES.items())
=== modified file Lib/sre_compile.py
--- Lib/sre_compile.py 2008-10-14 22:37:18 +0000
+++ Lib/sre_compile.py 2009-03-06 18:05:22 +0000
@@ -11,275 +11,286 @@
 """Internal support module for sre"""
 
 import _sre, sys
-import sre_parse
+
 from sre_constants import *
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
-if _sre.CODESIZE == 2:
-    MAXCODE = 65535
-else:
-    MAXCODE = 0xFFFFFFFFL
-
-def _identityfunction(x):
-    return x
-
-_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
-_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
-_SUCCESS_CODES = set([SUCCESS, FAILURE])
-_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
-
-def _compile(code, pattern, flags):
+ASSERT_OP_CODES = {
+    OP.ASSERT: OP.END_ASSERT,
+    OP.ASSERT_NOT: OP.END_ASSERT_NOT,
+}
+
+REPEAT_OP_CODES = {
+    OP.REPEAT_MAX: OP.END_REPEAT_MAX,
+    OP.REPEAT_MIN: OP.END_REPEAT_MIN,
+    OP.REPEAT_POSS: OP.END_REPEAT_POSS,
+}
+
+SINGLE_CHAR_OP_CODES = set([
+    OP.ANY, OP.ANY_ALL,
+    OP.CATEGORY, OP.NOT_CATEGORY,
+    OP.CHARSET, OP.CHARSET_IGNORE, OP.NOT_CHARSET, OP.NOT_CHARSET_IGNORE,
+    OP.LITERAL, OP.LITERAL_IGNORE, OP.NOT_LITERAL, OP.NOT_LITERAL_IGNORE,
+    OP.RANGE, OP.RANGE_IGNORE,
+    OP.SET, OP.NOT_SET,
+])
+
+NORMAL_OP_CODES, REVERSE_OP_CODES = {}, {}
+for op in dir(OP):
+    if not op.startswith("_"):
+        NORMAL_OP_CODES[op] = op
+        if op.endswith("_REV"):
+            REVERSE_OP_CODES[op[ : -4]] = op
+        else:
+            REVERSE_OP_CODES.setdefault(op, op)
+
+REPEAT_ONE_OP_CODES = {
+    OP.REPEAT_MAX: OP.REPEAT_ONE_MAX,
+    OP.REPEAT_MIN: OP.REPEAT_ONE_MIN,
+    OP.REPEAT_POSS: OP.REPEAT_ONE_POSS,
+}
+
+CATEGORY_OP_SET = set([OP.CATEGORY, OP.NOT_CATEGORY])
+CHARSET_OP_SET = set([OP.CHARSET, OP.CHARSET_IGNORE, OP.NOT_CHARSET,
+  OP.NOT_CHARSET_IGNORE])
+GROUPREF_OP_SET = set([OP.GROUPREF, OP.GROUPREF_IGNORE])
+LITERAL_OP_SET = set([OP.LITERAL, OP.LITERAL_IGNORE, OP.NOT_LITERAL,
+  OP.NOT_LITERAL_IGNORE])
+POSITION_OP_SET = set([OP.BOUNDARY, OP.END_OF_LINE, OP.END_OF_STRING,
+  OP.END_OF_STRING_LN, OP.NOT_BOUNDARY, OP.START_OF_LINE, OP.START_OF_SEARCH,
+  OP.START_OF_STRING])
+RANGE_OP_SET = set([OP.NOT_RANGE, OP.NOT_RANGE_IGNORE, OP.RANGE,
+  OP.RANGE_IGNORE])
+REPEAT_OP_SET = set([OP.REPEAT_MAX, OP.REPEAT_MIN, OP.REPEAT_POSS])
+SET_OP_SET = set([OP.SET, OP.SET_IGNORE, OP.NOT_SET, OP.NOT_SET_IGNORE])
+SIMPLE_CATEGORY_OP_SET = set([OP.ANY, OP.ANY_ALL])
+
+def _compile(code, pattern, flags, info, dir=1):
     # internal: compile a (sub)pattern
     emit = code.append
-    _len = len
-    LITERAL_CODES = _LITERAL_CODES
-    REPEATING_CODES = _REPEATING_CODES
-    SUCCESS_CODES = _SUCCESS_CODES
-    ASSERT_CODES = _ASSERT_CODES
+    literal_op, literal_string = None, []
+    if dir < 0:
+        fix_direction = REVERSE_OP_CODES
+    else:
+        fix_direction = NORMAL_OP_CODES
+    if dir < 0:
+        # Within lookbehind, so reverse the order of the matching
+        pattern = reversed(pattern)
+    def flush_literal():
+        if literal_string:
+            emit_literal_string(code, literal_op, literal_string[ : : dir],
+              fix_direction)
     for op, av in pattern:
-        if op in LITERAL_CODES:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                emit(_sre.getlower(av, flags))
+        if op in SET_OP_SET:
+            op, av = _optimize_set(op, av, flags)
+        if op == literal_op:
+            literal_string.append(av)
+        else:
+            flush_literal()
+            if op in (OP.LITERAL, OP.LITERAL_IGNORE):
+                literal_op, literal_string = op, [av]
             else:
-                emit(OPCODES[op])
-                emit(av)
-        elif op is IN:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                def fixup(literal, flags=flags):
-                    return _sre.getlower(literal, flags)
-            else:
-                emit(OPCODES[op])
-                fixup = _identityfunction
-            skip = _len(code); emit(0)
-            _compile_charset(av, flags, code, fixup)
-            code[skip] = _len(code) - skip
-        elif op is ANY:
-            if flags & SRE_FLAG_DOTALL:
-                emit(OPCODES[ANY_ALL])
-            else:
-                emit(OPCODES[ANY])
-        elif op in REPEATING_CODES:
-            if flags & SRE_FLAG_TEMPLATE:
-                raise error, "internal: unsupported template operator"
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
-            elif _simple(av) and op is not REPEAT:
-                if op is MAX_REPEAT:
-                    emit(OPCODES[REPEAT_ONE])
+                literal_op, literal_string = None, []
+                if op in ASSERT_OP_CODES:
+                    # <assert> <skip to end> ... <end_assert>
+                    emit(OPCODES[op])
+                    skip = len(code); emit(0)
+                    _compile(code, av[1], flags, info, av[0])
+                    emit(OPCODES[ASSERT_OP_CODES[op]])
+                    code[skip] = len(code) - skip
+                elif op == OP.ATOMIC:
+                    # <ATOMIC> ... <END_ATOMIC>
+                    emit(OPCODES[OP.ATOMIC])
+                    _compile(code, av[1], flags, info, dir)
+                    emit(OPCODES[OP.END_ATOMIC])
+                elif op == OP.BRANCH:
+                    # <BRANCH>
+                    # <skip to next>
+                    #     ...
+                    # <JUMP> <skip to end>
+                    # <skip to next>
+                    #     ...
+                    # <JUMP> <skip to end>
+                    # 0
+                    emit(OPCODES[op])
+                    tail = []
+                    tailappend = tail.append
+                    for av in av[1]:
+                        skip = len(code); emit(0)
+                        _compile(code, av, flags, info, dir)
+                        emit(OPCODES[OP.JUMP])
+                        tailappend(len(code)); emit(0)
+                        code[skip] = len(code) - skip
+                    emit(0) # end of branches
+                    for tail in tail:
+                        code[tail] = len(code) - tail
+                elif op in CATEGORY_OP_SET:
+                    # <category> category
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av)
+                elif op in CHARSET_OP_SET:
+                    # <charset> skip charset
+                    emit(OPCODES[fix_direction[op]])
+                    skip = len(code); emit(0)
+                    _compile_charset(code, av)
+                    code[skip] = len(code) - skip
+                elif op in GROUPREF_OP_SET:
+                    # <groupref> group_id
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av - 1)
+                elif op == OP.GROUPREF_EXISTS:
+                    # <GROUPREF_EXISTS> group_id
+                    # <skip to code_no>
+                    # code_yes
+                    # <JUMP> <skip to end>
+                    # code_no
+                    emit(OPCODES[op])
+                    emit(av[0] - 1)
+                    skipyes = len(code); emit(0)
+                    _compile(code, av[1], flags, info, dir)
+                    if av[2]:
+                        emit(OPCODES[OP.JUMP])
+                        skipno = len(code); emit(0)
+                        code[skipyes] = len(code) - skipyes + 1
+                        _compile(code, av[2], flags, info, dir)
+                        code[skipno] = len(code) - skipno
+                    else:
+                        code[skipyes] = len(code) - skipyes + 1
+                elif op in LITERAL_OP_SET:
+                    # <literal> code
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av)
+                elif op in POSITION_OP_SET:
+                    # <position>
+                    emit(OPCODES[op])
+                elif op in RANGE_OP_SET:
+                    # <range> min max
+                    emit(OPCODES[fix_direction[op]])
+                    emit(av[0])
+                    emit(av[1])
+                elif op in REPEAT_OP_SET:
+                    if flags & SRE_FLAG_TEMPLATE:
+                        raise error("internal: unsupported template operator")
+                    else:
+                        single = get_single_character(av[2])
+                        if single:
+                            # <repeat_one> <skip to end> <min> <max> ...
+                            emit(OPCODES[fix_direction[REPEAT_ONE_OP_CODES[op]]])
+                            skip = len(code); emit(0)
+                            emit(av[0])
+                            emit(av[1])
+                            _compile(code, single, flags, info, dir)
+                            code[skip] = len(code) - skip
+                        else:
+                            # <repeat> <skip to end> <min> <max>
+                            #     ...
+                            # <end_repeat> <skip to start>
+                            emit(OPCODES[fix_direction[op]])
+                            skip = len(code); emit(0)
+                            emit(av[0])
+                            emit(av[1])
+                            _compile(code, av[2], flags, info, dir)
+                            emit(OPCODES[fix_direction[REPEAT_OP_CODES[op]]])
+                            offset = len(code) - skip
+                            code[skip] = offset
+                            emit(offset)
+                elif op in SET_OP_SET:
+                    # <set> set
+                    emit(OPCODES[fix_direction[op]])
+                    _compile_set(code, av)
+                elif op in SIMPLE_CATEGORY_OP_SET:
+                    # <category>
+                    emit(OPCODES[fix_direction[op]])
+                elif op == OP.SUBPATTERN:
+                    if av[0]:
+                        number_id, name_id = av[0]
+                        info.group_count += 1
+                        number_start_mark, number_end_mark = (number_id * 2 - 2,
+                          number_id * 2 - 1)
+                        name_start_mark, name_end_mark = (name_id * 2 - 2,
+                          name_id * 2 - 1)
+                        if dir < 0:
+                            number_start_mark, number_end_mark = (number_end_mark,
+                              number_start_mark)
+                            name_start_mark, name_end_mark = (name_end_mark,
+                              name_start_mark)
+                        # <MARK> <numbered_id> <named_id>
+                        emit(OPCODES[OP.MARK])
+                        emit(number_start_mark)
+                        emit(name_start_mark)
+                    _compile(code, av[1], flags, info, dir)
+                    if av[0]:
+                        # <MARK> <numbered_id> <named_id>
+                        emit(OPCODES[OP.MARK])
+                        emit(number_end_mark)
+                        emit(name_end_mark)
                 else:
-                    emit(OPCODES[MIN_REPEAT_ONE])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
-            else:
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                code[skip] = _len(code) - skip
-                if op is MAX_REPEAT:
-                    emit(OPCODES[MAX_UNTIL])
-                else:
-                    emit(OPCODES[MIN_UNTIL])
-        elif op is SUBPATTERN:
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2)
-            # _compile_info(code, av[1], flags)
-            _compile(code, av[1], flags)
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2+1)
-        elif op in SUCCESS_CODES:
+                    raise ValueError("unsupported operand type: %s" % op)
+    flush_literal()
+
+def emit_literal_string(code, literal_op, literal_string, fix_direction):
+    # emits the opcode(s) for a run of literals that share one literal opcode
+    length = len(literal_string)
+    if length <= 1:
+        # a single character
+        # <literal> code
+        code.append(OPCODES[fix_direction[literal_op]])
+        code.append(literal_string[0])
+        return
+    # a string: <literal_string> length ...
+    if literal_op == OP.LITERAL_IGNORE:
+        string_op = OP.LITERAL_STRING_IGNORE
+    else:
+        string_op = OP.LITERAL_STRING
+    code.append(OPCODES[fix_direction[string_op]])
+    code.append(length)
+    code.extend(literal_string)
+
+def get_single_character(pattern):
+    # returns the pattern if it is exactly one single-character op, else None
+    is_single = len(pattern) == 1 and pattern[0][0] in SINGLE_CHAR_OP_CODES
+    return pattern if is_single else None
+
+def _compile_set(code, charset):
+    emit = code.append
+    skip_set = len(code); emit(0)
+    for op, av in charset:
+        if op in CHARSET_OP_SET:
+            # <charset> skip charset
             emit(OPCODES[op])
-        elif op in ASSERT_CODES:
+            skip = len(code); emit(0)
+            _compile_charset(code, av)
+            code[skip] = len(code) - skip
+        elif op in CATEGORY_OP_SET:
+            # <category> category
             emit(OPCODES[op])
-            skip = _len(code); emit(0)
-            if av[0] >= 0:
-                emit(0) # look ahead
-            else:
-                lo, hi = av[1].getwidth()
-                if lo != hi:
-                    raise error, "look-behind requires fixed-width pattern"
-                emit(lo) # look behind
-            _compile(code, av[1], flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is CALL:
+            emit(av)
+        elif op == OP.LITERAL:
+            # <literal> code
             emit(OPCODES[op])
-            skip = _len(code); emit(0)
-            _compile(code, av, flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is AT:
+            emit(av)
+        elif op == OP.RANGE:
+            # <range> min max
             emit(OPCODES[op])
-            if flags & SRE_FLAG_MULTILINE:
-                av = AT_MULTILINE.get(av, av)
-            if flags & SRE_FLAG_LOCALE:
-                av = AT_LOCALE.get(av, av)
-            elif flags & SRE_FLAG_UNICODE:
-                av = AT_UNICODE.get(av, av)
-            emit(ATCODES[av])
-        elif op is BRANCH:
+            emit(av[0])
+            emit(av[1])
+        elif op in SIMPLE_CATEGORY_OP_SET:
+            # <category>
             emit(OPCODES[op])
-            tail = []
-            tailappend = tail.append
-            for av in av[1]:
-                skip = _len(code); emit(0)
-                # _compile_info(code, av, flags)
-                _compile(code, av, flags)
-                emit(OPCODES[JUMP])
-                tailappend(_len(code)); emit(0)
-                code[skip] = _len(code) - skip
-            emit(0) # end of branch
-            for tail in tail:
-                code[tail] = _len(code) - tail
-        elif op is CATEGORY:
-            emit(OPCODES[op])
-            if flags & SRE_FLAG_LOCALE:
-                av = CH_LOCALE[av]
-            elif flags & SRE_FLAG_UNICODE:
-                av = CH_UNICODE[av]
-            emit(CHCODES[av])
-        elif op is GROUPREF:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-            else:
-                emit(OPCODES[op])
-            emit(av-1)
-        elif op is GROUPREF_EXISTS:
-            emit(OPCODES[op])
-            emit(av[0]-1)
-            skipyes = _len(code); emit(0)
-            _compile(code, av[1], flags)
-            if av[2]:
-                emit(OPCODES[JUMP])
-                skipno = _len(code); emit(0)
-                code[skipyes] = _len(code) - skipyes + 1
-                _compile(code, av[2], flags)
-                code[skipno] = _len(code) - skipno
-            else:
-                code[skipyes] = _len(code) - skipyes + 1
         else:
-            raise ValueError, ("unsupported operand type", op)
-
-def _compile_charset(charset, flags, code, fixup=None):
-    # compile charset subprogram
-    emit = code.append
-    if fixup is None:
-        fixup = _identityfunction
-    for op, av in _optimize_charset(charset, fixup):
-        emit(OPCODES[op])
-        if op is NEGATE:
-            pass
-        elif op is LITERAL:
-            emit(fixup(av))
-        elif op is RANGE:
-            emit(fixup(av[0]))
-            emit(fixup(av[1]))
-        elif op is CHARSET:
-            code.extend(av)
-        elif op is BIGCHARSET:
-            code.extend(av)
-        elif op is CATEGORY:
-            if flags & SRE_FLAG_LOCALE:
-                emit(CHCODES[CH_LOCALE[av]])
-            elif flags & SRE_FLAG_UNICODE:
-                emit(CHCODES[CH_UNICODE[av]])
-            else:
-                emit(CHCODES[av])
-        else:
-            raise error, "internal: unsupported set operator"
-    emit(OPCODES[FAILURE])
-
-def _optimize_charset(charset, fixup):
-    # internal: optimize character set
-    out = []
-    outappend = out.append
-    charmap = [0]*256
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                outappend((op, av))
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in range(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could append to charmap tail
-                return charset # cannot compress
-    except IndexError:
-        # character set contains unicode characters
-        return _optimize_unicode(charset, fixup)
-    # compress character map
-    i = p = n = 0
-    runs = []
-    runsappend = runs.append
-    for c in charmap:
-        if c:
-            if n == 0:
-                p = i
-            n = n + 1
-        elif n:
-            runsappend((p, n))
-            n = 0
-        i = i + 1
-    if n:
-        runsappend((p, n))
-    if len(runs) <= 2:
-        # use literal/range
-        for p, n in runs:
-            if n == 1:
-                outappend((LITERAL, p))
-            else:
-                outappend((RANGE, (p, p+n-1)))
-        if len(out) < len(charset):
-            return out
-    else:
-        # use bitmap
-        data = _mk_bitmap(charmap)
-        outappend((CHARSET, data))
-        return out
-    return charset
-
-def _mk_bitmap(bits):
-    data = []
-    dataappend = data.append
-    if _sre.CODESIZE == 2:
-        start = (1, 0)
-    else:
-        start = (1L, 0L)
-    m, v = start
-    for c in bits:
-        if c:
-            v = v + m
-        m = m + m
-        if m > MAXCODE:
-            dataappend(v)
-            m, v = start
-    return data
-
-# To represent a big charset, first a bitmap of all characters in the
+            raise error("internal: unsupported set member: %s" % op)
+    code[skip_set] = len(code) - skip_set
+
+# The characters may be mapped to a bitmap.
+
+# To represent a charset, first a bitmap of all characters in the
 # set is constructed. Then, this bitmap is sliced into chunks of 256
 # characters, duplicate chunks are eliminated, and each chunk is
 # given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
-# of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+# represented by a codeword sequence, consisting of one codeword for
+# the maximum character code, a sequence of chunk numbers
+# (2 per codeword), and a sequence of chunks (8 codewords each).
 
 # Compression is normally good: in a typical charset, large ranges of
 # Unicode will be either completely excluded (e.g. if only cyrillic
@@ -287,217 +298,148 @@
 # subranges of Kanji match). These ranges will be represented by
 # chunks of all one-bits or all zero-bits.
 
-# Matching can be also done efficiently: the more significant byte of
+# Matching can also be done efficiently: the most significant bits of
 # the Unicode character is an index into the chunk number, and the
-# less significant byte is a bit index in the chunk (just like the
-# CHARSET matching).
-
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
-# of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
-# in particular, that negated charsets cannot be represented as
-# bigcharsets.
-
-def _optimize_unicode(charset, fixup):
-    try:
-        import array
-    except ImportError:
-        return charset
-    charmap = [0]*65536
-    negate = 0
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                negate = 1
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in xrange(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could expand category
-                return charset # cannot compress
-    except IndexError:
-        # non-BMP characters
-        return charset
-    if negate:
-        if sys.maxunicode != 65535:
-            # XXX: negation does not work with big charsets
-            return charset
-        for i in xrange(65536):
-            charmap[i] = not charmap[i]
-    comps = {}
-    mapping = [0]*256
-    block = 0
-    data = []
-    for i in xrange(256):
-        chunk = tuple(charmap[i*256:(i+1)*256])
-        new = comps.setdefault(chunk, block)
-        mapping[i] = new
-        if new == block:
-            block = block + 1
-            data = data + _mk_bitmap(chunk)
-    header = [block]
-    if _sre.CODESIZE == 2:
-        code = 'H'
+# least significant byte is a bit index into the chunk.
+
+# A charset is a 3-tuple, consisting of the maximum character code,
+# a list of indexes and a list of 256-bit bitsets
+def _compile_charset(code, charset):
+    # charset[0] is the maximum character code
+    code.append(charset[0])
+    # two 16-bit chunk indexes per codeword, even positions in the low half;
+    # the padding 0 keeps zip() from dropping an unpaired final index
+    indexes = charset[1]
+    for low, high in zip(indexes[ : : 2], indexes[1 : : 2] + [0]):
+        code.append(low | (high << 16))
+    # each 256-bit bitset is emitted least significant codeword first
+    for bits in charset[2]:
+        for _ in range(256 // BITS_PER_CODE):
+            code.append(bits & MAXCODE)
+            bits >>= BITS_PER_CODE
+
+def _ones(n):
+    return ~(~0 << n)  # a mask with the n lowest bits set, i.e. (1 << n) - 1
+
+def _optimize_set(set_op, set_members, flags):
+    # consolidate the ranges (the bounds are inclusive)
+    charset = set()
+    categories = []
+    for o, a in set_members:
+        if o == OP.LITERAL:
+            charset.add(a)
+        elif o == OP.RANGE:
+            for c in xrange(a[0], a[1] + 1):
+                charset.add(c)
+        else:
+            categories.append((o, a))
+    categories = sorted(set(categories))
+    # convert charset to list of ranges
+    ranges = []
+    start, end = None, None
+    for c in sorted(charset):
+        try:
+            if c == end + 1:
+                end = c
+            else:
+                ranges.append((start, end))
+                start, end = c, c
+        except TypeError:
+            start, end = c, c
+    if start is not None:
+        ranges.append((start, end))
+    # try to optimise the set
+    if len(ranges) <= 1:
+        # only a few ranges
+        for r in ranges:
+            if r[0] == r[1]:
+                # a range of 1 character!
+                categories.append((OP.LITERAL, r[0]))
+            else:
+                categories.append((OP.RANGE, r))
     else:
-        code = 'I'
-    # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tostring()
-    # Convert byte array to word array
-    mapping = array.array(code, mapping)
-    assert mapping.itemsize == _sre.CODESIZE
-    header = header + mapping.tolist()
-    data[0:0] = header
-    return [(BIGCHARSET, data)]
-
-def _simple(av):
-    # check if av is a "simple" operator
-    lo, hi = av[2].getwidth()
-    if lo == 0 and hi == MAXREPEAT:
-        raise error, "nothing to repeat"
-    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
-
-def _compile_info(code, pattern, flags):
-    # internal: compile an info block.  in the current version,
-    # this contains min/max pattern width, and an optional literal
-    # prefix or a character map
-    lo, hi = pattern.getwidth()
-    if lo == 0:
-        return # not worth it
-    # look for a literal prefix
-    prefix = []
-    prefixappend = prefix.append
-    prefix_skip = 0
-    charset = [] # not used
-    charsetappend = charset.append
-    if not (flags & SRE_FLAG_IGNORECASE):
-        # look for literal prefix
-        for op, av in pattern.data:
-            if op is LITERAL:
-                if len(prefix) == prefix_skip:
-                    prefix_skip = prefix_skip + 1
-                prefixappend(av)
-            elif op is SUBPATTERN and len(av[1]) == 1:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    prefixappend(av)
-                else:
-                    break
-            else:
-                break
-        # if no prefix, look for charset prefix
-        if not prefix and pattern.data:
-            op, av = pattern.data[0]
-            if op is SUBPATTERN and av[1]:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    charsetappend((op, av))
-                elif op is BRANCH:
-                    c = []
-                    cappend = c.append
-                    for p in av[1]:
-                        if not p:
-                            break
-                        op, av = p[0]
-                        if op is LITERAL:
-                            cappend((op, av))
-                        else:
-                            break
-                    else:
-                        charset = c
-            elif op is BRANCH:
-                c = []
-                cappend = c.append
-                for p in av[1]:
-                    if not p:
-                        break
-                    op, av = p[0]
-                    if op is LITERAL:
-                        cappend((op, av))
-                    else:
-                        break
-                else:
-                    charset = c
-            elif op is IN:
-                charset = av
-##     if prefix:
-##         print "*** PREFIX", prefix, prefix_skip
-##     if charset:
-##         print "*** CHARSET", charset
-    # add an info block
-    emit = code.append
-    emit(OPCODES[INFO])
-    skip = len(code); emit(0)
-    # literal flag
-    mask = 0
-    if prefix:
-        mask = SRE_INFO_PREFIX
-        if len(prefix) == prefix_skip == len(pattern.data):
-            mask = mask + SRE_INFO_LITERAL
-    elif charset:
-        mask = mask + SRE_INFO_CHARSET
-    emit(mask)
-    # pattern length
-    if lo < MAXCODE:
-        emit(lo)
-    else:
-        emit(MAXCODE)
-        prefix = prefix[:MAXCODE]
-    if hi < MAXCODE:
-        emit(hi)
-    else:
-        emit(0)
-    # add literal prefix
-    if prefix:
-        emit(len(prefix)) # length
-        emit(prefix_skip) # skip
-        code.extend(prefix)
-        # generate overlap table
-        table = [-1] + ([0]*len(prefix))
-        for i in xrange(len(prefix)):
-            table[i+1] = table[i]+1
-            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
-                table[i+1] = table[table[i+1]-1]+1
-        code.extend(table[1:]) # don't store first entry
-    elif charset:
-        _compile_charset(charset, flags, code)
-    code[skip] = len(code) - skip
-
-try:
-    unicode
-except NameError:
-    STRING_TYPES = (type(""),)
-else:
-    STRING_TYPES = (type(""), type(unicode("")))
-
-def isstring(obj):
-    for tp in STRING_TYPES:
-        if isinstance(obj, tp):
-            return 1
-    return 0
+        # many ranges, so use a charset instead
+        max_char = ranges[-1][1]
+        subset_list = [0] * (max_char // 256 + 1)
+        for lo, hi in ranges:
+            base = lo - lo % 256
+            while lo <= hi:
+                subset_list[base // 256] |= (_ones(min(hi - base + 1, 256)) ^
+                  _ones(lo % 256))
+                base += 256
+                lo = base
+        # build the index and chunks, consolidating duplicate subsets/chunks
+        index_list, chunk_list = [], []
+        for subset in subset_list:
+            try:
+                index_list.append(chunk_list.index(subset))
+            except ValueError:
+                index_list.append(len(chunk_list))
+                chunk_list.append(subset)
+        categories.append((OP.CHARSET, (max_char, index_list, chunk_list)))
+    if len(categories) == 1:
+        # only 1 test in the set, so don't use a set
+        cat = categories[0]
+        if set_op.startswith("NOT_"):
+            cat = not_op(cat)
+        if set_op.endswith("_IGNORE"):
+            cat = ignore_op(cat)
+        return cat
+    return set_op, categories
+
+def create_charset(iterable):
+    # (UNUSED)
+    # Build a charset 3-tuple (maximum character code, chunk indexes,
+    # distinct 256-bit chunks) from an iterable of characters.
+    pages = []
+    highest = 0
+    for char in iterable:
+        codepoint = ord(char)
+        if codepoint > highest:
+            highest = codepoint
+        # the high bits select the page, the low byte the bit within it
+        page = codepoint >> 8
+        bit = codepoint & 0xFF
+        # grow the page list with empty pages up to and including 'page'
+        if page >= len(pages):
+            pages.extend([0] * (page + 1 - len(pages)))
+        pages[page] |= 1 << bit
+    # give each distinct page a chunk number, in order of first appearance
+    index_list, chunk_list = [], []
+    for bits in pages:
+        if bits not in chunk_list:
+            chunk_list.append(bits)
+        index_list.append(chunk_list.index(bits))
+    return highest, index_list, chunk_list
 
 def _code(p, flags):
-
     flags = p.pattern.flags | flags
     code = []
 
     # compile info block
-    _compile_info(code, p, flags)
+    #_compile_info(code, p, flags)
 
     # compile the pattern
-    _compile(code, p.data, flags)
-
-    code.append(OPCODES[SUCCESS])
+    class Record(object):
+        pass
+    info = Record()
+    info.group_count = 0
+    if flags & SRE_FLAG_REVERSE:
+        dir = -1
+    else:
+        dir = 1
+    _compile(code, p.data, flags, info, dir)
+    code.append(OPCODES[OP.SUCCESS])
 
     return code
 
-def compile(p, flags=0):
+def compile(p, flags=0, scanner=0):
     # internal: convert pattern list to internal format
 
-    if isstring(p):
+    if isinstance(p, basestring):
+        import sre_parse
         pattern = p
-        p = sre_parse.parse(p, flags)
+        p = sre_parse.parse(p, flags, scanner=scanner)
     else:
         pattern = None
 
@@ -505,20 +447,12 @@
 
     # print code
 
-    # XXX: <fl> get rid of this limitation!
-    if p.pattern.groups > 100:
-        raise AssertionError(
-            "sorry, but this version only supports 100 named groups"
-            )
-
     # map in either direction
-    groupindex = p.pattern.groupdict
-    indexgroup = [None] * p.pattern.groups
-    for k, i in groupindex.items():
-        indexgroup[i] = k
-
-    return _sre.compile(
-        pattern, flags | p.pattern.flags, code,
-        p.pattern.groups-1,
-        groupindex, indexgroup
-        )
+    groupindex = p.pattern.named_groups
+    indexgroup = [None] * (max(groupindex.values() + [-1]) + 1)
+
+    for name, index in groupindex.items():
+        indexgroup[index] = name
+
+    return _sre.compile(pattern, flags | p.pattern.flags, code,
+      p.pattern.groups, groupindex, indexgroup)
=== modified file Lib/sre_parse.py
--- Lib/sre_parse.py 2008-10-14 22:37:18 +0000
+++ Lib/sre_parse.py 2009-03-05 03:00:47 +0000
@@ -15,75 +15,68 @@
 import sys
 
 from sre_constants import *
-
-SPECIAL_CHARS = ".\\[{()*+?^$|"
-REPEAT_CHARS = "*+?{"
-
-DIGITS = set("0123456789")
-
-OCTDIGITS = set("01234567")
-HEXDIGITS = set("0123456789abcdefABCDEF")
-
-WHITESPACE = set(" \t\n\r\v\f")
+import unicodedata
+
+SPECIAL_CHARS = set(".\\[{()*+?^$|")
+REPEAT_CHARS = set("*+?{")
+WHITESPACE_CHARS = set(" \t\n\r\v\f")
 
 ESCAPES = {
-    r"\a": (LITERAL, ord("\a")),
-    r"\b": (LITERAL, ord("\b")),
-    r"\f": (LITERAL, ord("\f")),
-    r"\n": (LITERAL, ord("\n")),
-    r"\r": (LITERAL, ord("\r")),
-    r"\t": (LITERAL, ord("\t")),
-    r"\v": (LITERAL, ord("\v")),
-    r"\\": (LITERAL, ord("\\"))
+    r"\a": (OP.LITERAL, ord("\a")),
+    r"\b": (OP.LITERAL, ord("\b")),
+    r"\f": (OP.LITERAL, ord("\f")),
+    r"\n": (OP.LITERAL, ord("\n")),
+    r"\r": (OP.LITERAL, ord("\r")),
+    r"\t": (OP.LITERAL, ord("\t")),
+    r"\v": (OP.LITERAL, ord("\v")),
+    r"\\": (OP.LITERAL, ord("\\")),
 }
 
-CATEGORIES = {
-    r"\A": (AT, AT_BEGINNING_STRING), # start of string
-    r"\b": (AT, AT_BOUNDARY),
-    r"\B": (AT, AT_NON_BOUNDARY),
-    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
-    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
-    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
-    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
-    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
-    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END_STRING), # end of string
+POSITIONS = {
+    r"\A": (OP.START_OF_STRING, None),
+    r"\b": (OP.BOUNDARY, None),
+    r"\B": (OP.NOT_BOUNDARY, None),
+    r"\G": (OP.START_OF_SEARCH, None),
+    r"\Z": (OP.END_OF_STRING, None),
 }
 
+STD_CATEGORIES = {
+    r"\d": (OP.CATEGORY, CATEGORIES["digit"]),
+    r"\D": (OP.NOT_CATEGORY, CATEGORIES["digit"]),
+    r"\s": (OP.CATEGORY, CATEGORIES["space"]),
+    r"\S": (OP.NOT_CATEGORY, CATEGORIES["space"]),
+    r"\w": (OP.CATEGORY, CATEGORIES["word"]),
+    r"\W": (OP.NOT_CATEGORY, CATEGORIES["word"]),
+}
+
 FLAGS = {
-    # standard flags
     "i": SRE_FLAG_IGNORECASE,
     "L": SRE_FLAG_LOCALE,
     "m": SRE_FLAG_MULTILINE,
+    "r": SRE_FLAG_REVERSE,
     "s": SRE_FLAG_DOTALL,
     "x": SRE_FLAG_VERBOSE,
-    # extensions
     "t": SRE_FLAG_TEMPLATE,
     "u": SRE_FLAG_UNICODE,
+    "z": SRE_FLAG_ZEROWIDTH,
 }
 
+SCOPED_FLAGS_MASK = (SRE_FLAG_IGNORECASE | SRE_FLAG_MULTILINE |
+  SRE_FLAG_DOTALL | SRE_FLAG_VERBOSE)
+
 class Pattern:
-    # master pattern object.  keeps track of global attributes
+    # master pattern object. keeps track of global attributes
     def __init__(self):
         self.flags = 0
-        self.open = []
-        self.groups = 1
-        self.groupdict = {}
-    def opengroup(self, name=None):
-        gid = self.groups
-        self.groups = gid + 1
+        self.groups = 0
+        self.named_groups = {}
+        self.fix_list = []
+    def new_group(self, name=None):
+        self.groups += 1
+        group_number = self.groups
         if name is not None:
-            ogid = self.groupdict.get(name, None)
-            if ogid is not None:
-                raise error, ("redefinition of group name %s as group %d; "
-                              "was group %d" % (repr(name), gid,  ogid))
-            self.groupdict[name] = gid
-        self.open.append(gid)
-        return gid
-    def closegroup(self, gid):
-        self.open.remove(gid)
-    def checkgroup(self, gid):
-        return gid < self.groups and gid not in self.open
+            self.named_groups.setdefault(name, len(self.named_groups))
+        return group_number, name
 
 class SubPattern:
     # a subpattern, in intermediate form
@@ -93,33 +86,37 @@
             data = []
         self.data = data
         self.width = None
+        self._inv_categories = dict((value, name) for name, value in
+          CATEGORIES.items())
     def dump(self, level=0):
-        nl = 1
-        seqtypes = type(()), type([])
+        nl = True
+        seqtypes = tuple, list
         for op, av in self.data:
-            print level*"  " + op,; nl = 0
-            if op == "in":
+            print level * "  " + op,; nl = False
+            if "SET" in op:
                 # member sublanguage
-                print; nl = 1
+                print; nl = True
                 for op, a in av:
-                    print (level+1)*"  " + op, a
-            elif op == "branch":
-                print; nl = 1
+                    print (level + 1) * "  " + op, a
+            elif op == OP.BRANCH:
+                print; nl = True
                 i = 0
                 for a in av[1]:
                     if i > 0:
-                        print level*"  " + "or"
-                    a.dump(level+1); nl = 1
-                    i = i + 1
-            elif type(av) in seqtypes:
+                        print level * "  " + "or"
+                    a.dump(level + 1); nl = True
+                    i += 1
+            elif isinstance(av, seqtypes):
                 for a in av:
                     if isinstance(a, SubPattern):
                         if not nl: print
-                        a.dump(level+1); nl = 1
+                        a.dump(level + 1); nl = True
                     else:
-                        print a, ; nl = 0
-            else:
-                print av, ; nl = 0
+                        print a, ; nl = False
+            elif "CATEGORY" in op:
+                print self._inv_categories[av], ; nl = False
+            else:
+                print av, ; nl = False
             if not nl: print
     def __repr__(self):
         return repr(self.data)
@@ -137,363 +134,465 @@
         self.data.insert(index, code)
     def append(self, code):
         self.data.append(code)
-    def getwidth(self):
-        # determine the width (min, max) for this subpattern
-        if self.width:
-            return self.width
-        lo = hi = 0L
-        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
-        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
-        for op, av in self.data:
-            if op is BRANCH:
-                i = sys.maxint
-                j = 0
-                for av in av[1]:
-                    l, h = av.getwidth()
-                    i = min(i, l)
-                    j = max(j, h)
-                lo = lo + i
-                hi = hi + j
-            elif op is CALL:
-                i, j = av.getwidth()
-                lo = lo + i
-                hi = hi + j
-            elif op is SUBPATTERN:
-                i, j = av[1].getwidth()
-                lo = lo + i
-                hi = hi + j
-            elif op in REPEATCODES:
-                i, j = av[2].getwidth()
-                lo = lo + long(i) * av[0]
-                hi = hi + long(j) * av[1]
-            elif op in UNITCODES:
-                lo = lo + 1
-                hi = hi + 1
-            elif op == SUCCESS:
-                break
-        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
-        return self.width
+    def extend(self, code):
+        self.data.extend(code)
 
 class Tokenizer:
     def __init__(self, string):
         self.string = string
         self.index = 0
-        self.__next()
-    def __next(self):
-        if self.index >= len(self.string):
+        self._next()
+    def _next(self):
+        try:
+            char = self.string[self.index]
+            if char == "\\":
+                try:
+                    char += self.string[self.index + 1]
+                except IndexError:
+                    raise error("bad escape (end of line)")
+            self.index += len(char)
+            self.next = char
+        except IndexError:
             self.next = None
-            return
-        char = self.string[self.index]
-        if char[0] == "\\":
-            try:
-                c = self.string[self.index + 1]
-            except IndexError:
-                raise error, "bogus escape (end of line)"
-            char = char + c
-        self.index = self.index + len(char)
-        self.next = char
-    def match(self, char, skip=1):
-        if char == self.next:
-            if skip:
-                self.__next()
-            return 1
-        return 0
+    def match(self, char, skip=True):
+        if char != self.next:
+            return False
+        if skip:
+            self._next()
+        return True
     def get(self):
         this = self.next
-        self.__next()
+        self._next()
         return this
     def tell(self):
         return self.index, self.next
     def seek(self, index):
         self.index, self.next = index
 
-def isident(char):
-    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
-
-def isdigit(char):
-    return "0" <= char <= "9"
-
-def isname(name):
+def is_name(name):
     # check that group name is a valid string
-    if not isident(name[0]):
-        return False
-    for char in name[1:]:
-        if not isident(char) and not isdigit(char):
-            return False
-    return True
-
-def _class_escape(source, escape):
+    return (name[ : 1] == "_" or name[ : 1].isalpha()) and all(char == "_" or
+      char.isalnum() for char in name[1 : ])  # slices: "" is invalid, not an IndexError
+
+# names can be delimited in a number of ways
+NAME_DELIMITERS = {"<": ">", "{": "}"}
+
+def hex_escape(source, escape, max_digits):
+    # read exactly max_digits hex digits from source and return their value
+    found = []
+    while len(found) < max_digits and source.next in HEXDIGITS:
+        found.append(source.get())
+    if len(found) != max_digits:
+        raise error("bad escape: %s" % (escape + "".join(found)))
+    return int("".join(found), 16)
+
+def oct_escape(source, escape, digits):
+    # extend 'digits' to at most 3 octal digits; return the value's low byte
+    while len(digits) < 3 and source.next in OCTDIGITS:
+        digits = digits + source.get()
+    try:
+        return int(digits, 8) % 0x100
+    except ValueError:
+        raise error("bad escape: %s" % (escape + digits))
+
+def parse_name(source, terminator, name_type, prefix):
+    # collect tokens up to 'terminator' (which is consumed but not returned)
+    name = ""
+    token = source.get()
+    while token is not None and token != terminator:
+        name = name + token
+        token = source.get()
+    if token is None:
+        raise error("unterminated %s name: %s" % (name_type, prefix))
+    return name
+
+HEX_ESCAPE_LENGTH = {"x": 2, "u": 4, "U": 8}
+
+def class_escape(source, escape):
     # handle escape code inside character class
-    code = ESCAPES.get(escape)
-    if code:
-        return code
-    code = CATEGORIES.get(escape)
+    code = STD_CATEGORIES.get(escape) or ESCAPES.get(escape)
     if code:
         return code
     try:
-        c = escape[1:2]
-        if c == "x":
-            # hexadecimal escape (exactly two digits)
-            while source.next in HEXDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            escape = escape[2:]
-            if len(escape) != 2:
-                raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, int(escape, 16) & 0xff
+        c = escape[1 : 2]
+        if c in HEX_ESCAPE_LENGTH:
+            # hex escape
+            return OP.LITERAL, hex_escape(source, escape, HEX_ESCAPE_LENGTH[c])
         elif c in OCTDIGITS:
-            # octal escape (up to three digits)
-            while source.next in OCTDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            escape = escape[1:]
-            return LITERAL, int(escape, 8) & 0xff
+            # octal escape
+            return OP.LITERAL, oct_escape(source, escape[ : 1], c)
         elif c in DIGITS:
-            raise error, "bogus escape: %s" % repr(escape)
-        if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            raise error("bad escape: %s" % escape)
+        elif c == "N":
+            # named character
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing character name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "character",
+              escape + delimiter)
+            try:
+                return OP.LITERAL, ord(unicodedata.lookup(name))
+            except KeyError:
+                raise error("bad character name: %s" % name)
+        elif c == "p":
+            # character property
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing property name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "property",
+              escape + delimiter)
+            try:
+                return OP.CATEGORY, CATEGORIES[name.lower()]
+            except KeyError:
+                raise error("bad property name: %s" % name)
+        else:
+            return OP.LITERAL, ord(c)
     except ValueError:
         pass
-    raise error, "bogus escape: %s" % repr(escape)
-
-def _escape(source, escape, state):
+    raise error("bad escape: %s" % escape)
+
+def posix_class(source):
+    # Parse the rest of a POSIX class "[:name:]" and return (OP.CATEGORY,
+    # category); returns None, consuming nothing, if no ":" comes next.
+    if not source.match(":"):
+        return None
+    name = parse_name(source, ":", "class", ":")
+    cat = CATEGORIES.get(name.lower())
+    if cat is None or cat < COMMON_CATEGORY_START:
+        raise error("bad class name: %s" % name)
+    # parse_name() already consumed the closing ":", so only "]" remains
+    if not source.match("]"):
+        raise error("unterminated class name: %s" % name)
+    return OP.CATEGORY, cat
+
+# group references can be delimited in a number of ways
+GROUP_DELIMITERS = {"<": ">", "{": "}", "'": "'", '"': '"'}
+
+# group references can be relative
+GROUP_DIRECTION = {"+": 1, "-": -1}
+
+def escape(source, escape, state):
     # handle escape code in expression
-    code = CATEGORIES.get(escape)
+    # group references returned as list instead of tuple so that they can be
+    # fixed later
+    code = POSITIONS.get(escape) or STD_CATEGORIES.get(escape) or ESCAPES.get(escape)
     if code:
         return code
-    code = ESCAPES.get(escape)
-    if code:
-        return code
+    if state.flags & SRE_FLAG_IGNORECASE:
+        literal_op, groupref_op = OP.LITERAL_IGNORE, OP.GROUPREF_IGNORE
+    else:
+        literal_op, groupref_op = OP.LITERAL, OP.GROUPREF
     try:
-        c = escape[1:2]
-        if c == "x":
-            # hexadecimal escape
-            while source.next in HEXDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            if len(escape) != 4:
-                raise ValueError
-            return LITERAL, int(escape[2:], 16) & 0xff
+        c = escape[1 : 2]
+        if c in HEX_ESCAPE_LENGTH:
+            # hex escape
+            return literal_op, hex_escape(source, escape, HEX_ESCAPE_LENGTH[c])
         elif c == "0":
             # octal escape
-            while source.next in OCTDIGITS and len(escape) < 4:
-                escape = escape + source.get()
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return literal_op, oct_escape(source, escape[ : 1], c)
         elif c in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
-                escape = escape + source.get()
-                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
-                    source.next in OCTDIGITS):
+                escape += source.get()
+                if set(escape[1 : ]) <= OCTDIGITS and source.next in OCTDIGITS:
                     # got three octal digits; this is an octal escape
-                    escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    escape += source.get()
+                    return literal_op, int(escape[1 : ], 8) & 0xFF
             # not an octal escape, so this is a group reference
-            group = int(escape[1:])
-            if group < state.groups:
-                if not state.checkgroup(group):
-                    raise error, "cannot refer to open group"
-                return GROUPREF, group
-            raise ValueError
-        if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            ref = [groupref_op, escape[1 : ]]
+            state.fix_list.append(ref)
+            return ref
+        elif c == "g":
+            # group reference
+            if source.next in GROUP_DELIMITERS:
+                # delimited group reference
+                delimiter = source.get()
+                name = parse_name(source, GROUP_DELIMITERS[delimiter], "group",
+                  escape + delimiter)
+                if name[0] in GROUP_DIRECTION and name[1 : ].isdigit():
+                    # relative group reference, so convert to absolute
+                    name = str(state.groups + GROUP_DIRECTION[name[0]] *
+                      int(name[1 : ]))
+                if not name.isdigit() and not is_name(name):
+                    raise error("bad group name: %s" % name)
+                # return the group reference
+                ref = [groupref_op, name]
+                state.fix_list.append(ref)
+                return ref
+            elif source.next in DIGITS:
+                # non-delimited group reference (single digit)
+                ref = [groupref_op, source.get()]
+                state.fix_list.append(ref)
+                return ref
+            else:
+                raise error("missing group name: %s" % escape)
+        elif c == "k":
+            # named group reference
+            if source.next in GROUP_DELIMITERS:
+                # delimited group reference
+                delimiter = source.get()
+                name = parse_name(source, GROUP_DELIMITERS[delimiter], "group",
+                  escape + delimiter)
+                if not is_name(name):
+                    raise error("bad group name: %s" % name)
+                ref = [groupref_op, name]
+                state.fix_list.append(ref)
+                return ref
+            else:
+                # non-delimited group reference; invalid for \k
+                raise error("missing group name: %s" % escape)
+        elif c == "N":
+            # named character
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing character name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "character",
+              escape + delimiter)
+            try:
+                return literal_op, ord(unicodedata.lookup(name))
+            except KeyError:
+                raise error("bad character name: %s" % name)
+        elif c in "pP":
+            # character property
+            if source.next not in NAME_DELIMITERS:
+                raise error("missing property name: %s" % escape)
+            delimiter = source.get()
+            name = parse_name(source, NAME_DELIMITERS[delimiter], "property",
+              escape + delimiter)
+            try:
+                op = OP.CATEGORY, CATEGORIES[name.lower()]
+                if c == "P":
+                    op = not_op(op)
+                return op
+            except KeyError:
+                raise error("bad property name: %s" % name)
+        else:
+            return literal_op, ord(c)
     except ValueError:
         pass
-    raise error, "bogus escape: %s" % repr(escape)
-
-def _parse_sub(source, state, nested=1):
+    raise error("bad escape: %s" % escape)
+
+def _parse_sub(source, state, named_groups, reuse):
     # parse an alternation: a|b|c
+    #
+    # group names can be duplicated if they are mutually exclusive
+    #
+    # group numbers can be duplicated in mutually-exclusive branches if 'reuse'
+    # is True
+    initial_groups = state.groups
+    max_groups = state.groups
+    named_groups_out = named_groups.copy()
 
     items = []
-    itemsappend = items.append
-    sourcematch = source.match
-    while 1:
-        itemsappend(_parse(source, state))
-        if sourcematch("|"):
-            continue
-        if not nested:
+    while True:
+        # parse the branch
+        # returns the parsed items and the set of named groups
+        i, n = _parse(source, state, named_groups)
+        items.append(i)
+        named_groups_out |= n
+        max_groups = max(max_groups, state.groups)
+        if not source.match("|"):
             break
-        if not source.next or sourcematch(")", 0):
-            break
-        else:
-            raise error, "pattern not properly closed"
+
+        # do we want to reuse group numbers, ie start all the branches at the
+        # same group number?
+        if reuse:
+            state.groups = initial_groups
+
+    # the next group number should be higher than all previous ones
+    state.groups = max_groups
 
     if len(items) == 1:
-        return items[0]
+        return items[0], named_groups_out
 
     subpattern = SubPattern(state)
-    subpatternappend = subpattern.append
-
-    # check if all items share a common prefix
-    while 1:
-        prefix = None
-        for item in items:
-            if not item:
+
+    # check whether all branches share a common prefix
+    # (the prefix shouldn't contain a capture group)
+    index = 0
+    try:
+        while all(items[0][index] == item[index] for item in items[1 : ]):
+            if is_capture(items[0][index]):
                 break
-            if prefix is None:
-                prefix = item[0]
-            elif item[0] != prefix:
-                break
-        else:
-            # all subitems start with a common "prefix".
-            # move it out of the branch
-            for item in items:
-                del item[0]
-            subpatternappend(prefix)
-            continue # check next one
-        break
-
-    # check if the branch can be replaced by a character set
-    for item in items:
-        if len(item) != 1 or item[0][0] != LITERAL:
-            break
+            index += 1
+    except IndexError:
+        pass
+
+    if index > 0:
+        subpattern.extend(items[0][ : index])
+        items = [item[index : ] for item in items]
+
+    # check whether the alternation can be replaced by a character set
+    if all(len(item) == 1 and item[0][0] == OP.LITERAL for item in items):
+        # we can store this as a set instead of a
+        # branch (the compiler may optimize this even more)
+        subpattern.append((OP.SET, [item[0] for item in items]))
     else:
-        # we can store this as a character set instead of a
-        # branch (the compiler may optimize this even more)
-        set = []
-        setappend = set.append
-        for item in items:
-            setappend(item[0])
-        subpatternappend((IN, set))
-        return subpattern
-
-    subpattern.append((BRANCH, (None, items)))
-    return subpattern
-
-def _parse_sub_cond(source, state, condgroup):
-    item_yes = _parse(source, state)
+        subpattern.append((OP.BRANCH, (None, items)))
+
+    return subpattern, named_groups_out
+
+def is_capture(pattern):  # True if the single parsed item `pattern` is, or contains, a capture group
+    if not pattern:
+        return False
+    o, a = pattern  # an item is an (opcode, argument) pair
+    if o in [OP.ASSERT, OP.ASSERT_NOT, OP.ATOMIC]:
+        return has_capture(a[1])  # the parser builds these as (dir, p) and (group, p)
+    elif o == OP.BRANCH:
+        return any(has_capture(i) for i in a[1])  # a is (None, items): check every branch
+    elif o == OP.GROUPREF_EXISTS:
+        return any(has_capture(i) for i in a[1 : 3])  # a is [condgroup, item_yes, item_no]
+    elif o in [OP.REPEAT_MAX, OP.REPEAT_MIN, OP.REPEAT_POSS]:
+        return has_capture(a[2])  # a is (min, max, item)
+    elif o in [OP.REPEAT_ONE_MAX, OP.REPEAT_ONE_MIN, OP.REPEAT_ONE_POSS]:
+        return is_capture(a[2])  # presumably a[2] is a single item here, not a sequence -- TODO confirm
+    elif o == OP.SUBPATTERN:
+        return a[0] is not None or has_capture(a[1])  # a group with a non-None id captures; otherwise look inside
+    else:
+        return False
+
+def has_capture(pattern):  # True if any item of the parsed sequence `pattern` is or contains a capture group
+    if not pattern:  # None or an empty sequence cannot capture anything
+        return False
+    return any(is_capture(i) for i in pattern)  # test each item with is_capture; bare truthiness was always True
+
+def _parse_sub_cond(source, state, named_groups, condgroup):
+    item_yes, n_yes = _parse(source, state, named_groups)
     if source.match("|"):
-        item_no = _parse(source, state)
+        item_no, n_no = _parse(source, state, named_groups)
         if source.match("|"):
-            raise error, "conditional backref with more than two branches"
+            raise error("conditional reference with more than two branches")
     else:
-        item_no = None
-    if source.next and not source.match(")", 0):
-        raise error, "pattern not properly closed"
+        item_no, n_no = None, named_groups
+    if source.next and not source.match(")", False):
+        raise error("pattern not properly closed")
     subpattern = SubPattern(state)
-    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
-    return subpattern
-
-_PATTERNENDERS = set("|)")
-_ASSERTCHARS = set("=!<")
-_LOOKBEHINDASSERTCHARS = set("=!")
-_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
-
-def _parse(source, state):
+    ref = (OP.GROUPREF_EXISTS, [condgroup, item_yes, item_no])
+    state.fix_list.append(ref)
+    subpattern.append(ref)
+    return subpattern, n_yes | n_no
+
+PATTERN_ENDERS = set("|)")
+ASSERT_CHARS = set("=!<")
+LOOKBEHIND_ASSERT_CHARS = set("=!")
+POSITION_CODES = set([OP.BOUNDARY, OP.END_OF_LINE, OP.END_OF_STRING,
+  OP.END_OF_STRING_LN, OP.NOT_BOUNDARY, OP.START_OF_LINE, OP.START_OF_SEARCH,
+  OP.START_OF_STRING])
+QUERY_GROUP = 0  # set as soon as "(?" is seen; falsy, so _parse's "if group:" is skipped unless it is reassigned
+CAPTURE_GROUP = 1  # plain "(" and named groups
+NONCAPTURE_GROUP = 2  # conditional "(?(", branch-reset "(?|" and "(?flags:" groups
+ATOMIC_GROUP = 3  # "(?>" groups
+
+def _parse(source, state, named_groups):
     # parse a simple pattern
     subpattern = SubPattern(state)
-
-    # precompute constants into local variables
-    subpatternappend = subpattern.append
-    sourceget = source.get
-    sourcematch = source.match
-    _len = len
-    PATTERNENDERS = _PATTERNENDERS
-    ASSERTCHARS = _ASSERTCHARS
-    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
-    REPEATCODES = _REPEATCODES
-
-    while 1:
-
-        if source.next in PATTERNENDERS:
+    named_groups = named_groups.copy()
+
+    while True:
+        if state.flags & SRE_FLAG_VERBOSE:
+            # skip whitespace and comments
+            while source.next in WHITESPACE_CHARS:
+                source.get()
+            if source.next == "#":
+                while source.next not in (None, "\n"):
+                    source.get()
+                source.get()
+                continue
+
+        if source.next in PATTERN_ENDERS:
             break # end of subpattern
-        this = sourceget()
+
+        this = source.get()
+
         if this is None:
             break # end of pattern
 
-        if state.flags & SRE_FLAG_VERBOSE:
-            # skip whitespace and comments
-            if this in WHITESPACE:
-                continue
-            if this == "#":
-                while 1:
-                    this = sourceget()
-                    if this in (None, "\n"):
-                        break
-                continue
-
-        if this and this[0] not in SPECIAL_CHARS:
-            subpatternappend((LITERAL, ord(this)))
-
+        if this[0] not in SPECIAL_CHARS:
+            if state.flags & SRE_FLAG_IGNORECASE:
+                subpattern.append((OP.LITERAL_IGNORE, ord(this)))
+            else:
+                subpattern.append((OP.LITERAL, ord(this)))
         elif this == "[":
             # character set
-            set = []
-            setappend = set.append
-##          if sourcematch(":"):
-##              pass # handle character classes
-            if sourcematch("^"):
-                setappend((NEGATE, None))
+            char_set = []
+            negate = source.match("^")
             # check remaining characters
-            start = set[:]
-            while 1:
-                this = sourceget()
-                if this == "]" and set != start:
+            while True:
+                this = source.get()
+                if not this:
+                    raise error("unexpected end of pattern")
+                if this == "]" and char_set:
+                    # terminating ]
                     break
-                elif this and this[0] == "\\":
-                    code1 = _class_escape(source, this)
-                elif this:
-                    code1 = LITERAL, ord(this)
-                else:
-                    raise error, "unexpected end of regular expression"
-                if sourcematch("-"):
+                elif this[0] == "\\":
+                    code1 = class_escape(source, this)
+                elif this[0] == "[":
+                    code1 = posix_class(source)
+                    if not code1:
+                        code1 = OP.LITERAL, ord(this)
+                else:
+                    code1 = OP.LITERAL, ord(this)
+                if source.match("-"):
                     # potential range
-                    this = sourceget()
+                    this = source.get()
+                    if not this:
+                        raise error("unexpected end of pattern")
                     if this == "]":
-                        if code1[0] is IN:
-                            code1 = code1[1][0]
-                        setappend(code1)
-                        setappend((LITERAL, ord("-")))
+                        # at end of pattern, so literal char and "-"
+                        char_set.append(code1)
+                        char_set.append((OP.LITERAL, ord("-")))
                         break
-                    elif this:
-                        if this[0] == "\\":
-                            code2 = _class_escape(source, this)
-                        else:
-                            code2 = LITERAL, ord(this)
-                        if code1[0] != LITERAL or code2[0] != LITERAL:
-                            raise error, "bad character range"
-                        lo = code1[1]
-                        hi = code2[1]
-                        if hi < lo:
-                            raise error, "bad character range"
-                        setappend((RANGE, (lo, hi)))
+                    if this[0] == "\\":
+                        code2 = class_escape(source, this)
+                    elif this[0] == "[":
+                        code2 = posix_class(source)
+                        if not code2:
+                            code2 = OP.LITERAL, ord(this)
                     else:
-                        raise error, "unexpected end of regular expression"
-                else:
-                    if code1[0] is IN:
-                        code1 = code1[1][0]
-                    setappend(code1)
-
-            # XXX: <fl> should move set optimization to compiler!
-            if _len(set)==1 and set[0][0] is LITERAL:
-                subpatternappend(set[0]) # optimization
-            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
-                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
-            else:
-                # XXX: <fl> should add charmap optimization here
-                subpatternappend((IN, set))
-
-        elif this and this[0] in REPEAT_CHARS:
+                        code2 = OP.LITERAL, ord(this)
+                    if code1[0] != OP.LITERAL or code2[0] != OP.LITERAL:
+                        raise error("bad character range")
+                    lo = code1[1]
+                    hi = code2[1]
+                    if hi < lo:
+                        raise error("bad character range")
+                    char_set.append((OP.RANGE, (lo, hi)))
+                else:
+                    char_set.append(code1)
+            if negate:
+                if state.flags & SRE_FLAG_IGNORECASE:
+                    subpattern.append((OP.NOT_SET_IGNORE, char_set))
+                else:
+                    subpattern.append((OP.NOT_SET, char_set))
+            else:
+                if state.flags & SRE_FLAG_IGNORECASE:
+                    subpattern.append((OP.SET_IGNORE, char_set))
+                else:
+                    subpattern.append((OP.SET, char_set))
+        elif this[0] in REPEAT_CHARS:
             # repeat previous item
             if this == "?":
                 min, max = 0, 1
             elif this == "*":
                 min, max = 0, MAXREPEAT
-
             elif this == "+":
                 min, max = 1, MAXREPEAT
             elif this == "{":
                 if source.next == "}":
-                    subpatternappend((LITERAL, ord(this)))
+                    subpattern.append((OP.LITERAL, ord(this)))
                     continue
                 here = source.tell()
                 min, max = 0, MAXREPEAT
                 lo = hi = ""
                 while source.next in DIGITS:
-                    lo = lo + source.get()
-                if sourcematch(","):
+                    lo += source.get()
+                if source.match(","):
                     while source.next in DIGITS:
-                        hi = hi + sourceget()
+                        hi += source.get()
                 else:
                     hi = lo
-                if not sourcematch("}"):
-                    subpatternappend((LITERAL, ord(this)))
+                if not source.match("}"):
+                    subpattern.append((OP.LITERAL, ord(this)))
                     source.seek(here)
                     continue
                 if lo:
@@ -501,166 +600,245 @@
                 if hi:
                     max = int(hi)
                 if max < min:
-                    raise error, "bad repeat interval"
-            else:
-                raise error, "not supported"
+                    raise error("bad repeat interval")
+            else:
+                raise error("not supported")
             # figure out which item to repeat
-            if subpattern:
-                item = subpattern[-1:]
-            else:
-                item = None
-            if not item or (_len(item) == 1 and item[0][0] == AT):
-                raise error, "nothing to repeat"
-            if item[0][0] in REPEATCODES:
-                raise error, "multiple repeat"
-            if sourcematch("?"):
-                subpattern[-1] = (MIN_REPEAT, (min, max, item))
-            else:
-                subpattern[-1] = (MAX_REPEAT, (min, max, item))
-
+            item = subpattern[-1 : ]
+            if not item or len(item) == 1 and item[0][0] in POSITION_CODES:
+                raise error("nothing to repeat")
+            if source.match("?"):
+                subpattern[-1] = (OP.REPEAT_MIN, (min, max, item))
+            elif source.match("+"):
+                subpattern[-1] = (OP.REPEAT_POSS, (min, max, item))
+            else:
+                subpattern[-1] = (OP.REPEAT_MAX, (min, max, item))
         elif this == ".":
-            subpatternappend((ANY, None))
-
+            if state.flags & SRE_FLAG_DOTALL:
+                subpattern.append((OP.ANY_ALL, None))
+            else:
+                subpattern.append((OP.ANY, None))
         elif this == "(":
-            group = 1
+            group = CAPTURE_GROUP
             name = None
             condgroup = None
-            if sourcematch("?"):
-                group = 0
+            scoped_flags = None
+            reuse = False
+            if source.match("?"):
+                group = QUERY_GROUP
                 # options
-                if sourcematch("P"):
+                if source.match("P"):
                     # python extensions
-                    if sourcematch("<"):
-                        # named group: skip forward to end of name
-                        name = ""
-                        while 1:
-                            char = sourceget()
-                            if char is None:
-                                raise error, "unterminated name"
-                            if char == ">":
-                                break
-                            name = name + char
-                        group = 1
-                        if not isname(name):
-                            raise error, "bad character in group name"
-                    elif sourcematch("="):
-                        # named backreference
-                        name = ""
-                        while 1:
-                            char = sourceget()
-                            if char is None:
-                                raise error, "unterminated name"
-                            if char == ")":
-                                break
-                            name = name + char
-                        if not isname(name):
-                            raise error, "bad character in group name"
-                        gid = state.groupdict.get(name)
-                        if gid is None:
-                            raise error, "unknown group name"
-                        subpatternappend((GROUPREF, gid))
+                    if source.match("<"):
+                        # named group
+                        name = parse_name(source, ">", "group", "(?P<")
+                        group = CAPTURE_GROUP
+                        if not is_name(name):
+                            raise error("bad group name: %s" % name)
+                        if name in named_groups:
+                            raise error("duplicate group name: %s" % name)
+                        named_groups.add(name)
+                    elif source.match("="):
+                        # named group reference
+                        # group reference stored as list instead of tuple so
+                        # that it can be fixed later
+                        name = parse_name(source, ")", "group", "(?P=")
+                        if not is_name(name):
+                            raise error("bad group name: %s" % name)
+                        if state.flags & SRE_FLAG_IGNORECASE:
+                            ref = [OP.GROUPREF_IGNORE, name]
+                        else:
+                            ref = [OP.GROUPREF, name]
+                        state.fix_list.append(ref)
+                        subpattern.append(ref)
                         continue
                     else:
-                        char = sourceget()
+                        char = source.get()
                         if char is None:
-                            raise error, "unexpected end of pattern"
-                        raise error, "unknown specifier: ?P%s" % char
-                elif sourcematch(":"):
-                    # non-capturing group
-                    group = 2
-                elif sourcematch("#"):
+                            raise error("unexpected end of pattern")
+                        raise error("unknown specifier: (?P%s" % char)
+                elif source.match("<"):
+                    # named group or look-behind
+                    if source.next in LOOKBEHIND_ASSERT_CHARS:
+                        # lookbehind assertion
+                        dir = -1 # lookbehind
+                        char = source.get()
+                        saved_flags = state.flags
+                        p, named_groups = _parse_sub(source, state, named_groups, False)
+                        state.flags = ((state.flags & ~SCOPED_FLAGS_MASK) |
+                          (saved_flags & SCOPED_FLAGS_MASK))
+                        if not source.match(")"):
+                            raise error("unbalanced parenthesis")
+                        if char == "=":
+                            subpattern.append((OP.ASSERT, (dir, p)))
+                        else:
+                            subpattern.append((OP.ASSERT_NOT, (dir, p)))
+                        continue
+                    # named group
+                    name = parse_name(source, ">", "group", "(?<")
+                    group = CAPTURE_GROUP
+                    if not is_name(name):
+                        raise error("bad group name: %s" % name)
+                    if name in named_groups:
+                        raise error("duplicate group name: %s" % name)
+                    named_groups.add(name)
+                elif source.match(">"):
+                    # atomic group
+                    group = ATOMIC_GROUP
+                elif source.match("#"):
                     # comment
-                    while 1:
-                        if source.next is None or source.next == ")":
+                    while True:
+                        if source.next in (None, ")"):
                             break
-                        sourceget()
-                    if not sourcematch(")"):
-                        raise error, "unbalanced parenthesis"
+                        source.get()
+                    if not source.match(")"):
+                        raise error("unbalanced parenthesis")
                     continue
-                elif source.next in ASSERTCHARS:
+                elif source.next in ASSERT_CHARS:
                     # lookahead assertions
-                    char = sourceget()
+                    char = source.get()
                     dir = 1
                     if char == "<":
-                        if source.next not in LOOKBEHINDASSERTCHARS:
-                            raise error, "syntax error"
+                        if source.next not in LOOKBEHIND_ASSERT_CHARS:
+                            raise error("syntax error: (?%s" % char)
                         dir = -1 # lookbehind
-                        char = sourceget()
-                    p = _parse_sub(source, state)
-                    if not sourcematch(")"):
-                        raise error, "unbalanced parenthesis"
+                        char = source.get()
+                    saved_flags = state.flags
+                    p, named_groups = _parse_sub(source, state, named_groups, False)
+                    state.flags = ((state.flags & ~SCOPED_FLAGS_MASK) |
+                      (saved_flags & SCOPED_FLAGS_MASK))
+                    if not source.match(")"):
+                        raise error("unbalanced parenthesis")
                     if char == "=":
-                        subpatternappend((ASSERT, (dir, p)))
+                        subpattern.append((OP.ASSERT, (dir, p)))
                     else:
-                        subpatternappend((ASSERT_NOT, (dir, p)))
+                        subpattern.append((OP.ASSERT_NOT, (dir, p)))
                     continue
-                elif sourcematch("("):
+                elif source.match("("):
                     # conditional backreference group
-                    condname = ""
-                    while 1:
-                        char = sourceget()
-                        if char is None:
-                            raise error, "unterminated name"
-                        if char == ")":
-                            break
-                        condname = condname + char
-                    group = 2
-                    if isname(condname):
-                        condgroup = state.groupdict.get(condname)
-                        if condgroup is None:
-                            raise error, "unknown group name"
+                    condgroup = parse_name(source, ")", "group", "(?(")
+                    group = NONCAPTURE_GROUP
+                    if not is_name(condgroup) and not condgroup.isdigit():
+                        raise error("bad group name: %s" % condgroup)
+                elif source.match("|"):
+                    # reuse group numbers for multiple branches
+                    group = NONCAPTURE_GROUP
+                    reuse = True
+                else:
+                    # probably non-capturing group or flags
+                    # might be scoped (set at start of group and local to group)
+                    scoped_flags = state.flags
+                    seen_on, seen_off = False, False
+                    while source.next in FLAGS:
+                        scoped_flags |= FLAGS[source.get()]
+                        seen_on = True
+                    if source.match("-"):
+                        while source.next in FLAGS:
+                            if (FLAGS[source.next] & SCOPED_FLAGS_MASK) == 0:
+                                raise error("bad pattern flag: %s" %
+                                  source.next)
+                            scoped_flags &= ~FLAGS[source.get()]
+                            seen_off = True
+                        if not seen_off:
+                            raise error("bad pattern flag")
+                    # update just global flags
+                    state.flags |= scoped_flags & ~SCOPED_FLAGS_MASK
+                    if source.match(":"):
+                        # non-capturing group with scoped flags
+                        group = NONCAPTURE_GROUP
+                    elif seen_on or seen_off:
+                        # not start of group, just setting flags
+                        state.flags = scoped_flags
+                        scoped_flags = None
                     else:
-                        try:
-                            condgroup = int(condname)
-                        except ValueError:
-                            raise error, "bad character in group name"
-                else:
-                    # flags
-                    if not source.next in FLAGS:
-                        raise error, "unexpected end of pattern"
-                    while source.next in FLAGS:
-                        state.flags = state.flags | FLAGS[sourceget()]
+                        raise error("unexpected end of pattern")
             if group:
+                atomic = group == ATOMIC_GROUP
                 # parse group contents
-                if group == 2:
+                if group in [NONCAPTURE_GROUP, ATOMIC_GROUP]:
                     # anonymous group
                     group = None
                 else:
-                    group = state.opengroup(name)
+                    group = state.new_group(name)
+                saved_flags = state.flags
+                if scoped_flags is not None:
+                    state.flags = scoped_flags
                 if condgroup:
-                    p = _parse_sub_cond(source, state, condgroup)
-                else:
-                    p = _parse_sub(source, state)
-                if not sourcematch(")"):
-                    raise error, "unbalanced parenthesis"
-                if group is not None:
-                    state.closegroup(group)
-                subpatternappend((SUBPATTERN, (group, p)))
-            else:
-                while 1:
-                    char = sourceget()
+                    p, named_groups = _parse_sub_cond(source, state, named_groups, condgroup)
+                else:
+                    p, named_groups = _parse_sub(source, state, named_groups, reuse)
+                state.flags = ((state.flags & ~SCOPED_FLAGS_MASK) |
+                  (saved_flags & SCOPED_FLAGS_MASK))
+                if not source.match(")"):
+                    raise error("unbalanced parenthesis")
+                if atomic:
+                    subpattern.append((OP.ATOMIC, (group, p)))
+                else:
+                    if group is None:
+                        subpattern.append((OP.SUBPATTERN, (None, p)))
+                    else:
+                        # group reference stored as list instead of tuple so
+                        # that it can be fixed later
+                        ref = OP.SUBPATTERN, (list(group), p)
+                        state.fix_list.append(ref)
+                        subpattern.append(ref)
+            else:
+                while True:
+                    char = source.get()
                     if char is None:
-                        raise error, "unexpected end of pattern"
+                        raise error("unexpected end of pattern")
                     if char == ")":
                         break
-                    raise error, "unknown extension"
-
+                    raise error("unknown extension")
         elif this == "^":
-            subpatternappend((AT, AT_BEGINNING))
-
+            if state.flags & SRE_FLAG_MULTILINE:
+                subpattern.append((OP.START_OF_LINE, None))
+            else:
+                subpattern.append((OP.START_OF_STRING, None))
         elif this == "$":
-            subpattern.append((AT, AT_END))
-
-        elif this and this[0] == "\\":
-            code = _escape(source, this, state)
-            subpatternappend(code)
-
+            if state.flags & SRE_FLAG_MULTILINE:
+                subpattern.append((OP.END_OF_LINE, None))
+            else:
+                subpattern.append((OP.END_OF_STRING_LN, None))
+        elif this[0] == "\\":
+            code = escape(source, this, state)
+            subpattern.append(code)
         else:
-            raise error, "parser error"
-
-    return subpattern
-
-def parse(str, flags=0, pattern=None):
+            raise error("parser error: %s" % this)
+
+    return subpattern, named_groups
+
+def fix_ref(ref, index, state):
+    # replace ref[index] in place: digit string -> int, else named_groups entry
+    name = ref[index]
+    try:
+        number = int(name) if name.isdigit() else state.named_groups[name]
+    except KeyError:
+        raise error("invalid group reference: %s" % name)
+    ref[index] = number
+    if name.isdigit() and not (1 <= number <= state.groups):
+        raise error("invalid group reference: %s" % number)
+
+def fix_grouprefs(p, state):
+    # offset every named-group value, then patch each entry of state.fix_list
+    named = state.named_groups
+    for name, value in named.items():
+        named[name] = state.groups + 1 + value
+    simple_refs = set([OP.GROUPREF, OP.GROUPREF_IGNORE])
+    for ref in state.fix_list:
+        op = ref[0]
+        if op in simple_refs:
+            fix_ref(ref, 1, state)
+        elif op == OP.GROUPREF_EXISTS:
+            fix_ref(ref[1], 0, state)
+        elif op == OP.SUBPATTERN:
+            marks = ref[1][0]
+            try:
+                marks[1] = marks[0] if marks[1] is None else named[marks[1]]
+            except KeyError:
+                raise error("invalid group reference: %s" % marks[1])
+
+def parse(str, flags=0, pattern=None, scanner=False):
     # parse 're' pattern into list of (opcode, argument) tuples
 
     source = Tokenizer(str)
@@ -669,122 +847,202 @@
         pattern = Pattern()
     pattern.flags = flags
     pattern.str = str
-
-    p = _parse_sub(source, pattern, 0)
-
-    tail = source.get()
-    if tail == ")":
-        raise error, "unbalanced parenthesis"
-    elif tail:
-        raise error, "bogus characters at end of regular expression"
+    pattern.group_count = 0
+
+    p, named_groups = _parse_sub(source, pattern, set(), False)
+
+    if source.match(")"):
+        raise error("unbalanced parenthesis")
+
+    if source.next is not None:
+        raise error("bad characters at end of pattern")
+
+    fix_grouprefs(p, pattern)
+
+    if scanner:
+        # check that the scanner pattern doesn't have any extra capture groups
+        # or group references
+        # (actually, plain capture groups are turned into non-capture groups)
+        _validate_scanner(p)
 
     if flags & SRE_FLAG_DEBUG:
         p.dump()
 
-    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
-        # the VERBOSE flag was switched on inside the pattern.  to be
-        # on the safe side, we'll parse the whole thing again...
-        return parse(str, p.pattern.flags)
-
     return p
 
 def parse_template(source, pattern):
-    # parse 're' replacement string into list of literals and
-    # group references
+    # parse 're' replacement into (literals, groups); groups index None slots in literals
+    sep = source[ : 0]
+    char_type = unichr if isinstance(sep, unicode) else chr
     s = Tokenizer(source)
-    sget = s.get
-    p = []
-    a = p.append
-    def literal(literal, p=p, pappend=a):
-        if p and p[-1][0] is LITERAL:
-            p[-1] = LITERAL, p[-1][1] + literal
-        else:
-            pappend((LITERAL, literal))
-    sep = source[:0]
-    if type(sep) is type(""):
-        makechar = chr
-    else:
-        makechar = unichr
-    while 1:
-        this = sget()
+    literals, groups = [], []
+    current_literal = []
+    def add_literal(char_code):
+        current_literal.append(char_type(char_code))
+    def flush_literal():
+        if current_literal:
+            literals.append(sep.join(current_literal))
+            current_literal[:] = []
+    def add_group(index):
+        flush_literal()
+        groups.append((index, len(literals)))
+        literals.append(None)
+    while True:
+        this = s.get()
         if this is None:
             break # end of replacement string
-        if this and this[0] == "\\":
-            # group
-            c = this[1:2]
-            if c == "g":
-                name = ""
-                if s.match("<"):
-                    while 1:
-                        char = sget()
-                        if char is None:
-                            raise error, "unterminated group name"
-                        if char == ">":
-                            break
-                        name = name + char
-                if not name:
-                    raise error, "bad group name"
-                try:
+        if this[0] == "\\":
+            c = this[1 : 2]
+            if c in HEX_ESCAPE_LENGTH:
+                # hex escape; NOTE(review): 'escape' is a global name here, not 'this' -- confirm
+                add_literal(hex_escape(s, escape, HEX_ESCAPE_LENGTH[c]))
+            elif c == "0":
+                add_literal(oct_escape(s, this[0], this[1 : ]))
+            elif c in DIGITS:
+                if s.next in DIGITS:
+                    this += s.get()
+                    if set(this[1 : ]) <= OCTDIGITS and s.next in OCTDIGITS:
+                        this += s.get()
+                        add_literal(int(this[1 : ], 8) & 0xFF)
+                    else:
+                        index = int(this[1 : ])
+                        if index > pattern.groups:
+                            raise error("invalid group reference: %s" % index)
+                        add_group(index)
+                else:
+                    index = int(this[1 : ])
+                    if index > pattern.groups:
+                        raise error("invalid group reference: %s" % index)
+                    add_group(index)
+            elif c == "g":
+                # group reference
+                if s.next in GROUP_DELIMITERS:
+                    # delimited group reference
+                    delimiter = s.get()
+                    name = parse_name(s, GROUP_DELIMITERS[delimiter], "group",
+                      this + delimiter)
+                elif s.next in DIGITS:
+                    # non-delimited group reference (single digit)
+                    name = s.get()
+                else:
+                    raise error("missing group name: %s" % this)
+                if name.isdigit():
                     index = int(name)
-                    if index < 0:
-                        raise error, "negative group number"
-                except ValueError:
-                    if not isname(name):
-                        raise error, "bad character in group name"
+                    if not (0 <= index <= pattern.groups):
+                        raise error("invalid group reference: %s" % index)
+                elif is_name(name):
                     try:
                         index = pattern.groupindex[name]
                     except KeyError:
-                        raise IndexError, "unknown group name"
-                a((MARK, index))
-            elif c == "0":
-                if s.next in OCTDIGITS:
-                    this = this + sget()
-                    if s.next in OCTDIGITS:
-                        this = this + sget()
-                literal(makechar(int(this[1:], 8) & 0xff))
-            elif c in DIGITS:
-                isoctal = False
-                if s.next in DIGITS:
-                    this = this + sget()
-                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
-                        s.next in OCTDIGITS):
-                        this = this + sget()
-                        isoctal = True
-                        literal(makechar(int(this[1:], 8) & 0xff))
-                if not isoctal:
-                    a((MARK, int(this[1:])))
-            else:
+                        raise error("invalid group reference: %s" % name)
+                else:
+                    raise error("bad group name: %s" % name)
+                add_group(index)
+            elif c == "k":
+                # named group reference
+                if s.next in GROUP_DELIMITERS:
+                    # delimited group reference
+                    delimiter = s.get()
+                    name = parse_name(s, GROUP_DELIMITERS[delimiter], "group",
+                      this + delimiter)
+                else:
+                    # non-delimited group reference; invalid for \k
+                    raise error("missing group name: %s" % this)
+                if is_name(name):
+                    try:
+                        index = pattern.groupindex[name]
+                    except KeyError:
+                        raise error("invalid group reference: %s" % name)
+                else:
+                    raise error("bad group name: %s" % name)
+                add_group(index)
+            elif c == "N":
+                # named character
+                if not s.match("{"):
+                    raise error("missing character name: %s" % this)
+                name = parse_name(s, "}", "character", this + "{")
                 try:
-                    this = makechar(ESCAPES[this][1])
+                    add_literal(ord(unicodedata.lookup(name)))
                 except KeyError:
-                    pass
-                literal(this)
+                    raise error("bad character name: %s" % name)
+            else:
+                try:
+                    add_literal(ESCAPES[this][1])
+                except KeyError:
+                    add_literal(ord(this[0]))
+                    add_literal(ord(this[1]))
         else:
-            literal(this)
-    # convert template to groups and literals lists
-    i = 0
-    groups = []
-    groupsappend = groups.append
-    literals = [None] * len(p)
-    for c, s in p:
-        if c is MARK:
-            groupsappend((i, s))
-            # literal[i] is already None
-        else:
-            literals[i] = s
-        i = i + 1
-    return groups, literals
-
-def expand_template(template, match):
-    g = match.group
-    sep = match.string[:0]
-    groups, literals = template
+            add_literal(ord(this))
+    flush_literal()
+    return literals, groups
+
+def expand_template(template, match, unmatched_as_empty=False):
+    g = match._internal_group
+    sep = match.string[ : 0]  # empty string of the same type as the subject
+    literals, groups = template  # in the order parse_template returns them
     literals = literals[:]
     try:
-        for index, group in groups:
-            literals[index] = s = g(group)
+        for index, pos in groups:  # (group index, slot in literals)
+            s = g(index)
             if s is None:
-                raise error, "unmatched group"
+                if unmatched_as_empty:
+                    s = sep
+                else:
+                    raise error("unmatched group")
+            literals[pos] = s
     except IndexError:
-        raise error, "invalid group reference"
+        raise error("invalid group reference: %s" % index)
     return sep.join(literals)
+
+def _validate_scanner(pattern):
+    # checks, in place, that the scanner pattern is a single BRANCH whose
+    # alternatives are each exactly one capture group; raises error if not
+    # (plain capture groups nested inside are turned into non-capture groups)
+
+    # pattern must be an alternative
+    if len(pattern) != 1 or pattern[0][0] != OP.BRANCH:
+        raise error("invalid scanner pattern")
+
+    # each alternative must be a capture group
+    items = pattern[0][1][1]
+    for index, item in enumerate(items, start=1):
+        if len(item) != 1 or item[0][0] != OP.SUBPATTERN:
+            raise error("invalid scanner pattern")
+
+        # ensure that the capture groups are numbered consecutively
+        op, av = item[0]
+        marks, subitems = av
+        marks = [index, index]
+        av = marks, _validate_scanner_items(subitems)
+        item[0] = op, av
+
+def _validate_scanner_items(items):
+    # returns a validated copy of items; raises error on group references and named groups
+    new_items = []
+    for item in items:
+        op, av = item
+        if op in (OP.ASSERT, OP.ASSERT_NOT, OP.ATOMIC, OP.BRANCH):
+            av = av[0], _validate_scanner_items(av[1])  # NOTE(review): BRANCH av[1] looks like a list of alternatives -- confirm
+            item = op, av
+        elif op in (OP.GROUPREF, OP.GROUPREF_EXISTS, OP.GROUPREF_IGNORE):
+            # reject group references
+            raise error("capture group reference in scanner pattern")
+        elif op == OP.SUBPATTERN:
+            # turn plain capture groups into non-capture groups and
+            # reject named capture groups
+            marks, subitems = av
+            if marks:
+                # it's a capture group
+                if marks[0] != marks[1]:
+                    # it's a named capture group
+                    raise error("capture group in scanner pattern")
+                marks = None
+            subitems = _validate_scanner_items(subitems)
+            if not marks and len(subitems) == 1:
+                # it's a non-capture group containing one item, so promote it
+                op, av = subitems[0]
+            else:
+                av = marks, subitems
+            item = op, av
+        new_items.append(item)
+    return new_items
=== modified file Lib/re.py
--- Lib/re.py 2009-01-01 12:00:19 +0000
+++ Lib/re.py 2009-03-05 21:30:48 +0000
@@ -27,52 +27,86 @@
 concatenate ordinary characters, so last matches the string 'last'.
 
 The special characters are:
-    "."      Matches any character except a newline.
-    "^"      Matches the start of the string.
-    "$"      Matches the end of the string or just before the newline at
-             the end of the string.
-    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
-             Greedy means that it will match as many repetitions as possible.
-    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
-    "?"      Matches 0 or 1 (greedy) of the preceding RE.
-    *?,+?,?? Non-greedy versions of the previous three special characters.
-    {m,n}    Matches from m to n repetitions of the preceding RE.
-    {m,n}?   Non-greedy version of the above.
-    "\\"     Either escapes special characters or signals a special sequence.
-    []       Indicates a set of characters.
-             A "^" as the first character indicates a complementing set.
-    "|"      A|B, creates an RE that will match either A or B.
-    (...)    Matches the RE inside the parentheses.
-             The contents can be retrieved or matched later in the string.
-    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
-    (?:...)  Non-grouping version of regular parentheses.
-    (?P<name>...) The substring matched by the group is accessible by name.
-    (?P=name)     Matches the text matched earlier by the group named name.
-    (?#...)  A comment; ignored.
-    (?=...)  Matches if ... matches next, but doesn't consume the string.
-    (?!...)  Matches if ... doesn't match next.
-    (?<=...) Matches if preceded by ... (must be fixed length).
-    (?<!...) Matches if not preceded by ... (must be fixed length).
-    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
-                       the (optional) no pattern otherwise.
+    "."                Matches any character except a newline.
+    "^"                Matches the start of the string.
+    "$"                Matches the end of the string or just before the
+                       newline at the end of the string.
+    "*"                Matches 0 or more (greedy) repetitions of the
+                       preceding RE. Greedy means that it will match as
+                       many repetitions as possible.
+    "+"                Matches 1 or more (greedy) repetitions of the
+                       preceding RE.
+    "?"                Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,??           Non-greedy versions of the previous three special
+                       characters.
+    *+,++,?+           Possessive versions of the previous three special
+                       characters.
+    {m,n}              Matches from m to n repetitions of the preceding
+                       RE.
+    {m,n}?             Non-greedy version of the above.
+    {m,n}+             Possessive version of the above.
+    "\\"               Either escapes special characters or signals a
+                       special sequence.
+    []                 Indicates a set of characters. A "^" as the first
+                       character indicates a complementing set.
+    "|"                A|B, creates an RE that will match either A or B.
+    (...)              Matches the RE inside the parentheses. The contents
+                       can be retrieved or matched later in the string.
+    (?Lruz)            Set the L, R, U or Z flag for the entire RE (see
+                       below).
+    (?imsx-imsx)       Set/clear the I, M, S or X flag for the following RE
+                       (see below).
+    (?imsx-imsx:...)   Set/clear the I, M, S or X flag for the enclosed RE.
+    (?:...)            Non-capturing version of regular parentheses.
+    (?P<name>...)      The substring matched by the group is accessible by
+                       name.
+    (?<name>...)       The substring matched by the group is accessible by
+                       name.
+    (?#...)            A comment; ignored.
+    (?>...)            Atomic group. Like (?:...) but won't retry the RE
+                       within the parentheses.
+    (?=...)            Matches if ... matches next, but doesn't consume
+                       the string.
+    (?!...)            Matches if ... doesn't match next.
+    (?<=...)           Matches if preceded by ....
+    (?<!...)           Matches if not preceded by ....
+    (?(id/name)yes|no) Matches yes pattern if the group with id/name
+                       matched, the (optional) no pattern otherwise.
+    (?|...|...)        Group numbers are re-used between the alternatives.
 
 The special sequences consist of "\\" and a character from the list
 below.  If the ordinary character is not on the list, then the
 resulting RE will match the second character.
-    \number  Matches the contents of the group of the same number.
-    \A       Matches only at the start of the string.
-    \Z       Matches only at the end of the string.
-    \b       Matches the empty string, but only at the start or end of a word.
-    \B       Matches the empty string, but not at the start or end of a word.
-    \d       Matches any decimal digit; equivalent to the set [0-9].
-    \D       Matches any non-digit character; equivalent to the set [^0-9].
-    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
-    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
-    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
-             With LOCALE, it will match the set [0-9_] plus characters defined
-             as letters for the current locale.
-    \W       Matches the complement of \w.
-    \\       Matches a literal backslash.
+    \number     Matches the contents of the group of the same number.
+    \A          Matches only at the start of the string.
+    \b          Matches the empty string, but only at the start or end of
+                a word.
+    \B          Matches the empty string, but not at the start or end of a
+                word.
+    \d          Matches any decimal digit; equivalent to the set [0-9].
+    \D          Matches any non-digit character; equivalent to the set
+                [^0-9].
+    \g<name>    Matches the text matched by the group named name.
+    \g<number>  Matches the contents of the group of the same number.
+    \g<+number> Matches the contents of the group of the relative number.
+    \g<-number> Matches the contents of the group of the relative number.
+    \G          Matches the empty string, but only at the place where the
+                previous match ended.
+    \k<name>    Matches the text matched earlier by the group named name.
+    \N{name}    Matches the named Unicode character.
+    \p{name}    Matches any character having the named property.
+    \P{name}    Matches any character not having the named property.
+    \s          Matches any whitespace character; equivalent to
+                [ \t\n\r\f\v].
+    \S          Matches any non-whitespace character; equivalent to
+                [^ \t\n\r\f\v].
+    \w          Matches any alphanumeric character; equivalent to
+                [a-zA-Z0-9_]. With LOCALE, it will match the set
+                [0-9_] plus characters defined as letters for the current
+                locale.
+    \W          Matches the complement of \w.
+    \Z          Matches only at the end of the string.
+    \\          Matches a literal backslash.
 
 This module exports the following functions:
     match    Match a regular expression pattern to the beginning of a string.
@@ -84,18 +118,20 @@
     finditer Return an iterator yielding a match object for each match.
     compile  Compile a pattern into a RegexObject.
     purge    Clear the regular expression cache.
-    escape   Backslash all non-alphanumerics in a string.
+    escape   Backslash everything except alphanumerics and underscores.
 
 Some of the functions in this module takes flags as optional parameters:
-    I  IGNORECASE  Perform case-insensitive matching.
-    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
-    M  MULTILINE   "^" matches the beginning of lines (after a newline)
-                   as well as the string.
-                   "$" matches the end of lines (before a newline) as well
-                   as the end of the string.
-    S  DOTALL      "." matches any character at all, including the newline.
-    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
-    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
+    I  IGNORECASE Perform case-insensitive matching.
+    L  LOCALE     Make \w, \W, \b, \B, dependent on the current locale.
+    M  MULTILINE  "^" matches the beginning of lines (after a newline) as
+                  well as the string.
+                  "$" matches the end of lines (before a newline) as well
+                  as the end of the string.
+    R  REVERSE    Search backwards, from the end to the start.
+    S  DOTALL     "." matches any character at all, including the newline.
+    X  VERBOSE    Ignore whitespace and comments for nicer looking RE's.
+    U  UNICODE    Make \w, \W, \b, \B, dependent on the Unicode locale.
+    Z  ZEROWIDTH  Permit splitting on zero-width separators.
 
 This module also defines an exception 'error'.
 
@@ -109,24 +145,27 @@
 __all__ = [ "match", "search", "sub", "subn", "split", "findall",
     "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
     "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
-    "UNICODE", "error" ]
-
-__version__ = "2.2.1"
+    "UNICODE", "R", "REVERSE", "Z", "ZEROWIDTH", "error" ]
+
+__version__ = "2.2.2"
 
 # flags
 I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
 L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
+M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
+R = REVERSE = sre_compile.SRE_FLAG_REVERSE # search backwards
+S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
 U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
-M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
-S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
 X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
-
+Z = ZEROWIDTH = sre_compile.SRE_FLAG_ZEROWIDTH # permit splitting on zero-width
+                                               # separators.
 # sre extensions (experimental, don't rely on these)
 T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
 DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
 
 # sre exception
-error = sre_compile.error
+class error(Exception):
+    pass
 
 # --------------------------------------------------------------------
 # public interface
@@ -141,16 +180,16 @@
     a match object, or None if no match was found."""
     return _compile(pattern, flags).search(string)
 
-def sub(pattern, repl, string, count=0):
+def sub(pattern, repl, string, count=0, flags=0):
     """Return the string obtained by replacing the leftmost
     non-overlapping occurrences of the pattern in string by the
     replacement repl.  repl can be either a string or a callable;
     if a string, backslash escapes in it are processed.  If it is
     a callable, it's passed the match object and must return
     a replacement string to be used."""
-    return _compile(pattern, 0).sub(repl, string, count)
-
-def subn(pattern, repl, string, count=0):
+    return _compile(pattern, flags).sub(repl, string, count)
+
+def subn(pattern, repl, string, count=0, flags=0):
     """Return a 2-tuple containing (new_string, number).
     new_string is the string obtained by replacing the leftmost
     non-overlapping occurrences of the pattern in the source
@@ -159,12 +198,12 @@
     callable; if a string, backslash escapes in it are processed.
     If it is a callable, it's passed the match object and must
     return a replacement string to be used."""
-    return _compile(pattern, 0).subn(repl, string, count)
-
-def split(pattern, string, maxsplit=0):
+    return _compile(pattern, flags).subn(repl, string, count)
+
+def split(pattern, string, maxsplit=0, flags=0):
     """Split the source string by the occurrences of the pattern,
     returning a list containing the resulting substrings."""
-    return _compile(pattern, 0).split(string, maxsplit)
+    return _compile(pattern, flags).split(string, maxsplit)
 
 def findall(pattern, string, flags=0):
     """Return a list of all non-overlapping matches in the string.
@@ -198,23 +237,18 @@
     "Compile a template pattern, returning a pattern object"
     return _compile(pattern, flags|T)
 
-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
-    _alphanum[c] = 1
-del c
+_nonescaped = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
 
 def escape(pattern):
     "Escape all non-alphanumeric characters in pattern."
     s = list(pattern)
-    alphanum = _alphanum
-    for i in range(len(pattern)):
-        c = pattern[i]
-        if c not in alphanum:
+    for i, c in enumerate(s):
+        if c not in _nonescaped:
             if c == "\000":
                 s[i] = "\\000"
             else:
                 s[i] = "\\" + c
-    return pattern[:0].join(s)
+    return pattern[ : 0].join(s)
 
 # --------------------------------------------------------------------
 # internals
@@ -224,7 +258,7 @@
 
 _pattern_type = type(sre_compile.compile("", 0))
 
-_MAXCACHE = 100
+_MAXCACHE = 256
 
 def _compile(*key):
     # internal: compile pattern
@@ -237,12 +271,12 @@
         if flags:
             raise ValueError('Cannot process flags argument with a compiled pattern')
         return pattern
-    if not sre_compile.isstring(pattern):
-        raise TypeError, "first argument must be string or compiled pattern"
+    if not isinstance(pattern, (str, unicode)):
+        raise TypeError("First argument must be string or compiled pattern")
     try:
         p = sre_compile.compile(pattern, flags)
-    except error, v:
-        raise error, v # invalid expression
+    except sre_compile.error, v:
+        raise error(v) # invalid expression
     if len(_cache) >= _MAXCACHE:
         _cache.clear()
     _cache[cachekey] = p
@@ -256,8 +290,8 @@
     repl, pattern = key
     try:
         p = sre_parse.parse_template(repl, pattern)
-    except error, v:
-        raise error, v # invalid expression
+    except sre_compile.error, v:
+        raise error(v) # invalid expression
     if len(_cache_repl) >= _MAXCACHE:
         _cache_repl.clear()
     _cache_repl[key] = p
@@ -266,7 +300,7 @@
 def _expand(pattern, match, template):
     # internal: match.expand implementation hook
     template = sre_parse.parse_template(template, pattern)
-    return sre_parse.expand_template(template, match)
+    return sre_parse.expand_template(template, match, True)
 
 def _subx(pattern, template):
     # internal: pattern.sub/subn implementation helper
@@ -275,7 +309,7 @@
         # literal replacement
         return template[1][0]
     def filter(match, template=template):
-        return sre_parse.expand_template(template, match)
+        return sre_parse.expand_template(template, match, True)
     return filter
 
 # register myself for pickling
@@ -292,36 +326,48 @@
 
 class Scanner:
     def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
         self.lexicon = lexicon
         # combine phrases into a compound pattern
-        p = []
-        s = sre_parse.Pattern()
-        s.flags = flags
-        for phrase, action in lexicon:
-            p.append(sre_parse.SubPattern(s, [
-                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
-                ]))
-        s.groups = len(p)+1
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        self.scanner = sre_compile.compile(p)
+        string_type = type(lexicon[0][0])
+        sep, template = string_type("|"), string_type("(%s)")
+        regex = sep.join(template % phrase for phrase, action in lexicon)
+        # compile pattern, specifying that it's for a scanner, which is
+        # an alternation of capture groups with no other capture groups
+        self.scanner = sre_compile.compile(regex, flags, scanner=1)
     def scan(self, string):
         result = []
-        append = result.append
         match = self.scanner.scanner(string).match
-        i = 0
-        while 1:
+        pos = 0
+        while True:
             m = match()
             if not m:
                 break
-            j = m.end()
-            if i == j:
+            end_pos = m.end()
+            if pos == end_pos:
                 break
-            action = self.lexicon[m.lastindex-1][1]
+            action = self.lexicon[m.lastindex - 1][1]
             if hasattr(action, '__call__'):
                 self.match = m
                 action = action(self, m.group())
             if action is not None:
-                append(action)
-            i = j
-        return result, string[i:]
+                result.append(action)
+            pos = end_pos
+        return result, string[pos : ]
+    def scaniter(self, string):
+        match = self.scanner.scanner(string).match
+        pos = 0
+        while True:
+            m = match()
+            if not m:
+                break
+            end_pos = m.end()
+            if pos == end_pos:
+                break
+            action = self.lexicon[m.lastindex - 1][1]
+            if hasattr(action, '__call__'):
+                self.match = m
+                action = action(self, m.group())
+            if action is not None:
+                yield action
+            pos = end_pos
+        self.string = string[pos : ]
=== modified file Lib/test/re_tests.py
--- Lib/test/re_tests.py 2003-04-20 07:35:44 +0000
+++ Lib/test/re_tests.py 2009-02-03 18:18:47 +0000
@@ -87,7 +87,7 @@
     (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
     # NOTE: not an error under PCRE/PRE:
     # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
-    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
+    (r'\c\e\h\i\j\m\q\y\z', 'cehijmqyz', SUCCEED, 'found', 'cehijmqyz'),
     (r'\xff', '\377', SUCCEED, 'found', chr(255)),
     # new \x semantics
     (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
@@ -106,8 +106,8 @@
     ('a.*b', 'acc\nccb', FAIL),
     ('a.{4,5}b', 'acc\nccb', FAIL),
     ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
-    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
-    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+    ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
     ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
     ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
 
@@ -563,7 +563,7 @@
     # Check odd placement of embedded pattern modifiers
 
     # not an error under PCRE/PRE:
-    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
+    ('(?i)w', 'W', SUCCEED, 'found', 'W'),
     # ('w(?i)', 'W', SYNTAX_ERROR),
 
     # Comments using the x embedded pattern modifier
@@ -607,8 +607,8 @@
     # new \x semantics
     (r'\x00ff', '\377', FAIL),
     # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
-    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
-    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
+    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
+    ('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
     (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
     (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
 
@@ -627,7 +627,7 @@
     # bug 114033: nothing to repeat
     (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
     # bug 115040: rescan if flags are modified inside pattern
-    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
+    (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
     # bug 115618: negative lookahead
     (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
     # bug 116251: character class bug
=== modified file Lib/test/test_re.py
--- Lib/test/test_re.py 2008-09-10 14:27:00 +0000
+++ Lib/test/test_re.py 2009-02-28 23:45:38 +0000
@@ -6,6 +6,7 @@
 from re import Scanner
 import sys, os, traceback
 from weakref import proxy
+import unicodedata
 
 # Misc tests from Tim Peters' re.doc
 
@@ -181,9 +182,9 @@
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
-        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
-        self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx'), '')
+        self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx'), '')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
 
     def test_re_subn(self):
@@ -208,6 +209,7 @@
                           None, '::', 'c'])
         self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                          ['', 'a', '', '', 'c'])
+        self.assertEqual(re.split("(?z):*", ":a:b::c"), ['', 'a', 'b', 'c', ''])
 
     def test_qualified_re_split(self):
         self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
@@ -685,6 +687,102 @@
         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
 
+    def test_atomic(self):
+        pattern = re.compile(r'a(?>bc|b)c')
+        self.assertEqual(pattern.match('abc'), None)
+        self.assertNotEqual(pattern.match('abcc'), None)
+        self.assertEqual(re.match(r'(?>.*).', 'abc'), None)
+        self.assertNotEqual(re.match(r'(?>x)++', 'xxx'), None)
+        self.assertNotEqual(re.match(r'(?>x++)', 'xxx'), None)
+        self.assertEqual(re.match(r'(?>x)++x', 'xxx'), None)
+        self.assertEqual(re.match(r'(?>x++)x', 'xxx'), None)
+
+    def test_bug_2537(self):
+        "nested repeat"
+        self.assertEqual(re.sub('((x|y)*)*', '(\\1, \\2)', 'xyyzy', 1), '(, y)zy')
+        self.assertEqual(re.sub('((x|y+)*)*', '(\\1, \\2)', 'xyyzy', 1), '(, yy)zy')
+
+    def test_named_groups(self):
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<b>b)", "a").groups(), ('a', None))
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<b>b)", "a").groupdict(), {'a': 'a', 'b': None})
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<b>b)", "b").groups(), (None, 'b'))
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<b>b)", "b").groupdict(), {'a': None, 'b': 'b'})
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<a>b)", "a").groups(), ('a', None))
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<a>b)", "a").groupdict(), {'a': 'a'})
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<a>b)", "b").groups(), (None, 'b'))
+        self.assertEqual(re.match(r"(?P<a>a)|(?P<a>b)", "b").groupdict(), {'a': 'b'})
+
+    def test_duplicate_groups(self):
+        self.assertEqual(re.match(r"(?:(a)|(b))", "a").groups(), ('a', None))
+        self.assertEqual(re.match(r"(?|(a)|(b))", "a").groups(), ('a',))
+
+    def test_search_anchor(self):
+        self.assertEqual(re.findall(r"\w", "abc def"), ['a', 'b', 'c', 'd', 'e', 'f'])
+        self.assertEqual(re.findall(r"\G\w", "abc def"), ['a', 'b', 'c'])
+        self.assertEqual(re.findall(r"\G\w", " abc def"), [])
+
+    def test_word_chars(self):
+        word_chars, all_chars = [], []
+        accept_set = set(['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', 'Nd', 'No', 'Mc', 'Me', 'Mn', 'Pc'])
+        for i in range(sys.maxunicode):
+            c = unichr(i)
+            if c == '_' or unicodedata.category(c) in accept_set:
+                word_chars.append(c)
+            all_chars.append(c)
+        word_chars = u''.join(word_chars)
+        found_chars = u''.join(re.findall(r'(?u)(\w)', u''.join(all_chars)))
+        self.assertEqual(found_chars, word_chars)
+
+    def test_digit_chars(self):
+        digit_chars, all_chars = [], []
+        accept_set = set(['Nd'])
+        for i in range(sys.maxunicode):
+            c = unichr(i)
+            if unicodedata.category(c) in accept_set:
+                digit_chars.append(c)
+            all_chars.append(c)
+        digit_chars = u''.join(digit_chars)
+        found_chars = u''.join(re.findall(r'(?u)(\d)', u''.join(all_chars)))
+        self.assertEqual(found_chars, digit_chars)
+
+    def test_named_chars(self):
+        self.assertNotEqual(re.match(r"\N{LATIN CAPITAL LETTER A}", u"A"), None)
+        self.assertNotEqual(re.match(r"[\N{LATIN CAPITAL LETTER A}]", u"A"), None)
+        self.assertEqual(re.match(r"\N{LATIN CAPITAL LETTER A}", u"B"), None)
+        self.assertEqual(re.match(r"[\N{LATIN CAPITAL LETTER A}]", u"a"), None)
+
+    def test_unicode_properties(self):
+        self.assertNotEqual(re.match(r"\p{Lu}", u"A"), None)
+        self.assertEqual(re.match(r"\p{Lu}", u"a"), None)
+        self.assertNotEqual(re.match(r"\p{L&}", u"A"), None)
+        
+        ascii_chars = "".join(chr(c) for c in range(0x0, 0x80))
+        charsets = r"""
+\p{Alnum}   [\p{L&}\p{Nd}]          [a-zA-Z0-9]
+\p{Alpha}   \p{L&}                  [a-zA-Z]
+\p{ASCII}                           [\x00-\x7F]
+\p{Blank}   [\p{Zs}\t]              [ \t]
+\p{Cntrl}   \p{Cc}                  [\x00-\x1F\x7F]
+\p{Digit}   \p{Nd}              \d  [0-9]
+\p{Graph}   [^\p{Z}\p{C}]           [\x21-\x7E]
+\p{Lower}   \p{Ll}                  [a-z]  
+\p{Print}   \P{C}                   [\x20-\x7E]  
+\p{Punct}   [\p{P}\p{S}]            [!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]  
+\p{Space}   [\p{Z}\t\r\n\v\f]   \s  [ \t\r\n\v\f]  
+\p{Upper}   \p{Lu}                  [A-Z]  
+            [\p{L}\p{N}\p{Pc}]  \w  [A-Za-z0-9_]  
+\p{XDigit}                          [A-Fa-f0-9]
+"""
+        for line in charsets.splitlines():
+            parts = [p.strip() for p in line.split("  ")]
+            parts = [p for p in parts if p]
+            if parts:
+                matched = [re.findall(p, ascii_chars, re.U) for p in parts]
+                self.assertEqual(self.all_same(matched), True)
+
+    def all_same(self, items):
+        first = items[0]
+        return all(i == first for i in items[1 : ])
 
 def run_re_tests():
     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
=== modified file Modules/_sre.c
--- Modules/_sre.c 2008-09-10 14:27:00 +0000
+++ Modules/_sre.c 2009-03-07 02:44:17 +0000
@@ -4,24 +4,25 @@
  * regular expression matching engine
  *
  * partial history:
- * 1999-10-24 fl  created (based on existing template matcher code)
- * 2000-03-06 fl  first alpha, sort of
- * 2000-08-01 fl  fixes for 1.6b1
- * 2000-08-07 fl  use PyOS_CheckStack() if available
- * 2000-09-20 fl  added expand method
- * 2001-03-20 fl  lots of fixes for 2.1b2
- * 2001-04-15 fl  export copyright as Python attribute, not global
- * 2001-04-28 fl  added __copy__ methods (work in progress)
- * 2001-05-14 fl  fixes for 1.5.2 compatibility
- * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
- * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
- * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
- * 2001-10-21 fl  added sub/subn primitive
- * 2001-10-24 fl  added finditer primitive (for 2.2 only)
- * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
- * 2002-11-09 fl  fixed empty sub/subn return type
- * 2003-04-18 mvl fully support 4-byte codes
- * 2003-10-17 gn  implemented non recursive scheme
+ * 1999-10-24 fl   created (based on existing template matcher code)
+ * 2000-03-06 fl   first alpha, sort of
+ * 2000-08-01 fl   fixes for 1.6b1
+ * 2000-08-07 fl   use PyOS_CheckStack() if available
+ * 2000-09-20 fl   added expand method
+ * 2001-03-20 fl   lots of fixes for 2.1b2
+ * 2001-04-15 fl   export copyright as Python attribute, not global
+ * 2001-04-28 fl   added __copy__ methods (work in progress)
+ * 2001-05-14 fl   fixes for 1.5.2 compatibility
+ * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
+ * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
+ * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
+ * 2001-10-21 fl   added sub/subn primitive
+ * 2001-10-24 fl   added finditer primitive (for 2.2 only)
+ * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
+ * 2002-11-09 fl   fixed empty sub/subn return type
+ * 2003-04-18 mvl  fully support 4-byte codes
+ * 2003-10-17 gn   implemented non recursive scheme
+ * 2008-09-21 mrab major reworking
  *
  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
@@ -37,9 +38,7 @@
 #ifndef SRE_RECURSIVE
 
 static char copyright[] =
-    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
-
-#define PY_SSIZE_T_CLEAN
+  " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
 
 #include "Python.h"
 #include "structmember.h" /* offsetof */
@@ -55,11 +54,11 @@
 
 #define SRE_PY_MODULE "re"
 
-/* defining this one enables tracing */
-#undef VERBOSE
+/* uncomment this define to enable tracing */
+/* #define VERBOSE_SRE_ENGINE */
 
 #if PY_VERSION_HEX >= 0x01060000
-#if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
+#if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
 /* defining this enables unicode support (default under 1.6a1 and later) */
 #define HAVE_UNICODE
 #endif
@@ -67,9 +66,6 @@
 
 /* -------------------------------------------------------------------- */
 /* optional features */
-
-/* enables fast searching */
-#define USE_FAST_SEARCH
 
 /* enables aggressive inlining (always on for Visual C) */
 #undef USE_INLINE
@@ -95,13 +91,13 @@
 #endif
 
 /* error codes */
-#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
-#define SRE_ERROR_STATE -2 /* illegal state */
+#define SRE_ERROR_ILLEGAL -1         /* illegal opcode */
+#define SRE_ERROR_STATE -2           /* illegal state */
 #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
-#define SRE_ERROR_MEMORY -9 /* out of memory */
-#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
-
-#if defined(VERBOSE)
+#define SRE_ERROR_MEMORY -9          /* out of memory */
+#define SRE_ERROR_INTERRUPTED -10    /* signal handler raised exception */
+
+#if defined(VERBOSE_SRE_ENGINE)
 #define TRACE(v) printf v
 #else
 #define TRACE(v)
@@ -110,219 +106,586 @@
 /* -------------------------------------------------------------------- */
 /* search engine state */
 
-/* default character predicates (run sre_chars.py to regenerate tables) */
-
-#define SRE_DIGIT_MASK 1
-#define SRE_SPACE_MASK 2
-#define SRE_LINEBREAK_MASK 4
-#define SRE_ALNUM_MASK 8
-#define SRE_WORD_MASK 16
-
-/* FIXME: this assumes ASCII.  create tables in init_sre() instead */
-
-static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
-25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
-0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
-
-static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
-61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
-108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
-106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-120, 121, 122, 123, 124, 125, 126, 127 };
-
-#define SRE_IS_DIGIT(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
-#define SRE_IS_SPACE(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
-#define SRE_IS_LINEBREAK(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
-#define SRE_IS_ALNUM(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
-#define SRE_IS_WORD(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
-
-static unsigned int sre_lower(unsigned int ch)
-{
-    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
-}
-
-/* locale-specific character predicates */
-/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
- * warnings when c's type supports only numbers < N+1 */
-#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
-#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
-#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
-#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
-#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
-
-static unsigned int sre_lower_locale(unsigned int ch)
-{
-    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
-}
-
-/* unicode-specific character predicates */
-
-#if defined(HAVE_UNICODE)
-
-#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
-#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
-#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
-#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
-#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
-
-static unsigned int sre_lower_unicode(unsigned int ch)
-{
-    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
-}
-
-#endif
-
-LOCAL(int)
-sre_category(SRE_CODE category, unsigned int ch)
-{
+/*
+ The following structs and function are copied from unicodedata.c.
+
+ Ideally the functionality would be available directly from that 'module'.
+ */
+
+typedef struct {
+    const unsigned char category;         /* index into
+                                             PyUnicode_CategoryNames */
+    const unsigned char combining;        /* combining class value 0 - 255 */
+    const unsigned char bidirectional;    /* index into
+                                             PyUnicode_BidirectionalNames */
+    const unsigned char mirrored;         /* true if mirrored in bidir mode */
+    const unsigned char east_asian_width; /* index into
+                                             PyUnicode_EastAsianWidth */
+} _PyUnicode_DatabaseRecord;
+
+typedef struct change_record {
+    const unsigned char bidir_changed;
+    const unsigned char category_changed;
+    const unsigned char decimal_changed;
+    const unsigned char mirrored_changed;
+    const int numeric_changed;
+} change_record;
+
+#include "unicodedata_db.h"
+
+static const unsigned char get_unicode_category(Py_UCS4 code) {
+    int index;
+    if (code >= 0x110000)
+        index = 0;
+    else {
+        index = index1[(code >> SHIFT)];
+        index = index2[(index << SHIFT) + (code & ((1 << SHIFT) - 1))];
+    }
+
+    return _PyUnicode_Database_Records[index].category;
+}
+
+/* ASCII-specific */
+
+/* The maximum ASCII character. */
+#define SRE_ASCII_MAX 0x7F
+
+/* Bit-masks for the character categories. */
+#define SRE_BLANK_MASK 0x001
+#define SRE_DIGIT_MASK 0x002
+#define SRE_GRAPH_MASK 0x004
+#define SRE_LOWER_MASK 0x008
+#define SRE_PRINT_MASK 0x010
+#define SRE_PUNCT_MASK 0x020
+#define SRE_UNDERSCORE_MASK 0x040
+#define SRE_UPPER_MASK 0x080
+#define SRE_XDIGIT_MASK 0x100
+#define SRE_WHITESPACE_MASK 0x200
+
+#define SRE_ALPHA_MASK (SRE_LOWER_MASK | SRE_UPPER_MASK)
+
+/* The categories of the characters. */
+static short sre_ascii_info[SRE_ASCII_MAX + 1] = {
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x201, 0x200, 0x200, 0x200, 0x200, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000, 0x000,
+    0x000, 0x000, 0x000, 0x000, 0x200, 0x200, 0x200, 0x200,
+    0x211, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034,
+    0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034,
+    0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116, 0x116,
+    0x116, 0x116, 0x034, 0x034, 0x034, 0x034, 0x034, 0x034,
+    0x034, 0x194, 0x194, 0x194, 0x194, 0x194, 0x194, 0x094,
+    0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094,
+    0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094, 0x094,
+    0x094, 0x094, 0x094, 0x034, 0x034, 0x034, 0x034, 0x074,
+    0x034, 0x11C, 0x11C, 0x11C, 0x11C, 0x11C, 0x11C, 0x01C,
+    0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C,
+    0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C, 0x01C,
+    0x01C, 0x01C, 0x01C, 0x034, 0x034, 0x034, 0x034, 0x000,
+};
+
+/* Checks whether an ASCII character is in the given category. */
+static BOOL ascii_in_category(SRE_CODE category, Py_UCS4 ch) {
+    if (ch > SRE_ASCII_MAX)
+        /* Not ASCII. */
+        return FALSE;
+
+    switch(category) {
+    case SRE_CAT_Alnum:
+        return (sre_ascii_info[ch] & (SRE_DIGIT_MASK | SRE_ALPHA_MASK)) != 0;
+    case SRE_CAT_Alpha:
+        return (sre_ascii_info[ch] & SRE_ALPHA_MASK) != 0;
+    case SRE_CAT_ASCII:
+        return TRUE;
+    case SRE_CAT_Blank:
+        return (sre_ascii_info[ch] & SRE_BLANK_MASK) != 0;
+    case SRE_CAT_Cntrl:
+        return (sre_ascii_info[ch] & SRE_PRINT_MASK) == 0;
+    case SRE_CAT_Digit:
+        return (sre_ascii_info[ch] & SRE_DIGIT_MASK) != 0;
+    case SRE_CAT_Graph:
+        return (sre_ascii_info[ch] & SRE_GRAPH_MASK) != 0;
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return (sre_ascii_info[ch] & SRE_LOWER_MASK) != 0;
+    case SRE_CAT_Print:
+        return (sre_ascii_info[ch] & SRE_PRINT_MASK) != 0;
+    case SRE_CAT_Punct:
+        return (sre_ascii_info[ch] & SRE_PUNCT_MASK) != 0;
+    case SRE_CAT_Space:
+        return (sre_ascii_info[ch] & SRE_WHITESPACE_MASK) != 0;
+    case SRE_CAT_Upper:
+        return (sre_ascii_info[ch] & SRE_UPPER_MASK) != 0;
+    case SRE_CAT_Word:
+        return (sre_ascii_info[ch] &
+          (SRE_DIGIT_MASK | SRE_ALPHA_MASK | SRE_UNDERSCORE_MASK)) != 0;
+    case SRE_CAT_XDigit:
+        return (sre_ascii_info[ch] & SRE_XDIGIT_MASK) != 0;
+    default:
+        /* Not a known category for ASCII. */
+        return FALSE;
+    }
+}
+
+/* Converts an ASCII character to lowercase. */
+static Py_UCS4 ascii_lower(Py_UCS4 ch) {
+    if (ch <= SRE_ASCII_MAX && (sre_ascii_info[ch] & SRE_UPPER_MASK) != 0)
+        /* The character is ASCII and uppercase. */
+        return ch ^ 0x20;
+
+    return ch;
+}
+
+/* Converts an ASCII character to uppercase. */
+static Py_UCS4 ascii_upper(Py_UCS4 ch) {
+    if (ch <= SRE_ASCII_MAX && (sre_ascii_info[ch] & SRE_LOWER_MASK) != 0)
+        /* The character is ASCII and lowercase. */
+        return ch ^ 0x20;
+
+    return ch;
+}
+
+/* The handlers for ASCII characters. */
+static SRE_ENCODING_TABLE ascii_encoding = {
+    ascii_in_category,
+    ascii_lower,
+    ascii_upper,
+    ascii_upper, /* Titlecase for ASCII is the same as uppercase. */
+};
+
+/* Locale-specific */
+
+/* The maximum locale character. */
+#define SRE_LOC_MAX 0xFF
+
+/* Checks whether a locale character is in the given category. */
+static BOOL loc_in_category(SRE_CODE category, Py_UCS4 ch) {
+    if (ch > SRE_LOC_MAX)
+        return FALSE;
+
     switch (category) {
-
-    case SRE_CATEGORY_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_WORD:
-        return SRE_IS_WORD(ch);
-    case SRE_CATEGORY_NOT_WORD:
-        return !SRE_IS_WORD(ch);
-    case SRE_CATEGORY_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-
-    case SRE_CATEGORY_LOC_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_LOC_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
-
-#if defined(HAVE_UNICODE)
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_UNI_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_UNI_IS_LINEBREAK(ch);
-#else
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-#endif
-    }
-    return 0;
-}
-
-/* helpers */
-
-static void
-data_stack_dealloc(SRE_STATE* state)
-{
-    if (state->data_stack) {
-        PyMem_FREE(state->data_stack);
-        state->data_stack = NULL;
-    }
-    state->data_stack_size = state->data_stack_base = 0;
-}
-
-static int
-data_stack_grow(SRE_STATE* state, Py_ssize_t size)
-{
-    Py_ssize_t minsize, cursize;
-    minsize = state->data_stack_base+size;
-    cursize = state->data_stack_size;
-    if (cursize < minsize) {
-        void* stack;
-        cursize = minsize+minsize/4+1024;
-        TRACE(("allocate/grow stack %d\n", cursize));
-        stack = PyMem_REALLOC(state->data_stack, cursize);
-        if (!stack) {
-            data_stack_dealloc(state);
-            return SRE_ERROR_MEMORY;
-        }
-        state->data_stack = (char *)stack;
-        state->data_stack_size = cursize;
-    }
-    return 0;
-}
-
-/* generate 8-bit version */
+    case SRE_CAT_Alnum:
+        return isalnum(ch);
+    case SRE_CAT_Alpha:
+        return isalpha(ch);
+    case SRE_CAT_ASCII:
+        return ch <= SRE_ASCII_MAX;
+    case SRE_CAT_Blank:
+        return ch == '\t' || ch == ' ';
+    case SRE_CAT_Cntrl:
+        return !isprint(ch);
+    case SRE_CAT_Digit:
+        return isdigit(ch);
+    case SRE_CAT_Graph:
+        return isgraph(ch);
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return islower(ch);
+    case SRE_CAT_Print:
+        return isprint(ch);
+    case SRE_CAT_Punct:
+        return ispunct(ch);
+    case SRE_CAT_Space:
+        return isspace(ch);
+    case SRE_CAT_Upper:
+        return isupper(ch);
+    case SRE_CAT_Word:
+        return ch == '_' || isalnum(ch);
+    case SRE_CAT_XDigit:
+        return isxdigit(ch);
+    default:
+        /* Not a known category for locale. */
+        return FALSE;
+    }
+}
+
+/* Converts a locale character to lowercase. */
+static Py_UCS4 loc_lower(Py_UCS4 ch) {
+    if (ch <= SRE_LOC_MAX)
+        /* The character is within the locale range. */
+        return (Py_UCS4)tolower(ch);
+
+    return ch;
+}
+
+/* Converts a locale character to uppercase. */
+static Py_UCS4 loc_upper(Py_UCS4 ch) {
+    if (ch <= SRE_LOC_MAX)
+        /* The character is within the locale range. */
+        return (Py_UCS4)toupper(ch);
+
+    return ch;
+}
+
+/* The handlers for locale characters. */
+static SRE_ENCODING_TABLE locale_encoding = {
+    loc_in_category,
+    loc_lower,
+    loc_upper,
+    loc_upper, /* Titlecase for locale is the same as uppercase (probably!). */
+};
+
+/* Unicode-specific */
+
+/* Checks whether a Unicode character is in the given category. */
+static BOOL uni_in_category(SRE_CODE category, Py_UCS4 ch) {
+    unsigned char cat = get_unicode_category(ch);
+
+    /* Are we checking for a Unicode category (e.g. "Lu")? */
+    if (category < 0x20)
+        return cat == category;
+
+    switch (category) {
+    case SRE_UNI_CAT_L: /* Category "L&" or "L". */
+        return (SRE_CAT_MASK_L & (1 << cat)) != 0;
+    case SRE_UNI_CAT_M: /* Category "M&" or "M". */
+        return (SRE_CAT_MASK_M & (1 << cat)) != 0;
+    case SRE_UNI_CAT_N: /* Category "N&" or "N". */
+        return (SRE_CAT_MASK_N & (1 << cat)) != 0;
+    case SRE_UNI_CAT_Z: /* Category "Z&" or "Z". */
+        return (SRE_CAT_MASK_Z & (1 << cat)) != 0;
+    case SRE_UNI_CAT_C: /* Category "C&" or "C". */
+        return (SRE_CAT_MASK_C & (1 << cat)) != 0;
+    case SRE_UNI_CAT_P: /* Category "P&" or "P". */
+        return (SRE_CAT_MASK_P & (1 << cat)) != 0;
+    case SRE_UNI_CAT_S: /* Category "S&" or "S". */
+        return (SRE_CAT_MASK_S & (1 << cat)) != 0;
+    case SRE_CAT_Alnum:
+        return (SRE_CAT_MASK_Alnum & (1 << cat)) != 0;
+    case SRE_CAT_Alpha:
+        return (SRE_CAT_MASK_Alpha & (1 << cat)) != 0;
+    case SRE_CAT_ASCII:
+        return ch <= SRE_ASCII_MAX;
+    case SRE_CAT_Blank:
+        return ch == '\t' || cat == SRE_UNI_CAT_Zs;
+    case SRE_CAT_Cntrl:
+        return cat == SRE_UNI_CAT_Cc;
+    case SRE_CAT_Digit:
+        return cat == SRE_UNI_CAT_Nd;
+    case SRE_CAT_Graph:
+        return (SRE_CAT_MASK_Graph & (1 << cat)) != 0;
+    case SRE_CAT_LineBreak:
+        return ch == '\n';
+    case SRE_CAT_Lower:
+        return cat == SRE_UNI_CAT_Ll;
+    case SRE_CAT_Print:
+        return (SRE_CAT_MASK_Print & (1 << cat)) != 0;
+    case SRE_CAT_Punct:
+        return (SRE_CAT_MASK_Punct & (1 << cat)) != 0;
+    case SRE_CAT_Space:
+        return ch == '\t' || ch == '\r' || ch == '\n' || ch == '\v' ||
+          ch == '\f' || (SRE_CAT_MASK_Z & (1 << cat)) != 0;
+    case SRE_CAT_Upper:
+        return cat == SRE_UNI_CAT_Lu;
+    case SRE_CAT_Word:
+        return (SRE_CAT_MASK_Word & (1 << cat)) != 0;
+    case SRE_CAT_XDigit:
+        return ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' ||
+          ch >= 'a' && ch <= 'f';
+    default:
+        /* Not a known category for Unicode. */
+        return FALSE;
+    }
+}
+
+/* Converts a Unicode character to lowercase. */
+static Py_UCS4 uni_lower(Py_UCS4 ch) {
+    return (Py_UCS4)Py_UNICODE_TOLOWER((Py_UNICODE)ch);
+}
+
+/* Converts a Unicode character to uppercase. */
+static Py_UCS4 uni_upper(Py_UCS4 ch) {
+    return (Py_UCS4)Py_UNICODE_TOUPPER((Py_UNICODE)ch);
+}
+
+/* Converts a Unicode character to titlecase. */
+static Py_UCS4 uni_title(Py_UCS4 ch) {
+    return (Py_UCS4)Py_UNICODE_TOTITLE((Py_UNICODE)ch);
+}
+
+/* The handlers for Unicode characters. */
+static SRE_ENCODING_TABLE unicode_encoding = {
+    uni_in_category,
+    uni_lower,
+    uni_upper,
+    uni_title,
+};
+
+/* Returns the smaller of 2 unsigned numbers. */
+LOCAL(unsigned int) unsigned_min(unsigned int x, unsigned int y) {
+    return y < x ? y : x;
+}
+
+/* Returns the larger of 2 unsigned numbers. */
+LOCAL(unsigned int) unsigned_max(unsigned int x, unsigned int y) {
+    return y > x ? y : x;
+}
+
+/* Reports whether op is one of the REPEAT_ONE opcodes. */
+LOCAL(BOOL) is_repeat_one(SRE_CODE op) {
+    /* The MAX, MIN and POSS variants, each with its _REV counterpart. */
+    if (op == SRE_OP_REPEAT_ONE_MAX || op == SRE_OP_REPEAT_ONE_MAX_REV)
+        return TRUE;
+
+    if (op == SRE_OP_REPEAT_ONE_MIN || op == SRE_OP_REPEAT_ONE_MIN_REV)
+        return TRUE;
+
+    if (op == SRE_OP_REPEAT_ONE_POSS || op == SRE_OP_REPEAT_ONE_POSS_REV)
+        return TRUE;
+
+    return FALSE;
+}
+
+/* Checks whether a character is in a charset (a chunked bitmap). */
+LOCAL(BOOL) in_charset(SRE_CODE* charset, Py_UCS4 ch) {
+    /*
+     Charset format: max_char indexes... chunks...
+
+     The charset format is based on that of BIGCHARSET written by
+     Martin von Loewis.
+
+     The characters may be mapped to a bitmap.
+
+     To represent a charset, first a bitmap of all characters in the set is
+     constructed. Then, this bitmap is sliced into chunks of 256 characters,
+     duplicate chunks are eliminated, and each chunk is given a number. In the
+     compiled expression, the charset is represented by a codeword sequence,
+     consisting of one codeword for the maximum character code, a sequence of
+     chunk numbers (2 per codeword), and a sequence of chunks (8 codewords
+     each).
+
+     Compression is normally good: in a typical charset, large ranges of Unicode
+     will be either completely excluded (e.g. if only Cyrillic letters are to be
+     matched), or completely included (e.g. if large subranges of Kanji match).
+     These ranges will be represented by chunks of all one-bits or all
+     zero-bits.
+
+     Matching can also be done efficiently: the most significant bits of the
+     Unicode character are an index into the chunk numbers, and the least
+     significant byte is a bit index into the chunk.
+
+     This format is used even for 8-bit character sets.
+
+     The entire charset is an array of SRE_CODE, so endianness isn't a problem.
+    */
+    Py_ssize_t hi_bytes = ch / 256; /* Split the character code into the */
+    Py_ssize_t lo_byte = ch % 256;  /*  upper and lower bits. */
+    Py_ssize_t index;
+    SRE_CODE* chunk;
+    SRE_CODE bitmask;
+
+    /* Check against the maximum character code in the charset. */
+    if (ch > charset[0])
+        /* Definitely not in the charset. */
+        return FALSE;
+
+    /* Get the chunk index (2 x 16-bit indexes in each 32-bit codeword). */
+    index = (charset[1 + hi_bytes / 2] >> ((hi_bytes % 2) * 16)) & 0xFFFF;
+
+    /*
+     Point to the chunk. The number of chunk indexes depends on the maximum
+     character code of the charset, so that needs to be taken into account.
+     */
+    chunk = charset + 2 + charset[0] / 512 + index * (256 / SRE_BITS_PER_CODE);
+
+    /* Check the bit; shift an SRE_CODE, as 1 << 31 overflows a signed int. */
+    bitmask = (SRE_CODE)1 << (lo_byte % SRE_BITS_PER_CODE);
+    return (chunk[lo_byte / SRE_BITS_PER_CODE] & bitmask) != 0;
+}
+
+/* Case-insensitive in_charset(): tests ch's lower, upper and title forms. */
+LOCAL(BOOL) in_charset_ignore(SRE_STATE* state, SRE_CODE* charset, Py_UCS4 ch) {
+    /*
+     Unfortunately we need to check for all 3 possible cases (lower, upper and
+     title).
+
+     As an example of the problem, normally:
+
+         'I' <-> 'i'
+
+     but in Turkish:
+
+         uppercase dotless 'I' <-> lowercase dotless 'i'
+         uppercase dotted  'I' <-> lowercase dotted  'i'
+
+     We therefore adopt the tactic of retaining the case of the character in
+     the charset and checking whether the character we wish to check is
+     equivalent to it. This does, however, mean that some characters might be
+     treated as equivalent when ideally they shouldn't be, eg uppercase
+     dotless 'I' <-> lowercase dotted 'i' in Turkish.
+
+     I hope that in the future Unicode strings will gain some locale-specific
+     methods; the regex code can then be improved to become more
+     locale-friendly.
+    */
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    return in_charset(charset, encoding->lower(ch)) ||
+      in_charset(charset, encoding->upper(ch)) ||
+      in_charset(charset, encoding->title(ch));
+}
+
+/* Reports whether lower <= ch <= upper, i.e. ch is in the inclusive range. */
+LOCAL(BOOL) in_range(SRE_CODE lower, SRE_CODE upper, Py_UCS4 ch) {
+    return ch >= lower && ch <= upper;
+}
+
+/* Case-insensitive range test: TRUE if any case form of ch is in range. */
+LOCAL(BOOL) in_range_ignore(SRE_STATE* state, SRE_CODE lower, SRE_CODE upper,
+  Py_UCS4 ch) {
+    /*
+     All 3 case forms are tried; in_charset_ignore() explains why.
+    */
+    SRE_ENCODING_TABLE* table = state->encoding;
+
+    if (in_range(lower, upper, table->lower(ch)))
+        return TRUE;
+    if (in_range(lower, upper, table->upper(ch)))
+        return TRUE;
+    return in_range(lower, upper, table->title(ch));
+}
+
+/* Reports whether a character matches any member of a set. */
+LOCAL(BOOL) in_set(SRE_STATE* state, SRE_CODE* charset, Py_UCS4 ch) {
+    SRE_CODE* end = charset + charset[0];  /* limit of the members */
+    SRE_CODE* item = charset + 1;          /* first member */
+    SRE_ENCODING_TABLE* table = state->encoding;
+
+    /* Test each member in turn; the first one that matches decides. */
+    do {
+        SRE_CODE step; /* number of codewords used by this member */
+        int hit;
+
+        switch (item[0]) {
+        case SRE_OP_CATEGORY:
+            /* <CATEGORY> <category> */
+            hit = table->in_category(item[1], ch);
+            step = 2;
+            break;
+        case SRE_OP_CHARSET:
+            /* <CHARSET> <skip> <charset> */
+            hit = in_charset(item + 2, ch);
+            step = 1 + item[1];
+            break;
+        case SRE_OP_LITERAL:
+            /* <LITERAL> <code> */
+            hit = ch == item[1];
+            step = 2;
+            break;
+        case SRE_OP_RANGE:
+            /* <RANGE> <lower> <upper> */
+            hit = in_range(item[1], item[2], ch);
+            step = 3;
+            break;
+        default:
+            /* Unknown member: an internal error, so report no match. */
+            return FALSE;
+        }
+
+        if (hit)
+            return TRUE;
+
+        /* Move on to the next member. */
+        item += step;
+    } while (item < end);
+
+    return FALSE;
+}
+
+/* Case-insensitive set test: TRUE if any case form of ch is in the set. */
+LOCAL(BOOL) in_set_ignore(SRE_STATE* state, SRE_CODE* charset, Py_UCS4 ch) {
+    /*
+     All 3 case forms are tried; in_charset_ignore() explains why.
+    */
+    SRE_ENCODING_TABLE* table = state->encoding;
+
+    if (in_set(state, charset, table->lower(ch)))
+        return TRUE;
+    if (in_set(state, charset, table->upper(ch)))
+        return TRUE;
+    return in_set(state, charset, table->title(ch));
+}
+
+/* Case-insensitive equality: TRUE if any case form of the 2 chars agrees. */
+LOCAL(BOOL) same_char_ignore(SRE_STATE* state, Py_UCS4 ch_1, Py_UCS4 ch_2) {
+    /* Lower, upper and title forms are all compared; in_charset_ignore()
+       explains why. */
+    SRE_ENCODING_TABLE* table = state->encoding;
+
+    if (table->lower(ch_1) == table->lower(ch_2))
+        return TRUE;
+    if (table->upper(ch_1) == table->upper(ch_2))
+        return TRUE;
+    return table->title(ch_1) == table->title(ch_2);
+}
+
+/* generate bytestring version (SRE_* names expand to sre_b* identifiers) */
 
 #define SRE_CHAR unsigned char
-#define SRE_AT sre_at
-#define SRE_COUNT sre_count
-#define SRE_CHARSET sre_charset
-#define SRE_INFO sre_info
-#define SRE_MATCH sre_match
-#define SRE_MATCH_CONTEXT sre_match_context
-#define SRE_SEARCH sre_search
-#define SRE_LITERAL_TEMPLATE sre_literal_template
+#define SRE_MATCH sre_bmatch
+#define SRE_SEARCH sre_bsearch
+#define SRE_LITERAL_TEMPLATE sre_bliteral_template
+#define SRE_AT_BOUNDARY sre_bat_boundary
+#define SRE_CONTEXT sre_bcontext
+#define SRE_SAVE_BACKTRACK sre_bsave_backtrack
+#define SRE_DISCARD_BACKTRACK sre_bdiscard_backtrack
+#define SRE_REFRESH_MARKS sre_brefresh_marks
+#define SRE_DISCARD_UNTIL sre_bdiscard_until
+#define SRE_CLEANUP sre_bcleanup
+#define SRE_POSSIBLE_MATCH_AHEAD sre_bpossible_match_ahead
+#define SRE_MATCH_MANY sre_bmatch_many
+#define SRE_MATCH_UNTIL_TAIL sre_bmatch_until_tail
+#define SRE_MATCH_MANY_UNTIL_TAIL sre_bmatch_many_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL sre_bunmatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL_REV sre_bunmatch_until_tail_rev
+#define SRE_PRINT_TEXT sre_bprint_text
 
 #if defined(HAVE_UNICODE)
 
 #define SRE_RECURSIVE
 #include "_sre.c"
+#undef SRE_PRINT_TEXT
+#undef SRE_UNMATCH_UNTIL_TAIL_REV
+#undef SRE_UNMATCH_UNTIL_TAIL
+#undef SRE_MATCH_MANY_UNTIL_TAIL
+#undef SRE_MATCH_UNTIL_TAIL
+#undef SRE_MATCH_MANY
+#undef SRE_POSSIBLE_MATCH_AHEAD
+#undef SRE_CLEANUP
+#undef SRE_DISCARD_UNTIL
+#undef SRE_REFRESH_MARKS
+#undef SRE_DISCARD_BACKTRACK
+#undef SRE_SAVE_BACKTRACK
+#undef SRE_CONTEXT
 #undef SRE_RECURSIVE
-
+#undef SRE_AT_BOUNDARY
 #undef SRE_LITERAL_TEMPLATE
 #undef SRE_SEARCH
 #undef SRE_MATCH
-#undef SRE_MATCH_CONTEXT
-#undef SRE_INFO
-#undef SRE_CHARSET
-#undef SRE_COUNT
-#undef SRE_AT
 #undef SRE_CHAR
 
-/* generate 16-bit unicode version */
+/* generate unicode version (SRE_* names expand to sre_u* identifiers) */
 
 #define SRE_CHAR Py_UNICODE
-#define SRE_AT sre_uat
-#define SRE_COUNT sre_ucount
-#define SRE_CHARSET sre_ucharset
-#define SRE_INFO sre_uinfo
 #define SRE_MATCH sre_umatch
-#define SRE_MATCH_CONTEXT sre_umatch_context
 #define SRE_SEARCH sre_usearch
 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
+#define SRE_AT_BOUNDARY sre_uat_boundary
+#define SRE_CONTEXT sre_ucontext
+#define SRE_SAVE_BACKTRACK sre_usave_backtrack
+#define SRE_DISCARD_BACKTRACK sre_udiscard_backtrack
+#define SRE_REFRESH_MARKS sre_urefresh_marks
+#define SRE_DISCARD_UNTIL sre_udiscard_until
+#define SRE_CLEANUP sre_ucleanup
+#define SRE_POSSIBLE_MATCH_AHEAD sre_upossible_match_ahead
+#define SRE_MATCH_MANY sre_umatch_many
+#define SRE_MATCH_UNTIL_TAIL sre_umatch_until_tail
+#define SRE_MATCH_MANY_UNTIL_TAIL sre_umatch_many_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL sre_uunmatch_until_tail
+#define SRE_UNMATCH_UNTIL_TAIL_REV sre_uunmatch_until_tail_rev
+#define SRE_PRINT_TEXT sre_uprint_text
 #endif
 
 #endif /* SRE_RECURSIVE */
@@ -333,1295 +696,4411 @@
 /* the following section is compiled twice, with different character
    settings */
 
-LOCAL(int)
-SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
-{
-    /* check if pointer is at given position */
-
-    Py_ssize_t thisp, thatp;
-
-    switch (at) {
-
-    case SRE_AT_BEGINNING:
-    case SRE_AT_BEGINNING_STRING:
-        return ((void*) ptr == state->beginning);
-
-    case SRE_AT_BEGINNING_LINE:
-        return ((void*) ptr == state->beginning ||
-                SRE_IS_LINEBREAK((int) ptr[-1]));
-
-    case SRE_AT_END:
-        return (((void*) (ptr+1) == state->end &&
-                 SRE_IS_LINEBREAK((int) ptr[0])) ||
-                ((void*) ptr == state->end));
-
-    case SRE_AT_END_LINE:
-        return ((void*) ptr == state->end ||
-                SRE_IS_LINEBREAK((int) ptr[0]));
-
-    case SRE_AT_END_STRING:
-        return ((void*) ptr == state->end);
-
-    case SRE_AT_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-    case SRE_AT_LOC_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_LOC_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-#if defined(HAVE_UNICODE)
-    case SRE_AT_UNI_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_UNI_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-#endif
-
-    }
+/*
+ The 'context' holds the current position within the text and pattern, the
+ bounds of the text, the group marks and the current backtrack chunk/item.
+ */
+typedef struct SRE_CONTEXT {
+    SRE_STATE* state;          /* The state struct. */
+    SRE_CHAR* text_ptr;        /* Current position within the text. */
+    SRE_CHAR* text_beginning;  /* True start of the text. */
+    SRE_CHAR* text_start;      /* Start of the text to search/match. */
+    SRE_CHAR* text_end;        /* End of the text to search/match; treated as
+                                  the true end even if it isn't really
+                                  (inherited behaviour). */
+    SRE_CHAR* search_ptr;      /* Start of the search (used by \G). */
+    SRE_CHAR* final_linebreak; /* Position of the final linebreak if it's
+                                  the last character, otherwise NULL. */
+    SRE_CODE* pattern_ptr;     /* Current position within the pattern. */
+    SRE_CHAR** marks;          /* All the numbered and named marks (start and
+                                  end of numbered and named groups). */
+    Py_ssize_t marks_size;     /* Total size of the numbered and named text
+                                  mark pointers. */
+    SRE_BACKTRACK_CHUNK* backtrack_chunk; /* Most recent chunk of backtrack
+                                             items. */
+    SRE_BACKTRACK_ITEM* backtrack_item;   /* Current backtrack item. */
+} SRE_CONTEXT;
+
+/*
+ Releases the backtrack storage when the match code is about to return.
+
+ The marks saved in every backtrack item are freed and every chunk except
+ the first is discarded; the first chunk is emptied and kept for reuse.
+
+ 'result' is not examined: it is returned unchanged, which keeps the main
+ match code a bit tidier.
+ */
+LOCAL(int) SRE_CLEANUP(SRE_CONTEXT* context, int result) {
+    SRE_BACKTRACK_CHUNK* chunk = context->backtrack_chunk;
+
+    /* Walk from the most recent chunk back to the first one. */
+    for (;;) {
+        SRE_BACKTRACK_CHUNK* older = chunk->previous;
+        SRE_BACKTRACK_ITEM* item = chunk->items;
+        SRE_BACKTRACK_ITEM* items_end = chunk->items + chunk->count;
+
+        /* Free the marks saved by the items in this chunk. */
+        while (item < items_end) {
+            if (item->marks != NULL)
+                PyMem_FREE(item->marks);
+            item++;
+        }
+
+        /*
+         The first chunk has no predecessor; it is not freed but reused to
+         reduce the overhead.
+        */
+        if (older == NULL)
+            break;
+
+        /* Any later chunk is discarded. */
+        PyMem_FREE(chunk);
+        chunk = older;
+    }
+
+    /* Re-initialise the first chunk as empty. */
+    chunk->count = 0;
+
+    /* Both the context and the state now refer to the first chunk. */
+    context->backtrack_chunk = chunk;
+    context->state->backtrack_chunk = chunk;
+
+    return result;
+}
+
+/*
+ Pushes a backtrack item holding 'op' and, if 'save_marks' is set and there
+ are marks, a copy of the marks; the item becomes context->backtrack_item.
+ Returns 0 on success or SRE_ERROR_MEMORY if an allocation fails.
+ */
+LOCAL(int) SRE_SAVE_BACKTRACK(SRE_CONTEXT* context, SRE_CODE op,
+  BOOL save_marks) {
+    SRE_BACKTRACK_CHUNK* chunk = context->backtrack_chunk;
+    SRE_BACKTRACK_ITEM* item;
+
+    /* Is the current chunk full, i.e. is there no empty slot left in it? */
+    if (chunk->count >= SRE_BACKTRACK_CHUNK_SIZE) {
+        /* Create a new chunk. */
+        SRE_BACKTRACK_CHUNK* new_chunk =
+          (SRE_BACKTRACK_CHUNK*)PyMem_MALLOC(sizeof(SRE_BACKTRACK_CHUNK));
+        if (new_chunk == NULL)
+            return SRE_ERROR_MEMORY;
+
+        /* Link the new chunk at the head of the list. */
+        new_chunk->previous = chunk;
+        new_chunk->count = 0;
+        context->backtrack_chunk = new_chunk;
+        chunk = new_chunk;
+    }
+
+    /* Claim the next slot in the chunk and store the opcode in it. */
+    item = &chunk->items[chunk->count++];
+    item->op = op;
+
+    /* Save the marks? Only if asked to and there are any marks at all. */
+    if (save_marks && context->marks_size > 0) {
+        item->marks = PyMem_MALLOC(context->marks_size);
+        if (item->marks == NULL)
+            return SRE_ERROR_MEMORY;
+
+        /* Copy the current marks; marks_size is used as a byte count. */
+        memmove(item->marks, context->marks, context->marks_size);
+    } else
+        /* No marks. */
+        item->marks = NULL;
+
+    /* This is now the current backtrack item. */
+    context->backtrack_item = item;
 
     return 0;
 }
 
-LOCAL(int)
-SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
-{
-    /* check if character is a member of the given set */
-
-    int ok = 1;
+/* Pops the most recent backtrack item, releasing any marks it saved. */
+LOCAL(void) SRE_DISCARD_BACKTRACK(SRE_CONTEXT* context) {
+    SRE_BACKTRACK_CHUNK* top = context->backtrack_chunk;
+    SRE_BACKTRACK_ITEM* popped;
+
+    /* Drop the last used slot of the current chunk. */
+    top->count--;
+    popped = top->items + top->count;
+
+    if (popped->marks != NULL)
+        PyMem_FREE(popped->marks);
+
+    /*
+     A chunk that has become empty is freed and its predecessor becomes the
+     current chunk; the first chunk has no predecessor and is kept for reuse.
+    */
+    if (top->count == 0 && top->previous != NULL) {
+        context->backtrack_chunk = top->previous;
+        PyMem_FREE(top);
+    }
+}
+
+/* Discards backtrack items until one with opcode 'op' is on top (kept). */
+LOCAL(void) SRE_DISCARD_UNTIL(SRE_CONTEXT* context, SRE_CODE op) {
+    SRE_BACKTRACK_ITEM* item;
 
     for (;;) {
-        switch (*set++) {
-
-        case SRE_OP_FAILURE:
-            return !ok;
-
+        SRE_BACKTRACK_CHUNK* chunk = context->backtrack_chunk;
+        item = &chunk->items[chunk->count - 1];
+        if (item->op == op)
+            /* Found it! (The loop relies on such an item existing.) */
+            break;
+        SRE_DISCARD_BACKTRACK(context);
+    }
+
+    /* Record the item that was found as the current backtrack item. */
+    context->backtrack_item = item;
+}
+
+/* Reports whether text_ptr lies between a word and a non-word character. */
+LOCAL(BOOL) SRE_AT_BOUNDARY(SRE_CONTEXT* context) {
+    SRE_ENCODING_TABLE* table = context->state->encoding;
+    SRE_CHAR* here = context->text_ptr;
+
+    /* A word character behind us?  Nothing precedes the true start. */
+    BOOL word_behind = here > context->text_beginning &&
+      table->in_category(SRE_CAT_Word, here[-1]);
+
+    /* A word character ahead of us?  Nothing follows the end. */
+    BOOL word_ahead = here < context->text_end &&
+      table->in_category(SRE_CAT_Word, here[0]);
+
+    return word_behind != word_ahead;
+}
+
+/* The 'MARK' operator is 3 codewords long. */
+#define SRE_MARK_OP_SIZE 3
+
+/*
+ Looks ahead to see whether the tail of the pattern _could_ match.
+
+ Only the first opcode of the tail (after any MARK opcodes) is tested against
+ the text at text_ptr; this is used to avoid creating backtrack points
+ unnecessarily. For forwards or backwards searching.
+
+ Some of the code might actually look outside the text, but the worst that
+ could happen is that it could say that the tail _could_ match, which is the
+ default result.
+*/
+LOCAL(BOOL) SRE_POSSIBLE_MATCH_AHEAD(SRE_CONTEXT* context, SRE_CODE* tail) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = context->state->encoding;
+
+    /* Skip over any marks. */
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    switch (tail[0]) {
+    case SRE_OP_ANY:
+        /* Any character except a newline (forwards). */
+        return !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]);
+    case SRE_OP_ANY_ALL:
+    case SRE_OP_ANY_ALL_REV:
+        /* Any character at all. */
+        return TRUE;
+    case SRE_OP_ANY_REV:
+        /* Any character except a newline (backwards). */
+        return !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]);
+    case SRE_OP_BOUNDARY:
+        /* Boundary between word and non-word. */
+        return SRE_AT_BOUNDARY(context);
+    case SRE_OP_CATEGORY:
+        /* Character in a certain category (forwards). */
+        return encoding->in_category(tail[1], context->text_ptr[0]);
+    case SRE_OP_CATEGORY_REV:
+        /* Character in a certain category (backwards). */
+        return encoding->in_category(tail[1], context->text_ptr[-1]);
+    case SRE_OP_CHARSET:
+        /* Character in a charset (forwards). */
+        return in_charset(tail + 2, context->text_ptr[0]);
+    case SRE_OP_CHARSET_IGNORE:
+        /* Character in a charset, ignoring case (forwards). */
+        return in_charset_ignore(state, tail + 2, context->text_ptr[0]);
+    case SRE_OP_CHARSET_IGNORE_REV:
+        /* Character in a charset, ignoring case (backwards). */
+        return in_charset_ignore(state, tail + 2, context->text_ptr[-1]);
+    case SRE_OP_CHARSET_REV:
+        /* Character in a charset (backwards). */
+        return in_charset(tail + 2, context->text_ptr[-1]);
+    case SRE_OP_END_OF_LINE:
+        /* End of line. */
+        return context->text_ptr >= context->text_end ||
+          encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]);
+    case SRE_OP_END_OF_STRING:
+        /* End of string. */
+        return context->text_ptr >= context->text_end;
+    case SRE_OP_END_OF_STRING_LN:
+        /* End of string or final line. */
+        return context->text_ptr >= context->text_end ||
+          context->text_ptr == context->final_linebreak;
+    case SRE_OP_LITERAL:
+        /* Character is this literal (forwards). */
+        return context->text_ptr[0] == (SRE_CHAR)tail[1];
+    case SRE_OP_LITERAL_IGNORE:
+        /* Character is this literal, ignoring case (forwards). */
+        return same_char_ignore(state, context->text_ptr[0], tail[1]);
+    case SRE_OP_LITERAL_IGNORE_REV:
+        /* Character is this literal, ignoring case (backwards). */
+        return same_char_ignore(state, context->text_ptr[-1], tail[1]);
+    case SRE_OP_LITERAL_REV:
+        /* Character is this literal (backwards). */
+        return context->text_ptr[-1] == (SRE_CHAR)tail[1];
+    case SRE_OP_LITERAL_STRING:
+        /* Literal string (forwards). */
+        return context->text_ptr[0] == (SRE_CHAR)tail[2];
+    case SRE_OP_LITERAL_STRING_IGNORE:
+        /* Literal string, ignoring case (forwards). */
+        return same_char_ignore(state, context->text_ptr[0], tail[2]);
+    case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        /*
+         Literal string, ignoring case (backwards).
+
+         It's a little bit harder to locate the first character of the literal
+         string when searching backwards.
+        */
+        return same_char_ignore(state,
+          context->text_ptr[-(int)tail[1]], tail[2]);
+    case SRE_OP_LITERAL_STRING_REV:
+        /*
+         Literal string (backwards).
+
+         It's a little bit harder to locate the first character of the
+         literal string when searching backwards.
+        */
+        return context->text_ptr[-(int)tail[1]] == (SRE_CHAR)tail[2];
+    case SRE_OP_NOT_BOUNDARY:
+        /* Not a boundary between word and non-word. */
+        return !SRE_AT_BOUNDARY(context);
+    case SRE_OP_NOT_CATEGORY:
+        /* Character not in a certain category (forwards). */
+        return !encoding->in_category(tail[1], context->text_ptr[0]);
+    case SRE_OP_NOT_CATEGORY_REV:
+        /* Character not in a certain category (backwards). */
+        return !encoding->in_category(tail[1], context->text_ptr[-1]);
+    case SRE_OP_NOT_CHARSET:
+        /* Character not in a charset (forwards). */
+        return !in_charset(tail + 2, context->text_ptr[0]);
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* Character not in a charset, ignoring case (forwards). */
+        return !in_charset_ignore(state, tail + 2, context->text_ptr[0]);
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        /* Character not in a charset, ignoring case (backwards). */
+        return !in_charset_ignore(state, tail + 2, context->text_ptr[-1]);
+    case SRE_OP_NOT_CHARSET_REV:
+        /* Character not in a charset (backwards). */
+        return !in_charset(tail + 2, context->text_ptr[-1]);
+    case SRE_OP_NOT_LITERAL:
+        /* Character is not this literal (forwards). */
+        return context->text_ptr[0] != (SRE_CHAR)tail[1];
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        /* Character is not this literal, ignoring case (forwards). */
+        return !same_char_ignore(state, context->text_ptr[0], tail[1]);
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        /* Character is not this literal, ignoring case (backwards). */
+        return !same_char_ignore(state, context->text_ptr[-1], tail[1]);
+    case SRE_OP_NOT_LITERAL_REV:
+        /* Character is not this literal (backwards). */
+        return context->text_ptr[-1] != (SRE_CHAR)tail[1];
+    case SRE_OP_NOT_RANGE:
+        /* Character not in range (forwards). */
+        return !in_range(tail[1], tail[2], context->text_ptr[0]);
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* Character not in range, ignoring case (forwards). */
+        return !in_range_ignore(state, tail[1], tail[2], context->text_ptr[0]);
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        /* Character not in range, ignoring case (backwards). */
+        return !in_range_ignore(state, tail[1], tail[2], context->text_ptr[-1]);
+    case SRE_OP_NOT_RANGE_REV:
+        /* Character not in range (backwards). */
+        return !in_range(tail[1], tail[2], context->text_ptr[-1]);
+    case SRE_OP_NOT_SET:
+        /* Character not in set (forwards). */
+        return !in_set(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_NOT_SET_IGNORE:
+        /* Character not in set, ignoring case (forwards). */
+        return !in_set_ignore(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        /* Character not in set, ignoring case (backwards). */
+        return !in_set_ignore(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_NOT_SET_REV:
+        /* Character not in set (backwards). */
+        return !in_set(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_RANGE:
+        /* Character in range (forwards). */
+        return in_range(tail[1], tail[2], context->text_ptr[0]);
+    case SRE_OP_RANGE_IGNORE:
+        /* Character in range, ignoring case (forwards). */
+        return in_range_ignore(state, tail[1], tail[2], context->text_ptr[0]);
+    case SRE_OP_RANGE_IGNORE_REV:
+        /* Character in range, ignoring case (backwards). */
+        return in_range_ignore(state, tail[1], tail[2], context->text_ptr[-1]);
+    case SRE_OP_RANGE_REV:
+        /* Character in range (backwards). */
+        return in_range(tail[1], tail[2], context->text_ptr[-1]);
+    case SRE_OP_SET:
+        /* Character in set (forwards). */
+        return in_set(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_SET_IGNORE:
+        /* Character in set, ignoring case (forwards). */
+        return in_set_ignore(state, tail + 1, context->text_ptr[0]);
+    case SRE_OP_SET_IGNORE_REV:
+        /* Character in set, ignoring case (backwards). */
+        return in_set_ignore(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_SET_REV:
+        /* Character in set (backwards). */
+        return in_set(state, tail + 1, context->text_ptr[-1]);
+    case SRE_OP_START_OF_LINE:
+        /* Start of line. */
+        return context->text_ptr == context->text_beginning ||
+          encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]);
+    case SRE_OP_START_OF_SEARCH:
+        /* Start of search. */
+        return context->text_ptr == context->search_ptr;
+    case SRE_OP_START_OF_STRING:
+        /* Start of string. */
+        return context->text_ptr == context->text_beginning;
+    default:
+        /* Anything else we'll assume could match. */
+        return TRUE;
+    }
+}
+
+/*
+ Matches single characters up to a maximum.
+
+ This is used for matching a repeated single-character pattern. It's more
+ efficient than the general multi-character repeat.
+
+ For forwards or backwards searching.
+ */
+LOCAL(void) SRE_MATCH_MANY(SRE_CONTEXT* context, SRE_CHAR* max_ptr,
+  SRE_CODE* body) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    switch (body[0]) {
+    case SRE_OP_ANY:
+        /* Any character except a newline (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_ANY_ALL:
+        /* Any character at all (forwards). */
+        if (context->text_ptr < max_ptr)
+            context->text_ptr = max_ptr;
+        break;
+    case SRE_OP_ANY_ALL_REV:
+        /* Any character at all (backwards). */
+        if (context->text_ptr > max_ptr)
+            context->text_ptr = max_ptr;
+        break;
+    case SRE_OP_ANY_REV:
+        /* Any character except a newline (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CATEGORY:
+        /* Character in a certain category (forwards). */
+        while (context->text_ptr < max_ptr &&
+          encoding->in_category(body[1], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CATEGORY_REV:
+        /* Character in a certain category (backwards). */
+        while (context->text_ptr > max_ptr &&
+          encoding->in_category(body[1], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET:
+        /* Character in a charset (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_charset(body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        /* Character in a charset, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_charset_ignore(state, body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        /* Character in a charset, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET_REV:
+        /* Character in a charset (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_charset(body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL:
+        /* Character is this literal (forwards). */
+        while (context->text_ptr < max_ptr &&
+          context->text_ptr[0] == (SRE_CHAR)body[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        /* Character is this literal, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          same_char_ignore(state, context->text_ptr[0], body[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        /* Character is this literal, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !same_char_ignore(state, context->text_ptr[-1], body[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_REV:
+        /* Character is this literal (backwards). */
+        while (context->text_ptr > max_ptr &&
+          context->text_ptr[-1] == (SRE_CHAR)body[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        /* Character not in a certain category (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !encoding->in_category(body[1], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CATEGORY_REV:
+        /* Character not in a certain category (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !encoding->in_category(body[1], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET:
+        /* Character not in a charset (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_charset(body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* Character not in a charset,ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_charset_ignore(state, body + 2, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        /* Character not in a charset,ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        /* Character not in a charset (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_charset(body + 2, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL:
+        /* Character is not this literal (forwards). */
+        while (context->text_ptr < max_ptr &&
+          context->text_ptr[0] != (SRE_CHAR)body[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        /* Character is not this literal, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !same_char_ignore(state, context->text_ptr[0], body[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        /* Character is not this literal, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !same_char_ignore(state, context->text_ptr[-1], body[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        /* Character is not this literal (backwards). */
+        while (context->text_ptr > max_ptr &&
+          context->text_ptr[-1] != (SRE_CHAR)body[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE:
+        /* Character not in range (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_range(body[1], body[2], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* Character not in range, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        /* Character not in range, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        /* Character not in range (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_range(body[1], body[2], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET:
+        /* Character not in set (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_set(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        /* Character not in set, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          !in_set_ignore(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        /* Character not in set, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_set_ignore(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET_REV:
+        /* Character not in set (backwards). */
+        while (context->text_ptr > max_ptr &&
+          !in_set(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE:
+        /* Character in range (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_range(body[1], body[2], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        /* Character in range, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        /* Character in range, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE_REV:
+        /* Character in range (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_range(body[1], body[2], context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET:
+        /* Character in set (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_set(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_IGNORE:
+        /* Character in set, ignoring case (forwards). */
+        while (context->text_ptr < max_ptr &&
+          in_set_ignore(state, body + 1, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        /* Character in set, ignoring case (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_set_ignore(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET_REV:
+        /* Character in set (backwards). */
+        while (context->text_ptr > max_ptr &&
+          in_set(state, body + 1, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    }
+}
+
+/*
+ 'Unmatches' single characters down to a minimum or until the tail _could_
+ match.
+
+ context->text_ptr is stepped backwards one character at a time for as long
+ as it is still >= min_ptr and the first opcode of the tail (after skipping
+ any leading SRE_OP_MARK ops) cannot match at that position. Only that first
+ opcode is inspected, so stopping means the tail _might_ match there, not
+ that it does. Opcodes not listed in the switch leave text_ptr untouched.
+
+ Returns TRUE if text_ptr is still >= min_ptr on exit; returns FALSE if the
+ minimum is reached but the tail still couldn't match.
+
+ This is used for 'unmatching' a repeated single-character pattern. It's more
+ efficient than the general multi-character repeat.
+
+ For forwards searching only.
+ */
+LOCAL(BOOL) SRE_UNMATCH_UNTIL_TAIL(SRE_CONTEXT* context, SRE_CHAR* min_ptr,
+  SRE_CODE* tail) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    /* Skip over any marks. */
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    /* Every case backs off while the tail's first opcode can NOT match at
+       text_ptr, hence the tests are the negation of the opcode's meaning. */
+    switch (tail[0]) {
+    case SRE_OP_ANY:
+        /* Any character except a newline. */
+        while (context->text_ptr >= min_ptr &&
+          encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_ANY_ALL:
+        /* Any character at all. */
+        break;
+    case SRE_OP_BOUNDARY:
+        /* Boundary between word and non-word. */
+        while (context->text_ptr >= min_ptr &&
+          !SRE_AT_BOUNDARY(context))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CATEGORY:
+        /* Character in a certain category. */
+        while (context->text_ptr >= min_ptr &&
+          !encoding->in_category(tail[1], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET:
+        /* Character in a charset. */
+        while (context->text_ptr >= min_ptr &&
+          !in_charset(tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        /* Character in a charset, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          !in_charset_ignore(state, tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_END_OF_LINE:
+        /* End of line: matches at the end of the text or just before a line
+           break, so nothing needs unmatching when already at text_end. */
+        if (context->text_ptr < context->text_end) {
+            while (context->text_ptr >= min_ptr &&
+              !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+                context->text_ptr--;
+        }
+        break;
+    case SRE_OP_END_OF_STRING:
+        /* End of string: backing off can never reach text_end, so fail
+           outright by moving below the minimum. */
+        if (context->text_ptr >= min_ptr &&
+          context->text_ptr < context->text_end)
+            context->text_ptr = min_ptr - 1;
+        break;
+    case SRE_OP_END_OF_STRING_LN:
+        /* End of string or final line: fail outright unless already at
+           text_end or at the final line break. */
+        if (context->text_ptr < context->text_end &&
+          context->text_ptr != context->final_linebreak &&
+          context->text_ptr >= min_ptr)
+            context->text_ptr = min_ptr - 1;
+        break;
+    case SRE_OP_LITERAL:
+        /* Character is this literal. */
+        while (context->text_ptr >= min_ptr &&
+          context->text_ptr[0] != (SRE_CHAR)tail[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        /* Character is this literal, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          !same_char_ignore(state, context->text_ptr[0], tail[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_STRING:
+        /* Literal string: only tail[2] is compared (presumably the first
+           character of the string, with tail[1] its length -- confirm
+           against the compiler). */
+        while (context->text_ptr >= min_ptr &&
+          context->text_ptr[0] != (SRE_CHAR)tail[2])
+            context->text_ptr--;
+        break;
+    case SRE_OP_LITERAL_STRING_IGNORE:
+        /* Literal string, ignoring case (same single-character check). */
+        while (context->text_ptr >= min_ptr &&
+          !same_char_ignore(state, context->text_ptr[0], tail[2]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_BOUNDARY:
+        /* Not a boundary between word and non-word. */
+        while (context->text_ptr >= min_ptr &&
+          SRE_AT_BOUNDARY(context))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        /* Character not in a certain category. */
+        while (context->text_ptr >= min_ptr &&
+          encoding->in_category(tail[1], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET:
+        /* Character not in a charset. */
+        while (context->text_ptr >= min_ptr &&
+          in_charset(tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* Character not in a charset, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          in_charset_ignore(state, tail + 2, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL:
+        /* Character is not this literal. */
+        while (context->text_ptr >= min_ptr &&
+          context->text_ptr[0] == (SRE_CHAR)tail[1])
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        /* Character is not this literal, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          same_char_ignore(state, context->text_ptr[0], tail[1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE:
+        /* Character not in range. */
+        while (context->text_ptr >= min_ptr &&
+          in_range(tail[1], tail[2], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* Character not in range, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          in_range_ignore(state, tail[1], tail[2], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET:
+        /* Character not in set. */
+        while (context->text_ptr >= min_ptr &&
+          in_set(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        /* Character not in set, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          in_set_ignore(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE:
+        /* Character in range. */
+        while (context->text_ptr >= min_ptr &&
+          !in_range(tail[1], tail[2], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        /* Character in range, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          !in_range_ignore(state, tail[1], tail[2], context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET:
+        /* Character in set. */
+        while (context->text_ptr >= min_ptr &&
+          !in_set(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_SET_IGNORE:
+        /* Character in set, ignoring case. */
+        while (context->text_ptr >= min_ptr &&
+          !in_set_ignore(state, tail + 1, context->text_ptr[0]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_START_OF_LINE:
+        /* Start of line: the tail could match at the very beginning of the
+           text or immediately after a line break, so keep backing off while
+           neither holds. (The line-break test must be negated; otherwise
+           valid start-of-line positions are skipped.) */
+        while (context->text_ptr >= min_ptr &&
+          context->text_ptr != context->text_beginning &&
+          !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+            context->text_ptr--;
+        break;
+    case SRE_OP_START_OF_SEARCH:
+        /* Start of search. */
+        while (context->text_ptr >= min_ptr &&
+          context->text_ptr != context->search_ptr)
+            context->text_ptr--;
+        break;
+    case SRE_OP_START_OF_STRING:
+        /* Start of string: fail outright unless already at the beginning. */
+        if (context->text_ptr >= min_ptr &&
+          context->text_ptr > context->text_beginning)
+            context->text_ptr = min_ptr - 1;
+        break;
+    }
+
+    /* Still at or above the minimum means the tail might match here. */
+    return context->text_ptr >= min_ptr;
+}
+
+/*
+ 'Unmatches' single characters down to a minimum or until the tail _could_
+ match.
+
+ This is the mirror image of the forwards version: context->text_ptr is
+ stepped forwards one character at a time for as long as it is still
+ <= min_ptr and the first opcode of the tail (after skipping any leading
+ SRE_OP_MARK ops) cannot match at that position. Character opcodes examine
+ text_ptr[-1], the character just before the position. Only the first opcode
+ is inspected, so stopping means the tail _might_ match there, not that it
+ does. Opcodes not listed in the switch leave text_ptr untouched.
+
+ Returns TRUE if text_ptr is still <= min_ptr on exit; returns FALSE if the
+ minimum is reached but the tail still couldn't match.
+
+ This is used for 'unmatching' a repeated single-character pattern. It's more
+ efficient than the general multi-character repeat.
+
+ For backwards searching only.
+ */
+LOCAL(BOOL) SRE_UNMATCH_UNTIL_TAIL_REV(SRE_CONTEXT* context, SRE_CHAR* min_ptr,
+  SRE_CODE* tail) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    /* Skip over any marks. */
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    /* Every case advances while the tail's first opcode can NOT match at
+       text_ptr, hence the tests are the negation of the opcode's meaning. */
+    switch (tail[0]) {
+    case SRE_OP_ANY_ALL_REV:
+        /* Any character at all. */
+        break;
+    case SRE_OP_ANY_REV:
+        /* Any character except a newline. */
+        while (context->text_ptr <= min_ptr &&
+          encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_BOUNDARY:
+        /* Boundary between word and non-word. */
+        while (context->text_ptr <= min_ptr &&
+          !SRE_AT_BOUNDARY(context))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CATEGORY_REV:
+        /* Character in a certain category. */
+        while (context->text_ptr <= min_ptr &&
+          !encoding->in_category(tail[1], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        /* Character in a charset, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          !in_charset_ignore(state, tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_CHARSET_REV:
+        /* Character in a charset. */
+        while (context->text_ptr <= min_ptr &&
+          !in_charset(tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_LINE:
+        /* End of line: a zero-width test on the position itself, so it
+           matches at text_end or when the character AT text_ptr is a line
+           break, exactly as in the forwards version. Testing text_ptr[0]
+           (guarded by text_ptr < text_end) also avoids reading before the
+           start of the text when text_ptr is at the beginning. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr < context->text_end &&
+          !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_STRING:
+        /* End of string. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr < context->text_end)
+            context->text_ptr++;
+        break;
+    case SRE_OP_END_OF_STRING_LN:
+        /* End of string or final line. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr < context->text_end &&
+          context->text_ptr != context->final_linebreak)
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        /* Character is this literal, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          !same_char_ignore(state, context->text_ptr[-1], tail[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_REV:
+        /* Character is this literal. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr[-1] != (SRE_CHAR)tail[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        /* Literal string, ignoring case: compares tail[2] with the character
+           tail[1] places before text_ptr (presumably tail[1] is the string
+           length and tail[2] its first character -- confirm against the
+           compiler). */
+        while (context->text_ptr <= min_ptr &&
+          !same_char_ignore(state, context->text_ptr[-(int)tail[1]], tail[2]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_LITERAL_STRING_REV:
+        /* Literal string (same single-character check, case-sensitive). */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr[-(int)tail[1]] != (SRE_CHAR)tail[2])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_BOUNDARY:
+        /* Not a boundary between word and non-word. */
+        while (context->text_ptr <= min_ptr &&
+          SRE_AT_BOUNDARY(context))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CATEGORY_REV:
+        /* Character not in a certain category. */
+        while (context->text_ptr <= min_ptr &&
+          encoding->in_category(tail[1], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        /* Character not in a charset, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          in_charset_ignore(state, tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        /* Character not in a charset. */
+        while (context->text_ptr <= min_ptr &&
+          in_charset(tail + 2, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        /* Character is not this literal, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          same_char_ignore(state, context->text_ptr[-1], tail[1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        /* Character is not this literal. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr[-1] == (SRE_CHAR)tail[1])
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        /* Character not in range, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          in_range_ignore(state, tail[1], tail[2], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        /* Character not in range. */
+        while (context->text_ptr <= min_ptr &&
+          in_range(tail[1], tail[2], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        /* Character not in set, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          in_set_ignore(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_NOT_SET_REV:
+        /* Character not in set. */
+        while (context->text_ptr <= min_ptr &&
+          in_set(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        /* Character in range, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          !in_range_ignore(state, tail[1], tail[2], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_RANGE_REV:
+        /* Character in range. */
+        while (context->text_ptr <= min_ptr &&
+          !in_range(tail[1], tail[2], context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        /* Character in set, ignoring case. */
+        while (context->text_ptr <= min_ptr &&
+          !in_set_ignore(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_SET_REV:
+        /* Character in set. */
+        while (context->text_ptr <= min_ptr &&
+          !in_set(state, tail + 1, context->text_ptr[-1]))
+            context->text_ptr++;
+        break;
+    case SRE_OP_START_OF_LINE:
+        /* Start of line: already matches at the very beginning of the text;
+           otherwise advance while the preceding character is NOT a line
+           break. (The test must be negated; otherwise valid start-of-line
+           positions are skipped.) The outer guard keeps text_ptr[-1] inside
+           the text. */
+        if (context->text_ptr > context->text_beginning) {
+            while (context->text_ptr <= min_ptr &&
+              !encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+                context->text_ptr++;
+        }
+        break;
+    case SRE_OP_START_OF_SEARCH:
+        /* Start of search. */
+        while (context->text_ptr <= min_ptr &&
+          context->text_ptr != context->search_ptr)
+            context->text_ptr++;
+        break;
+    case SRE_OP_START_OF_STRING:
+        /* Start of string: advancing can never reach the beginning, so fail
+           outright by moving past the minimum. */
+        if (context->text_ptr > context->text_beginning)
+            context->text_ptr = min_ptr + 1;
+        break;
+    }
+
+    /* Still at or below the minimum means the tail might match here. */
+    return context->text_ptr <= min_ptr;
+}
+
+/*
+ Matches single characters until the tail _could_ match, up to a maximum.
+ Returns FALSE if the maximum is reached but the tail still couldn't match.
+
+ This is used for matching a repeated single-character pattern. It's more
+ efficient than the general multi-character repeat.
+
+ For forwards or backwards searching.
+ */
+LOCAL(BOOL) SRE_MATCH_UNTIL_TAIL(SRE_CONTEXT* context, SRE_CHAR* max_ptr,
+  SRE_CODE* body, SRE_CODE* tail) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    /* Skip over any marks. */
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    switch (body[0]) {
+    case SRE_OP_ANY:
+        /* Any character except a newline (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_ANY_ALL:
+        /* Any character at all (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr)
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_ANY_ALL_REV:
+        /* Any character at all (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr)
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_ANY_REV:
+        /* Any character except a newline (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_CATEGORY:
+        /* Character in a certain category (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !encoding->in_category(body[1], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET:
+        /* Character in a charset (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_charset(body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        /* Character in a charset, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        /* Character in a charset, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_CHARSET_REV:
+        /* Character in a charset (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_charset(body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_LITERAL:
+        /* Character is this literal (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              context->text_ptr[0] != (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        /* Character is this literal, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !same_char_ignore(state, context->text_ptr[0], body[1]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        /* Character is this literal, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_LITERAL_REV:
+        /* Character is this literal (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              context->text_ptr[-1] != (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        /* Character not in a certain category (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              encoding->in_category(body[1], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET:
+        /* Character not in a charset (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_charset(body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* Character not in a charset,ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        /* Character not in a charset,ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        /* Character not in a charset (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_charset(body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL:
+        /* Character is not this literal (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              context->text_ptr[0] == (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        /* Character is not this literal, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              same_char_ignore(state, context->text_ptr[0], body[1]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        /* Character is not this literal, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        /* Character is not this literal (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              context->text_ptr[-1] == (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_RANGE:
+        /* Character not in range (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_range(body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* Character not in range, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        /* Character not in range, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        /* Character not in range (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_range(body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_SET:
+        /* Character not in set (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_set(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        /* Character not in set, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        /* Character not in set, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_NOT_SET_REV:
+        /* Character not in set (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              in_set(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_RANGE:
+        /* Character in range (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_range(body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        /* Character in range, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        /* Character in range, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_RANGE_REV:
+        /* Character in range (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_range(body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_SET:
+        /* Character in set (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_set(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_SET_IGNORE:
+        /* Character in set, ignoring case (forwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr >= max_ptr ||
+              !in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        }
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        /* Character in set, ignoring case (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    case SRE_OP_SET_REV:
+        /* Character in set (backwards). */
+        while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail)) {
+            if (context->text_ptr <= max_ptr ||
+              !in_set(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        }
+        break;
+    }
+
+    return TRUE;
+}
+
+/*
+ Matches single characters until the tail _could_ match, up to a maximum.
+ Each character must satisfy the single-character op at body[0]; at least
+ one character is always consumed before the tail is first tested.
+ Returns FALSE if a character fails or max_ptr is reached while the tail
+ still couldn't match, leaving context->text_ptr where matching stopped.
+ This is used for matching a repeated single-character pattern. It's more
+ efficient than the general multi-character repeat. Forwards or backwards.
+ */
+LOCAL(BOOL) SRE_MATCH_MANY_UNTIL_TAIL(SRE_CONTEXT* context, SRE_CHAR* max_ptr,
+  SRE_CODE* body, SRE_CODE* tail) {
+    SRE_STATE* state = context->state;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+
+    /* Skip over any marks. */
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+    /* NOTE(review): SRE_OP_CATEGORY_REV has no case here -- intentional? */
+    switch (body[0]) {
+    case SRE_OP_ANY:
+        /* Any character except a newline (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_ANY_ALL:
+        /* Any character at all (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr)
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_ANY_ALL_REV:
+        /* Any character at all (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr)
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_ANY_REV:
+        /* Any character except a newline (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              encoding->in_category(SRE_CAT_LineBreak, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_CATEGORY:
+        /* Character in a certain category (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !encoding->in_category(body[1], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_CHARSET:
+        /* Character in a charset (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_charset(body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_CHARSET_IGNORE:
+        /* Character in a charset, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_CHARSET_IGNORE_REV:
+        /* Character in a charset, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_CHARSET_REV:
+        /* Character in a charset (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_charset(body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_LITERAL:
+        /* Character is this literal (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              context->text_ptr[0] != (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_LITERAL_IGNORE:
+        /* Character is this literal, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !same_char_ignore(state, context->text_ptr[0], body[1]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_LITERAL_IGNORE_REV:
+        /* Character is this literal, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_LITERAL_REV:
+        /* Character is this literal (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              context->text_ptr[-1] != (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_CATEGORY:
+        /* Character not in a certain category (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              encoding->in_category(body[1], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_CHARSET:
+        /* Character not in a charset (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_charset(body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* Character not in a charset, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_charset_ignore(state, body + 2, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_CHARSET_IGNORE_REV:
+        /* Character not in a charset, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_charset_ignore(state, body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_CHARSET_REV:
+        /* Character not in a charset (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_charset(body + 2, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_LITERAL:
+        /* Character is not this literal (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              context->text_ptr[0] == (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE:
+        /* Character is not this literal, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              same_char_ignore(state, context->text_ptr[0], body[1]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_LITERAL_IGNORE_REV:
+        /* Character is not this literal, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              same_char_ignore(state, context->text_ptr[-1], body[1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_LITERAL_REV:
+        /* Character is not this literal (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              context->text_ptr[-1] == (SRE_CHAR)body[1])
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_RANGE:
+        /* Character not in range (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_range(body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* Character not in range, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_RANGE_IGNORE_REV:
+        /* Character not in range, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_RANGE_REV:
+        /* Character not in range (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_range(body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_SET:
+        /* Character not in set (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_set(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_SET_IGNORE:
+        /* Character not in set, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_SET_IGNORE_REV:
+        /* Character not in set, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_NOT_SET_REV:
+        /* Character not in set (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              in_set(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_RANGE:
+        /* Character in range (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_range(body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_RANGE_IGNORE:
+        /* Character in range, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_range_ignore(state, body[1], body[2], context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_RANGE_IGNORE_REV:
+        /* Character in range, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_range_ignore(state, body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_RANGE_REV:
+        /* Character in range (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_range(body[1], body[2], context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_SET:
+        /* Character in set (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_set(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_SET_IGNORE:
+        /* Character in set, ignoring case (forwards). */
+        do {
+            if (context->text_ptr >= max_ptr ||
+              !in_set_ignore(state, body + 1, context->text_ptr[0]))
+                return FALSE;
+            context->text_ptr++;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_SET_IGNORE_REV:
+        /* Character in set, ignoring case (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_set_ignore(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    case SRE_OP_SET_REV:
+        /* Character in set (backwards). */
+        do {
+            if (context->text_ptr <= max_ptr ||
+              !in_set(state, body + 1, context->text_ptr[-1]))
+                return FALSE;
+            context->text_ptr--;
+        } while (!SRE_POSSIBLE_MATCH_AHEAD(context, tail));
+        break;
+    }
+    /* An op with no case above leaves text_ptr unmoved and returns TRUE. */
+    return TRUE;
+}
+
+/*
+ Checks if the string matches the given pattern.  Returns <0 for
+ error, 0 for failure, and 1 for success.
+ */
+LOCAL(int) SRE_MATCH(SRE_STATE* state) {
+    SRE_CONTEXT context; /* 'context' is the info for the pattern matching. */
+    SRE_BACKTRACK_ITEM* current_loop = NULL; /* Points to the backtrack item
+                                                for the current loop, if any. */
+    unsigned int sigcount = 0;
+    int result;
+    SRE_ENCODING_TABLE* encoding = state->encoding;
+    SRE_CODE op;
+
+    /* Initialise the context. */
+    context.state = state;
+    context.text_beginning = (SRE_CHAR *)state->beginning;
+    context.text_ptr = state->ptr;
+    context.text_start = (SRE_CHAR *)state->start;
+    context.text_end = (SRE_CHAR *)state->end;
+    context.search_ptr = state->search_ptr;
+    context.pattern_ptr = state->pattern_code;
+    context.marks = (SRE_CHAR**)state->mark;
+    context.marks_size = (state->numbered_mark_count +
+      state->named_mark_count) * sizeof(context.marks[0]);
+    context.backtrack_chunk = state->backtrack_chunk;
+
+    /* Point to the final newline if it's the final character. */
+    if (context.text_beginning < context.text_end &&
+      encoding->in_category(SRE_CAT_LineBreak, context.text_end[-1]))
+        context.final_linebreak = context.text_end - 1;
+    else
+        context.final_linebreak = NULL;
+
+    TRACE(("|%p|%p|ENTER\n", context.pattern_ptr, context.text_ptr));
+
+    /*
+     Store a backtrack item for failure. This takes effect if the entire
+     pattern fails to match.
+     */
+    result = SRE_SAVE_BACKTRACK(&context, SRE_OP_FAILURE, 0);
+    if (result != 0)
+        return SRE_CLEANUP(&context, result);
+
+    /* Clear the text marks. */
+    memset(context.marks, 0, context.marks_size);
+
+    /* The main matching loop. */
+advance:
+    for (;;) {
+        /* Cancel the matching? */
+        ++sigcount;
+        if ((0 == (sigcount & 0xFFF)) && PyErr_CheckSignals())
+            return SRE_CLEANUP(&context, SRE_ERROR_INTERRUPTED);
+
+        /* Try to match the next operator against the text. */
+        op = context.pattern_ptr[0];
+        switch (op) {
+        case SRE_OP_ANY:
+            /* Any character except a newline (forwards). */
+            /* <ANY> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+                encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ANY_ALL:
+            /* Any character at all (forwards). */
+            /* <ANY_ALL> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end)
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ANY_ALL_REV:
+            /* Any character at all (backwards). */
+            /* <ANY_ALL_REV> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start)
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ANY_REV:
+            /* Any character except a newline (backwards). */
+            /* <ANY_REV> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+                encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_ASSERT:
+            /* Assert subpattern (+ve look-ahead/look-behind). */
+            /* <ASSERT> <skip to end> ... <END_ASSERT> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             If the subpattern succeeds then we'll discard the enclosed
+             backtrack info, including any marks, so we need to save the marks
+             here.
+
+             If the subpattern fails then the marks will be restored
+             automatically.
+             */
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            /* Save the context. */
+            context.backtrack_item->assert.text_start = context.text_start;
+            context.backtrack_item->assert.text_ptr = context.text_ptr;
+            context.backtrack_item->assert.pattern_ptr = context.pattern_ptr;
+            /* The assert can look at the entire text. */
+            context.text_start = state->beginning;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_ASSERT_NOT:
+            /* Assert not subpattern (-ve look-ahead/look-behind). */
+            /* <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             If the subpattern succeeds then we'll discard the enclosed
+             backtrack info, including any marks, so we need to save the marks
+             here.
+
+             If the subpattern fails then the marks will be restored
+             automatically.
+             */
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT_NOT, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            /* Save the context. */
+            context.backtrack_item->assert.text_start = context.text_start;
+            context.backtrack_item->assert.text_ptr = context.text_ptr;
+            context.backtrack_item->assert.pattern_ptr = context.pattern_ptr;
+            /* The assert can look at the entire text. */
+            context.text_start = state->beginning;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_ATOMIC:
+            /* Atomic subpattern. */
+            /* <ATOMIC> ... <END_ATOMIC> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             If the subpattern succeeds then we'll discard the enclosed
+             backtrack info, including any marks, so we need to save the marks
+             here.
+             */
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ATOMIC, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_BOUNDARY:
+            /* Boundary between word and non-word. */
+            /* <BOUNDARY> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (!SRE_AT_BOUNDARY(&context))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_BRANCH:
+        {
+            /* Alternation. */
+            /*
+             <BRANCH>
+             <skip to next>
+               ...
+               <JUMP> <skip to end>
+             <skip to next>
+               ...
+               <JUMP> <skip to end>
+             0
+             */
+            SRE_CODE* skip_ptr = context.pattern_ptr + 1;
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /* Look ahead in the branch to avoid unnecessary backtracking. */
+            while (! SRE_POSSIBLE_MATCH_AHEAD(&context, skip_ptr + 1)) {
+                skip_ptr += skip_ptr[0];
+                /* Is there another branch? */
+                if (skip_ptr[0] == 0)
+                    goto backtrack;
+            }
+            /* Try this branch. */
+            context.pattern_ptr = skip_ptr + 1;
+            /* Save the next branch, if any. */
+            skip_ptr += skip_ptr[0];
+            if (skip_ptr[0] != 0) {
+                result = SRE_SAVE_BACKTRACK(&context, SRE_OP_BRANCH, FALSE);
+                if (result != 0)
+                    return SRE_CLEANUP(&context, result);
+                /* Save the context for trying the next branch. */
+                context.backtrack_item->branch.text_ptr = context.text_ptr;
+                context.backtrack_item->branch.pattern_ptr = skip_ptr;
+            }
+            break;
+        }
+        case SRE_OP_CATEGORY:
+            /* Character in a certain category (forwards). */
+            /* <CATEGORY> <mask> */
+            TRACE(("|%p|%p|%s 0x%X\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+              !encoding->in_category(context.pattern_ptr[1],
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_CATEGORY_REV:
+            /* Character in a certain category (backwards). */
+            /* <CATEGORY_REV> <mask> */
+            TRACE(("|%p|%p|%s 0x%X\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+              !encoding->in_category(context.pattern_ptr[1],
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_CHARSET:
+            /* Character in a charset (forwards). */
+            /* <CHARSET> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_charset(context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_IGNORE:
+            /* Character in a charset, ignoring case (forwards). */
+            /* <CHARSET_IGNORE> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_charset_ignore(state, context.pattern_ptr + 2,
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_IGNORE_REV:
+            /* Character in a charset, ignoring case (backwards). */
+            /* <CHARSET_IGNORE_REV> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_charset_ignore(state, context.pattern_ptr + 2,
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_REV:
+            /* Character in a charset (backwards). */
+            /* <CHARSET_REV> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_charset(context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_END_ASSERT:
+            /* End of assert subpattern (+ve look-ahead/look-behind). */
+            /* <ASSERT> <skip to end> ... <END_ASSERT> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             The subpattern has succeeded, so discard all backtrack info in the
+             assertion.
+             */
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ASSERT);
+            /* Restore the marks and context and continue matching. */
+            memmove(context.marks, context.backtrack_item->marks,
+              context.marks_size);
+            context.text_start = context.backtrack_item->assert.text_start;
+            context.text_ptr = context.backtrack_item->assert.text_ptr;
+            SRE_DISCARD_BACKTRACK(&context);
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_ASSERT_NOT:
+            /* End of assert not subpattern (-ve look-ahead/look-behind). */
+            /* <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             The subpattern has succeeded, so discard all backtrack info in the
+             assertion.
+             */
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ASSERT_NOT);
+            /* Restore the context and marks and backtrack. */
+            memmove(context.marks, context.backtrack_item->marks,
+              context.marks_size);
+            context.text_start = context.backtrack_item->assert.text_start;
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        case SRE_OP_END_ATOMIC:
+            /* End of atomic subpattern. */
+            /* <ATOMIC> <skip to end> ... <END_ATOMIC> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /*
+             The subpattern has succeeded, so discard all backtrack info in the
+             subpattern.
+             */
+            SRE_DISCARD_UNTIL(&context, SRE_OP_ATOMIC);
+            /*
+             Modify the backtrack info so that the marks will be restored if the
+             tail fails.
+             */
+            context.backtrack_item->op = SRE_OP_END_ATOMIC;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_LINE:
+            /* End of line. */
+            /* <END_OF_LINE> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr < context.text_end &&
+              !encoding->in_category(SRE_CAT_LineBreak, context.text_ptr[0]))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_STRING:
+            /* End of string. */
+            /* <END_OF_STRING> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr < context.text_end)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_OF_STRING_LN:
+            /* End of string or final line. */
+            /* <END_OF_STRING_LN> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr < context.text_end &&
+              context.text_ptr != context.final_linebreak)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_END_REPEAT_MAX:
+        case SRE_OP_END_REPEAT_MAX_REV:
+        {
+            /* End of greedy repeat. */
+            /*
+             <REPEAT_MAX> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_MAX> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_END_REPEAT_MAX;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            BOOL consumed;
+            BOOL try_body;
+            BOOL try_tail;
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /* Point to the repeat and end-repeat operators. */
+            end_repeat_ptr = context.pattern_ptr;
+            repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* We've just matched the body again. */
+            ++current_loop->repeat.repeat_counter;
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /* How many times can we repeat the body? */
+            max_rep = unsigned_min(current_loop->repeat.repeat_max,
+              current_loop->repeat.repeat_counter + available);
+
+            /* Has the body consumed any characters this time? */
+            consumed = context.text_ptr != current_loop->repeat.repeat_start;
+
+            /*
+             If the body hasn't consumed any characters then it could continue
+             to repeat up to the maximum and then the tail could be tried.
+
+             If that's the case then we can just skip the pointless repeats and
+             go straight to the tail.
+             */
+            /* Should the body be tried again? */
+            try_body = consumed && current_loop->repeat.repeat_counter <
+              max_rep;
+            /* Should the tail be tried? */
+            try_tail = (consumed || current_loop->repeat.repeat_counter >=
+              current_loop->repeat.repeat_min) &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The body takes precedence, so create a backtrack point for
+                     the tail.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the tail. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = tail;
+                    context.backtrack_item->repeat.loop = current_loop;
+                }
+                /* Try the body. */
+                current_loop->repeat.repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    /*
+                     Only the tail should be tried, so do that.
+
+                     Now that we're about to try the tail we also need to make
+                     the enclosing loop the 'current' one.
+                     */
+                    current_loop = current_loop->repeat.loop;
+                    context.pattern_ptr = tail;
+                } else
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_END_REPEAT_MIN:
+        case SRE_OP_END_REPEAT_MIN_REV:
+        {
+            /* End of lazy repeat. */
+            /*
+             <REPEAT_MIN> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_MIN> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_END_REPEAT_MIN;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            BOOL consumed;
+            BOOL try_body;
+            BOOL try_tail;
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /* Point to the repeat and end-repeat operators. */
+            end_repeat_ptr = context.pattern_ptr;
+            repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* We've just matched the body again. */
+            ++current_loop->repeat.repeat_counter;
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /* How many times can we repeat the body? */
+            max_rep = unsigned_min(current_loop->repeat.repeat_max,
+              current_loop->repeat.repeat_counter + available);
+
+            /* Has the body consumed any characters this time? */
+            consumed = context.text_ptr != current_loop->repeat.repeat_start;
+
+            /*
+             If the body hasn't consumed any characters then it could continue
+             to repeat up to the minimum and then the tail could be tried.
+
+             If that's the case then we can just skip the pointless repeats and
+             go straight to the tail.
+             */
+            /* Should the body be tried again? */
+            try_body = consumed && current_loop->repeat.repeat_counter <
+              max_rep;
+            /* Should the tail be tried? */
+            try_tail = (consumed || current_loop->repeat.repeat_counter >=
+              current_loop->repeat.repeat_min) &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The tail takes precedence, so create a backtrack point for
+                     the body.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the body. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = body;
+                    context.backtrack_item->repeat.loop = current_loop;
+                    /*
+                     Now that we're about to try the tail we also need to make
+                     the enclosing loop the 'current' one.
+                     */
+                    context.pattern_ptr = tail;
+                    current_loop = current_loop->repeat.loop;
+                } else {
+                    /* Only the body should be tried, so do that. */
+                    current_loop->repeat.repeat_start = context.text_ptr;
+                    context.pattern_ptr = body;
+                }
+            } else {
+                if (try_tail) {
+                    /*
+                     Only the tail should be tried, so do that.
+
+                     Now that we're about to try the tail we also need to make
+                     the enclosing loop the 'current' one.
+                     */
+                    current_loop = current_loop->repeat.loop;
+                    context.pattern_ptr = tail;
+                } else
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_END_REPEAT_POSS:
+        case SRE_OP_END_REPEAT_POSS_REV:
+        {
+            /* End of possessive repeat. */
+            /*
+             <REPEAT_POSS> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_POSS> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_END_REPEAT_POSS;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            BOOL consumed;
+            BOOL try_body;
+            BOOL try_tail;
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /* Point to the repeat and end-repeat operators. */
+            end_repeat_ptr = context.pattern_ptr;
+            repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* We've just matched the body again. */
+            ++current_loop->repeat.repeat_counter;
+
+            /* Discard all the backtrack info in the body we've just matched. */
+            SRE_DISCARD_UNTIL(&context, repeat_ptr[0]);
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /* How many times can we repeat the body? */
+            max_rep = unsigned_min(current_loop->repeat.repeat_max,
+              current_loop->repeat.repeat_counter + available);
+
+            /* Has the body consumed any characters this time? */
+            consumed = context.text_ptr != current_loop->repeat.repeat_start;
+
+            /*
+             If the body hasn't consumed any characters then it could continue
+             to repeat up to the maximum and then the tail could be tried.
+
+             If that's the case then we can just skip the pointless repeats and
+             go straight to the tail.
+             */
+            /* Should the body be tried again? */
+            try_body = consumed && current_loop->repeat.repeat_counter <
+              max_rep;
+            /* Should the tail be tried? */
+            try_tail = (consumed || current_loop->repeat.repeat_counter >=
+              current_loop->repeat.repeat_min) &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The body takes precedence, so create a backtrack point for
+                     the tail.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the tail. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = tail;
+                    context.backtrack_item->repeat.loop = current_loop;
+                }
+                /* Try the body. */
+                current_loop->repeat.repeat_start = context.text_ptr;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail) {
+                    /*
+                     Only the tail should be tried, so do that.
+
+                     Now that we're about to try the tail we also need to make
+                     the enclosing loop the 'current' one.
+                     */
+                    current_loop = current_loop->repeat.loop;
+                    context.pattern_ptr = tail;
+                } else
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_GROUPREF:
+        case SRE_OP_GROUPREF_REV:
+        {
+            /* Match against capture group. */
+            /* <GROUPREF> <group_index> */
+            BOOL forward = op == SRE_OP_GROUPREF;
+            unsigned int group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t available;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            /*
+             Get the captured group.
+
+             Here we're using a zero-based index. Note that _externally_
+             group 0 is the entire matched string.
+             */
+            group = context.pattern_ptr[1];
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            /* Is this group valid? */
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+
+            /* How long is the captured group? */
+            length = group_end - group_start;
+            /* Are there enough characters available for a match? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+            if (length > available)
+                goto backtrack;
+
+            if (!forward)
+                /* Skip to the start of the text we're about to match. */
+                context.text_ptr -= length;
+
+            /* Check whether the text here matches the group. */
+            i = 0;
+            while (i < length) {
+                if (context.text_ptr[i] != group_start[i])
+                    goto backtrack;
+                i++;
+            }
+
+            if (forward)
+                /* Skip over the text we've just matched. */
+                context.text_ptr += length;
+
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_IGNORE:
+        case SRE_OP_GROUPREF_IGNORE_REV:
+        {
+            /* Match against capture group, ignoring case. */
+            /* <GROUPREF_IGNORE> <group_index> */
+            BOOL forward = op == SRE_OP_GROUPREF_IGNORE;
+            unsigned int group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t available;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            /*
+             Get the captured group.
+
+             Here we're using a zero-based index. Note that _externally_
+             group 0 is the entire matched string.
+             */
+            group = context.pattern_ptr[1];
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            /* Is this group valid? */
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+
+            /* How long is the captured group? */
+            length = group_end - group_start;
+            /* Are there enough characters available for a match? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+            if (length > available)
+                goto backtrack;
+
+            if (!forward)
+                /* Skip to the start of the text we're about to match. */
+                context.text_ptr -= length;
+
+            /* Check whether the text here matches the group. */
+            i = 0;
+            while (i < length) {
+                if (!same_char_ignore(state, context.text_ptr[i],
+                  group_start[i]))
+                    goto backtrack;
+                i++;
+            }
+
+            if (forward)
+                /* Skip over the text we've just matched. */
+                context.text_ptr += length;
+
+            context.pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_EXISTS:
+        {
+            /* Whether a capture group exists. */
+            /*
+             <GROUPREF_EXISTS> <group_index> <skip>
+                 code_yes
+             <JUMP> <skip>
+                 code_no
+             */
+            unsigned int group;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            /*
+             Get the captured group.
+
+             Here we're using a zero-based index. Note that _externally_
+             group 0 is the entire matched string.
+             */
+            group = context.pattern_ptr[1];
+            group_start = context.marks[group * 2];
+            group_end = context.marks[group * 2 + 1];
+            /* Is this group valid? */
+            if (group_start == NULL || group_start > group_end)
+                /*
+                 Skip to code_no, which might actually be the tail of the
+                 pattern.
+                 */
+                context.pattern_ptr += 1 + context.pattern_ptr[2];
+            else
+                /* Skip to code_yes. */
+                context.pattern_ptr += 3;
+            break;
+        }
+        case SRE_OP_JUMP:
+            /* Jump forward in the pattern. */
+            /* <JUMP> <offset> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
         case SRE_OP_LITERAL:
-            /* <LITERAL> <code> */
-            if (ch == set[0])
-                return ok;
-            set++;
-            break;
-
-        case SRE_OP_CATEGORY:
-            /* <CATEGORY> <code> */
-            if (sre_category(set[0], (int) ch))
-                return ok;
-            set += 1;
-            break;
-
-        case SRE_OP_CHARSET:
-            if (sizeof(SRE_CODE) == 2) {
-                /* <CHARSET> <bitmap> (16 bits per code word) */
-                if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
-                    return ok;
-                set += 16;
+            /* Character is this literal (forwards). */
+            /* <LITERAL> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+                context.text_ptr[0] != (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_IGNORE:
+            /* Character is this literal, ignoring case (forwards). */
+            /* <LITERAL_IGNORE> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+              !same_char_ignore(state, context.text_ptr[0],
+              context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_IGNORE_REV:
+            /* Character is this literal, ignoring case (backwards). */
+            /* <LITERAL_IGNORE_REV> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+              !same_char_ignore(state, context.text_ptr[-1],
+              context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_REV:
+            /* Character is this literal (backwards). */
+            /* <LITERAL_REV> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+                context.text_ptr[-1] != (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_STRING:
+        case SRE_OP_LITERAL_STRING_REV:
+        {
+            /* Literal string. */
+            /* <LITERAL_STRING> <length> ... */
+            BOOL forward = op == SRE_OP_LITERAL_STRING;
+            Py_ssize_t available;
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            /* Are there enough characters available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+            if (length > available)
+                goto backtrack;
+
+            if (!forward)
+                /* Skip to the start of the text we're about to match. */
+                context.text_ptr -= length;
+
+            /* Check whether the text here matches the literal. */
+            i = 0;
+            do {
+                if (context.text_ptr[i] != (SRE_CHAR)literal[i])
+                    goto backtrack;
+                i++;
             }
-            else {
-                /* <CHARSET> <bitmap> (32 bits per code word) */
-                if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
-                    return ok;
-                set += 8;
+            while (i < length);
+
+            if (forward)
+                /* Skip over the text we've just matched. */
+                context.text_ptr += length;
+
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_LITERAL_STRING_IGNORE:
+        case SRE_OP_LITERAL_STRING_IGNORE_REV:
+        {
+            /* Literal string, ignoring case. */
+            /* <LITERAL_STRING_IGNORE> <length> ... */
+            BOOL forward = op == SRE_OP_LITERAL_STRING_IGNORE;
+            Py_ssize_t available;
+            Py_ssize_t length = context.pattern_ptr[1];
+            SRE_CODE* literal = context.pattern_ptr + 2;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            /* Are there enough characters available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+            if (length > available)
+                goto backtrack;
+
+            if (!forward)
+                /* Skip to the start of the text we're about to match. */
+                context.text_ptr -= length;
+
+            /* Check whether the text here matches the literal. */
+            i = 0;
+            do {
+                if (!same_char_ignore(state, context.text_ptr[i], literal[i]))
+                    goto backtrack;
+                i++;
             }
-            break;
-
+            while (i < length);
+
+            if (forward)
+                /* Skip over the text we've just matched. */
+                context.text_ptr += length;
+
+            context.pattern_ptr = literal + length;
+            break;
+        }
+        case SRE_OP_MARK:
+        {
+            /* Text mark. */
+            /* <MARK> <numbered_index> <named_index> */
+            unsigned int numbered_index = context.pattern_ptr[1];
+            unsigned int named_index = context.pattern_ptr[2];
+            SRE_BACKTRACK_ITEM* item;
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, numbered_index, named_index));
+
+            /*
+             Save the current position of the mark and set the mark to the
+             current text position.
+
+             Every capture group has a number; some also have names, which
+             might occur more than once in a regex. Therefore the marks have 2
+             ids, one for the number and the other for the name. If the mark is
+             for an unnamed group then the name id is the same as the number id.
+             */
+            result = SRE_SAVE_BACKTRACK(&context, SRE_OP_MARK, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            item = context.backtrack_item;
+            /* The number id. */
+            item->mark.numbered_index = numbered_index;
+            item->mark.numbered_mark_ptr = context.marks[numbered_index];
+            context.marks[numbered_index] = context.text_ptr;
+            /* The name id. */
+            item->mark.named_index = named_index;
+            item->mark.named_mark_ptr = context.marks[named_index];
+            context.marks[named_index] = context.text_ptr;
+
+            context.pattern_ptr += 3;
+            break;
+        }
+        case SRE_OP_NOT_BOUNDARY:
+            /* Not a boundary between word and non-word. */
+            /* <NOT_BOUNDARY> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (SRE_AT_BOUNDARY(&context))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_NOT_CATEGORY:
+            /* Character not in a certain category (forwards). */
+            /* <NOT_CATEGORY> <mask> */
+            TRACE(("|%p|%p|%s 0x%X\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+              state->encoding->in_category(context.pattern_ptr[1],
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CATEGORY_REV:
+            /* Character not in a certain category (backwards). */
+            /* <NOT_CATEGORY_REV> <mask> */
+            TRACE(("|%p|%p|%s 0x%X\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+              state->encoding->in_category(context.pattern_ptr[1],
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CHARSET:
+            /* Character not in a charset (forwards). */
+            /* <NOT_CHARSET> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              in_charset(context.pattern_ptr + 2, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_IGNORE:
+            /* Character not in a charset, ignoring case (forwards). */
+            /* <NOT_CHARSET_IGNORE> <skip> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              in_charset_ignore(state, context.pattern_ptr + 2,
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_IGNORE_REV:
+            /* Character not in a charset, ignoring case (backwards). */
+            /* <NOT_CHARSET_IGNORE_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              in_charset_ignore(state, context.pattern_ptr + 2,
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_REV:
+            /* Character not in a charset (backwards). */
+            /* <NOT_CHARSET_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              in_charset(context.pattern_ptr + 2, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 1 + context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_LITERAL:
+            /* Character is not this literal (forwards). */
+            /* <NOT_LITERAL> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+              context.text_ptr[0] == (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_IGNORE:
+            /* Character is not this literal, ignoring case (forwards). */
+            /* <NOT_LITERAL_IGNORE> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr >= context.text_end ||
+              same_char_ignore(state, context.text_ptr[0],
+              context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_IGNORE_REV:
+            /* Character is not this literal, ignoring case (backwards). */
+            /* <NOT_LITERAL_IGNORE_REV> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+              same_char_ignore(state, context.text_ptr[-1],
+              context.pattern_ptr[1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_REV:
+            /* Character is not this literal (backwards). */
+            /* <NOT_LITERAL_REV> <character> */
+            TRACE(("|%p|%p|%s %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1]));
+
+            if (context.text_ptr <= context.text_start ||
+              context.text_ptr[-1] == (SRE_CHAR)context.pattern_ptr[1])
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_RANGE:
+            /* Character not in range (forwards). */
+            /* <NOT_RANGE> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr >= context.text_end ||
+              in_range(context.pattern_ptr[1], context.pattern_ptr[2],
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_IGNORE:
+            /* Character not in range, ignoring case (forwards). */
+            /* <NOT_RANGE_IGNORE> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr >= context.text_end ||
+              in_range_ignore(state, context.pattern_ptr[1],
+              context.pattern_ptr[2], context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_IGNORE_REV:
+            /* Character not in range, ignoring case (backwards). */
+            /* <NOT_RANGE_IGNORE_REV> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr <= context.text_start ||
+              in_range_ignore(state, context.pattern_ptr[1],
+              context.pattern_ptr[2], context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_REV:
+            /* Character not in range (backwards). */
+            /* <NOT_RANGE_REV> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr <= context.text_start ||
+              in_range(context.pattern_ptr[1], context.pattern_ptr[2],
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_SET:
+            /* Character not in set (forwards). */
+            /* <NOT_SET> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              in_set(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_IGNORE:
+            /* Character not in set, ignoring case (forwards). */
+            /* <NOT_SET_IGNORE> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              in_set_ignore(state, context.pattern_ptr + 1,
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_IGNORE_REV:
+            /* Character not in set, ignoring case (backwards). */
+            /* <NOT_SET_IGNORE_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              in_set_ignore(state, context.pattern_ptr + 1,
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_REV:
+            /* Character not in set (backwards). */
+            /* <NOT_SET_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              in_set(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
         case SRE_OP_RANGE:
+            /* Character in range (forwards). */
             /* <RANGE> <lower> <upper> */
-            if (set[0] <= ch && ch <= set[1])
-                return ok;
-            set += 2;
-            break;
-
-        case SRE_OP_NEGATE:
-            ok = !ok;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_range(context.pattern_ptr[1], context.pattern_ptr[2],
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_IGNORE:
+            /* Character in range, ignoring case (forwards). */
+            /* <RANGE_IGNORE> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_range_ignore(state, context.pattern_ptr[1],
+              context.pattern_ptr[2], context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_IGNORE_REV:
+            /* Character in range, ignoring case (backwards). */
+            /* <RANGE_IGNORE_REV> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_range_ignore(state, context.pattern_ptr[1],
+              context.pattern_ptr[2], context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_REV:
+            /* Character in range (backwards). */
+            /* <RANGE_REV> <lower> <upper> */
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, context.pattern_ptr[1],
+              context.pattern_ptr[2]));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_range(context.pattern_ptr[1], context.pattern_ptr[2],
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr += 3;
+            break;
+        case SRE_OP_REPEAT_MAX:
+        case SRE_OP_REPEAT_MAX_REV:
         {
-            Py_ssize_t count, block;
-            count = *(set++);
-
-            if (sizeof(SRE_CODE) == 2) {
-                block = ((unsigned char*)set)[ch >> 8];
-                set += 128;
-                if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
-                    return ok;
-                set += count*16;
+            /* Greedy repeat. */
+            /*
+             <REPEAT_MAX> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_MAX> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_REPEAT_MAX;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_BACKTRACK_ITEM* new_loop;
+            BOOL try_body;
+            BOOL try_tail;
+
+            /* Point to the repeat and end-repeat operators. */
+            repeat_ptr = context.pattern_ptr;
+            end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume at least one character per iteration and
+             must iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            /*
+             Save the context and initialise the repeat info for the new repeat.
+            */
+            result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            new_loop = context.backtrack_item;
+            new_loop->repeat.repeat_min = repeat_ptr[2];
+            new_loop->repeat.repeat_max = max_rep;
+            new_loop->repeat.repeat_counter = 0;
+            new_loop->repeat.loop = current_loop;
+            new_loop->repeat.repeat_start = context.text_ptr;
+
+            /* Should the body be tried? */
+            try_body = available > 0;
+            /* Should the tail be tried? */
+            try_tail = new_loop->repeat.repeat_min == 0 &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The body takes precedence, so create a backtrack point for
+                     the tail.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, end_repeat_ptr[0],
+                      FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the tail. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = tail;
+                    context.backtrack_item->repeat.loop = new_loop;
+                }
+                /* Try the body. */
+                current_loop = new_loop;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail)
+                    /* Only the tail should be tried, so do that. */
+                    context.pattern_ptr = tail;
+                else
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
             }
-            else {
-                /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
-                 * warnings when c's type supports only numbers < N+1 */
-                if (!(ch & ~65535))
-                    block = ((unsigned char*)set)[ch >> 8];
+            break;
+        }
+        case SRE_OP_REPEAT_MIN:
+        case SRE_OP_REPEAT_MIN_REV:
+        {
+            /* Lazy repeat. */
+            /*
+             <REPEAT_MIN> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_MIN> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_REPEAT_MIN;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_BACKTRACK_ITEM* new_loop;
+            BOOL try_body;
+            BOOL try_tail;
+
+            /* Point to the repeat and end-repeat operators. */
+            repeat_ptr = context.pattern_ptr;
+            end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume at least one character per iteration and
+             must iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            /*
+             Save the context and initialise the repeat info for the new repeat.
+            */
+            result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            new_loop = context.backtrack_item;
+            new_loop->repeat.repeat_min = repeat_ptr[2];
+            new_loop->repeat.repeat_max = max_rep;
+            new_loop->repeat.repeat_counter = 0;
+            new_loop->repeat.loop = current_loop;
+            new_loop->repeat.repeat_start = context.text_ptr;
+
+            /* Should the body be tried? */
+            try_body = available > 0;
+            /* Should the tail be tried? */
+            try_tail = new_loop->repeat.repeat_min == 0 &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The tail takes precedence, so create a backtrack point for
+                     the body.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, end_repeat_ptr[0],
+                      FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the body. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = body;
+                    context.backtrack_item->repeat.loop = new_loop;
+
+                    /* Try the tail. */
+                    context.pattern_ptr = tail;
+                } else {
+                    /* Try the body. */
+                    current_loop = new_loop;
+                    context.pattern_ptr = body;
+                }
+            } else {
+                if (try_tail)
+                    /* Only the tail should be tried, so do that. */
+                    context.pattern_ptr = tail;
                 else
-                    block = -1;
-                set += 64;
-                if (block >=0 &&
-                    (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
-                    return ok;
-                set += count*8;
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
             }
             break;
         }
-
-        default:
-            /* internal error -- there's not much we can do about it
-               here, so let's just pretend it didn't match... */
-            return 0;
-        }
-    }
-}
-
-LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
-
-LOCAL(Py_ssize_t)
-SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
-{
-    SRE_CODE chr;
-    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t i;
-
-    /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
-        end = ptr + maxcount;
-
-    switch (pattern[0]) {
-
-    case SRE_OP_IN:
-        /* repeated set */
-        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
-        while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
-            ptr++;
-        break;
-
-    case SRE_OP_ANY:
-        /* repeated dot wildcard. */
-        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
-        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
-            ptr++;
-        break;
-
-    case SRE_OP_ANY_ALL:
-        /* repeated dot wildcard.  skip to the end of the target
-           string, and backtrack from there */
-        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
-        ptr = end;
-        break;
-
-    case SRE_OP_LITERAL:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr == chr)
-            ptr++;
-        break;
-
-    case SRE_OP_LITERAL_IGNORE:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr != chr)
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL_IGNORE:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
-            ptr++;
-        break;
-
-    default:
-        /* repeated single character pattern */
-        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
-        while ((SRE_CHAR*) state->ptr < end) {
-            i = SRE_MATCH(state, pattern);
-            if (i < 0)
-                return i;
-            if (!i)
-                break;
-        }
-        TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
-               (SRE_CHAR*) state->ptr - ptr));
-        return (SRE_CHAR*) state->ptr - ptr;
-    }
-
-    TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
-    return ptr - (SRE_CHAR*) state->ptr;
-}
-
-#if 0 /* not used in this release */
-LOCAL(int)
-SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
-{
-    /* check if an SRE_OP_INFO block matches at the current position.
-       returns the number of SRE_CODE objects to skip if successful, 0
-       if no match */
-
-    SRE_CHAR* end = state->end;
-    SRE_CHAR* ptr = state->ptr;
-    Py_ssize_t i;
-
-    /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
-        return 0;
-
-    /* check known prefix */
-    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
-        /* <length> <skip> <prefix data> <overlap data> */
-        for (i = 0; i < pattern[5]; i++)
-            if ((SRE_CODE) ptr[i] != pattern[7 + i])
-                return 0;
-        return pattern[0] + 2 * pattern[6];
-    }
-    return pattern[0];
-}
-#endif
-
-/* The macros below should be used to protect recursive SRE_MATCH()
- * calls that *failed* and do *not* return immediately (IOW, those
- * that will backtrack). Explaining:
- *
- * - Recursive SRE_MATCH() returned true: that's usually a success
- *   (besides atypical cases like ASSERT_NOT), therefore there's no
- *   reason to restore lastmark;
- *
- * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
- *   is returning to the caller: If the current SRE_MATCH() is the
- *   top function of the recursion, returning false will be a matching
- *   failure, and it doesn't matter where lastmark is pointing to.
- *   If it's *not* the top function, it will be a recursive SRE_MATCH()
- *   failure by itself, and the calling SRE_MATCH() will have to deal
- *   with the failure by the same rules explained here (it will restore
- *   lastmark by itself if necessary);
- *
- * - Recursive SRE_MATCH() returned false, and will continue the
- *   outside 'for' loop: must be protected when breaking, since the next
- *   OP could potentially depend on lastmark;
- *
- * - Recursive SRE_MATCH() returned false, and will be called again
- *   inside a local for/while loop: must be protected between each
- *   loop iteration, since the recursive SRE_MATCH() could do anything,
- *   and could potentially depend on lastmark.
- *
- * For more information, check the discussion at SF patch #712900.
- */
-#define LASTMARK_SAVE()     \
-    do { \
-        ctx->lastmark = state->lastmark; \
-        ctx->lastindex = state->lastindex; \
-    } while (0)
-#define LASTMARK_RESTORE()  \
-    do { \
-        state->lastmark = ctx->lastmark; \
-        state->lastindex = ctx->lastindex; \
-    } while (0)
-
-#define RETURN_ERROR(i) do { return i; } while(0)
-#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
-#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
-
-#define RETURN_ON_ERROR(i) \
-    do { if (i < 0) RETURN_ERROR(i); } while (0)
-#define RETURN_ON_SUCCESS(i) \
-    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
-#define RETURN_ON_FAILURE(i) \
-    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
-
-#define SFY(x) #x
-
-#define DATA_STACK_ALLOC(state, type, ptr) \
-do { \
-    alloc_pos = state->data_stack_base; \
-    TRACE(("allocating %s in %d (%d)\n", \
-           SFY(type), alloc_pos, sizeof(type))); \
-    if (state->data_stack_size < alloc_pos+sizeof(type)) { \
-        int j = data_stack_grow(state, sizeof(type)); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    ptr = (type*)(state->data_stack+alloc_pos); \
-    state->data_stack_base += sizeof(type); \
-} while (0)
-
-#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
-do { \
-    TRACE(("looking up %s at %d\n", SFY(type), pos)); \
-    ptr = (type*)(state->data_stack+pos); \
-} while (0)
-
-#define DATA_STACK_PUSH(state, data, size) \
-do { \
-    TRACE(("copy data in %p to %d (%d)\n", \
-           data, state->data_stack_base, size)); \
-    if (state->data_stack_size < state->data_stack_base+size) { \
-        int j = data_stack_grow(state, size); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    memcpy(state->data_stack+state->data_stack_base, data, size); \
-    state->data_stack_base += size; \
-} while (0)
-
-#define DATA_STACK_POP(state, data, size, discard) \
-do { \
-    TRACE(("copy data to %p from %d (%d)\n", \
-           data, state->data_stack_base-size, size)); \
-    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
-    if (discard) \
-        state->data_stack_base -= size; \
-} while (0)
-
-#define DATA_STACK_POP_DISCARD(state, size) \
-do { \
-    TRACE(("discard data from %d (%d)\n", \
-           state->data_stack_base-size, size)); \
-    state->data_stack_base -= size; \
-} while(0)
-
-#define DATA_PUSH(x) \
-    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
-#define DATA_POP(x) \
-    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
-#define DATA_POP_DISCARD(x) \
-    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
-#define DATA_ALLOC(t,p) \
-    DATA_STACK_ALLOC(state, t, p)
-#define DATA_LOOKUP_AT(t,p,pos) \
-    DATA_STACK_LOOKUP_AT(state,t,p,pos)
-
-#define MARK_PUSH(lastmark) \
-    do if (lastmark > 0) { \
-        i = lastmark; /* ctx->lastmark may change if reallocated */ \
-        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
-    } while (0)
-#define MARK_POP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
-    } while (0)
-#define MARK_POP_KEEP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
-    } while (0)
-#define MARK_POP_DISCARD(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
-    } while (0)
-
-#define JUMP_NONE            0
-#define JUMP_MAX_UNTIL_1     1
-#define JUMP_MAX_UNTIL_2     2
-#define JUMP_MAX_UNTIL_3     3
-#define JUMP_MIN_UNTIL_1     4
-#define JUMP_MIN_UNTIL_2     5
-#define JUMP_MIN_UNTIL_3     6
-#define JUMP_REPEAT          7
-#define JUMP_REPEAT_ONE_1    8
-#define JUMP_REPEAT_ONE_2    9
-#define JUMP_MIN_REPEAT_ONE  10
-#define JUMP_BRANCH          11
-#define JUMP_ASSERT          12
-#define JUMP_ASSERT_NOT      13
-
-#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
-    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
-    nextctx->last_ctx_pos = ctx_pos; \
-    nextctx->jump = jumpvalue; \
-    nextctx->pattern = nextpattern; \
-    ctx_pos = alloc_pos; \
-    ctx = nextctx; \
-    goto entrance; \
-    jumplabel: \
-    while (0) /* gcc doesn't like labels at end of scopes */ \
-
-typedef struct {
-    Py_ssize_t last_ctx_pos;
-    Py_ssize_t jump;
-    SRE_CHAR* ptr;
-    SRE_CODE* pattern;
-    Py_ssize_t count;
-    Py_ssize_t lastmark;
-    Py_ssize_t lastindex;
-    union {
-        SRE_CODE chr;
-        SRE_REPEAT* rep;
-    } u;
-} SRE_MATCH_CONTEXT;
-
-/* check if string matches the given pattern.  returns <0 for
-   error, 0 for failure, and 1 for success */
-LOCAL(Py_ssize_t)
-SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
-{
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t alloc_pos, ctx_pos = -1;
-    Py_ssize_t i, ret = 0;
-    Py_ssize_t jump;
-    unsigned int sigcount=0;
-
-    SRE_MATCH_CONTEXT* ctx;
-    SRE_MATCH_CONTEXT* nextctx;
-
-    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
-
-    DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
-    ctx->last_ctx_pos = -1;
-    ctx->jump = JUMP_NONE;
-    ctx->pattern = pattern;
-    ctx_pos = alloc_pos;
-
-entrance:
-
-    ctx->ptr = (SRE_CHAR *)state->ptr;
-
-    if (ctx->pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> ... */
-        if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
-            TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
-            RETURN_FAILURE;
-        }
-        ctx->pattern += ctx->pattern[1] + 1;
-    }
-
-    for (;;) {
-        ++sigcount;
-        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
-            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
-
-        switch (*ctx->pattern++) {
-
-        case SRE_OP_MARK:
-            /* set mark */
-            /* <MARK> <gid> */
-            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            if (i & 1)
-                state->lastindex = i/2 + 1;
-            if (i > state->lastmark) {
-                /* state->lastmark is the highest valid index in the
-                   state->mark array.  If it is increased by more than 1,
-                   the intervening marks must be set to NULL to signal
-                   that these marks have not been encountered. */
-                Py_ssize_t j = state->lastmark + 1;
-                while (j < i)
-                    state->mark[j++] = NULL;
-                state->lastmark = i;
+        case SRE_OP_REPEAT_ONE_MAX:
+        case SRE_OP_REPEAT_ONE_MAX_REV:
+        {
+            /* Greedy repeat. */
+            /*
+             <REPEAT_ONE_MAX> <skip to end> <min> <max>
+                 ...
+             */
+            BOOL forward = op == SRE_OP_REPEAT_ONE_MAX;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            SRE_CHAR* max_ptr;
+            BOOL ok;
+
+            /* Point to the repeat operator. */
+            repeat_ptr = context.pattern_ptr;
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = repeat_ptr + 1 + repeat_ptr[1];
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume one character per iteration and must
+             iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            start_ptr = context.text_ptr;
+            if (forward) {
+                min_ptr = start_ptr + repeat_ptr[2];
+                max_ptr = start_ptr + max_rep;
+            } else {
+                min_ptr = start_ptr - repeat_ptr[2];
+                max_ptr = start_ptr - max_rep;
             }
-            state->mark[i] = ctx->ptr;
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_LITERAL:
-            /* match literal string */
-            /* <LITERAL> <code> */
-            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_NOT_LITERAL:
-            /* match anything that is not literal character */
-            /* <NOT_LITERAL> <code> */
-            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
+
+            /* Match up to the maximum. */
+            SRE_MATCH_MANY(&context, max_ptr, body);
+
+            /* Unmatch down to the minimum until the tail could match. */
+            if (forward)
+                ok = SRE_UNMATCH_UNTIL_TAIL(&context, min_ptr, tail);
+            else
+                ok = SRE_UNMATCH_UNTIL_TAIL_REV(&context, min_ptr, tail);
+            if (!ok)
+                /* Reached the minimum and the tail still couldn't match. */
+                goto backtrack;
+
+            /*
+             Save the context and initialise the repeat info for the new repeat
+             unless we're already at the minimum.
+            */
+            if (context.text_ptr != min_ptr) {
+                SRE_BACKTRACK_ITEM* new_loop;
+
+                result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+                if (result != 0)
+                    return SRE_CLEANUP(&context, result);
+                new_loop = context.backtrack_item;
+                new_loop->repeat.repeat_min = repeat_ptr[2];
+                new_loop->repeat.repeat_max = max_rep;
+                if (forward)
+                    new_loop->repeat.repeat_counter = context.text_ptr -
+                      start_ptr;
+                else
+                    new_loop->repeat.repeat_counter = start_ptr -
+                      context.text_ptr;
+                new_loop->repeat.pattern_ptr = repeat_ptr;
+                new_loop->repeat.loop = current_loop;
+                new_loop->repeat.repeat_start = start_ptr;
+            }
+
+            /* Now match the tail. */
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_MIN:
+        case SRE_OP_REPEAT_ONE_MIN_REV:
+        {
+            /* Lazy repeat. */
+            /*
+             <REPEAT_ONE_MIN> <skip to end> <min> <max>
+                 ...
+             */
+            BOOL forward = op == SRE_OP_REPEAT_ONE_MIN;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            SRE_CHAR* max_ptr;
+            BOOL ok;
+
+            /* Point to the repeat operator. */
+            repeat_ptr = context.pattern_ptr;
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = repeat_ptr + 1 + repeat_ptr[1];
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume one character per iteration and must
+             iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            start_ptr = context.text_ptr;
+            if (forward)
+                min_ptr = start_ptr + repeat_ptr[2];
+            else
+                min_ptr = start_ptr - repeat_ptr[2];
+
+            /* Match up to the minimum. */
+            SRE_MATCH_MANY(&context, min_ptr, body);
+
+            /* Matched at least the minimum? */
+            if (forward)
+                ok = context.text_ptr >= min_ptr;
+            else
+                ok = context.text_ptr <= min_ptr;
+            if (!ok)
+                goto backtrack;
+
+            /* Match until the tail could match, up to the maximum. */
+            if (forward)
+                max_ptr = start_ptr + max_rep;
+            else
+                max_ptr = start_ptr - max_rep;
+            if(!SRE_MATCH_UNTIL_TAIL(&context, max_ptr, body, tail))
+                /* Reached the maximum and the tail still couldn't match. */
+                goto backtrack;
+
+            /*
+             Save the context and initialise the repeat info for the new repeat
+             unless we're already at the maximum.
+            */
+            if (context.text_ptr != max_ptr)
+            {
+                SRE_BACKTRACK_ITEM* new_loop;
+
+                result = SRE_SAVE_BACKTRACK(&context, op, FALSE);
+                if (result != 0)
+                    return SRE_CLEANUP(&context, result);
+                new_loop = context.backtrack_item;
+                new_loop->repeat.repeat_min = repeat_ptr[2];
+                new_loop->repeat.repeat_max = max_rep;
+                if (forward)
+                    new_loop->repeat.repeat_counter = context.text_ptr -
+                      start_ptr;
+                else
+                    new_loop->repeat.repeat_counter = start_ptr -
+                      context.text_ptr;
+                new_loop->repeat.pattern_ptr = repeat_ptr;
+                new_loop->repeat.loop = current_loop;
+                new_loop->repeat.repeat_start = start_ptr;
+            }
+
+            /* Now match the tail. */
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_ONE_POSS:
+        case SRE_OP_REPEAT_ONE_POSS_REV:
+        {
+            /* Possessive repeat. */
+            /*
+             <REPEAT_ONE_POSS> <skip to end> <min> <max>
+                 ...
+             */
+            BOOL forward = op == SRE_OP_REPEAT_ONE_POSS;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            SRE_CHAR* max_ptr;
+            BOOL ok;
+
+            /* Point to the repeat operator. */
+            repeat_ptr = context.pattern_ptr;
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = repeat_ptr + 1 + repeat_ptr[1];
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume one character per iteration and must
+             iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            start_ptr = context.text_ptr;
+            if (forward) {
+                min_ptr = start_ptr + repeat_ptr[2];
+                max_ptr = start_ptr + max_rep;
+            } else {
+                min_ptr = start_ptr - repeat_ptr[2];
+                max_ptr = start_ptr - max_rep;
+            }
+
+            /* Match up to the maximum. */
+            SRE_MATCH_MANY(&context, max_ptr, body);
+
+            /* Matched at least the minimum? */
+            if (forward)
+                ok = context.text_ptr >= min_ptr;
+            else
+                ok = context.text_ptr <= min_ptr;
+            if (!ok)
+                goto backtrack;
+
+            /* Now match the tail. */
+            context.pattern_ptr = tail;
+            break;
+        }
+        case SRE_OP_REPEAT_POSS:
+        case SRE_OP_REPEAT_POSS_REV:
+        {
+            /* Possessive repeat. */
+            /*
+             <REPEAT_POSS> <skip to end> <min> <max>
+                 ...
+             <END_REPEAT_POSS> <skip to start>
+             */
+            BOOL forward = op == SRE_OP_REPEAT_POSS;
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body;
+            SRE_CODE* tail;
+            Py_ssize_t available;
+            Py_ssize_t max_rep;
+            SRE_BACKTRACK_ITEM* new_loop;
+            BOOL try_body;
+            BOOL try_tail;
+
+            /* Point to the repeat and end-repeat operators. */
+            repeat_ptr = context.pattern_ptr;
+            end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+
+            TRACE(("|%p|%p|%s %u %u\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name, repeat_ptr[2], repeat_ptr[3]));
+
+            /* Point to the body of the repeat and the tail of the pattern. */
+            body = repeat_ptr + 4;
+            tail = end_repeat_ptr + 2;
+
+            /* How many characters are still available? */
+            if (forward)
+                available = context.text_end - context.text_ptr;
+            else
+                available = context.text_ptr - context.text_start;
+
+            /*
+             Are there enough characters available for the repeat?
+
+             The repeat should consume at least one character per iteration and
+             must iterate a minimum number of times.
+            */
+            if ((Py_ssize_t)repeat_ptr[2] > available)
+                goto backtrack;
+
+            /* How many times can we repeat the body? */
+            if (repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+
+            /*
+             Save the context and initialise the repeat info for the new repeat.
+
+             If the body succeeds then we'll discard its backtrack info,
+             including any marks, so we need to save the marks here in case the
+             tail fails.
+             */
+            result = SRE_SAVE_BACKTRACK(&context, op, TRUE);
+            if (result != 0)
+                return SRE_CLEANUP(&context, result);
+            new_loop = context.backtrack_item;
+            new_loop->repeat.repeat_min = repeat_ptr[2];
+            new_loop->repeat.repeat_max = max_rep;
+            new_loop->repeat.repeat_counter = 0;
+            new_loop->repeat.loop = current_loop;
+            new_loop->repeat.repeat_start = context.text_ptr;
+
+            /* Should the body be tried? */
+            try_body = available > 0;
+            /* Should the tail be tried? */
+            try_tail = new_loop->repeat.repeat_min == 0 &&
+              SRE_POSSIBLE_MATCH_AHEAD(&context, tail);
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                     Both the body and the tail should be tried.
+
+                     The body takes precedence, so create a backtrack point for
+                     the tail.
+                     */
+                    result = SRE_SAVE_BACKTRACK(&context, end_repeat_ptr[0],
+                      FALSE);
+                    if (result != 0)
+                        return SRE_CLEANUP(&context, result);
+                    /* Save the context for trying the tail. */
+                    context.backtrack_item->repeat.text_ptr = context.text_ptr;
+                    context.backtrack_item->repeat.pattern_ptr = tail;
+                    context.backtrack_item->repeat.loop = new_loop;
+                }
+                /* Try the body. */
+                current_loop = new_loop;
+                context.pattern_ptr = body;
+            } else {
+                if (try_tail)
+                    /* Only the tail should be tried, so do that. */
+                    context.pattern_ptr = tail;
+                else
+                    /*
+                     Neither the body nor the tail should be tried, so
+                     backtrack.
+                     */
+                    goto backtrack;
+            }
+            break;
+        }
+        case SRE_OP_SET:
+            /* Character in set (forwards). */
+            /* <SET> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_set(state, context.pattern_ptr + 1, context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_IGNORE:
+            /* Character in set, ignoring case (forwards). */
+            /* <SET_IGNORE> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr >= context.text_end ||
+              !in_set_ignore(state, context.pattern_ptr + 1,
+              context.text_ptr[0]))
+                goto backtrack;
+            context.text_ptr++;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_IGNORE_REV:
+            /* Character in set, ignoring case (backwards). */
+            /* <SET_IGNORE_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_set_ignore(state, context.pattern_ptr + 1,
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_SET_REV:
+            /* Character in set (backwards). */
+            /* <SET_REV> <set> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr <= context.text_start ||
+              !in_set(state, context.pattern_ptr + 1, context.text_ptr[-1]))
+                goto backtrack;
+            context.text_ptr--;
+            context.pattern_ptr = context.pattern_ptr + 1 +
+              context.pattern_ptr[1];
+            break;
+        case SRE_OP_START_OF_LINE:
+            /* Start of line. */
+            /* <START_OF_LINE> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr > context.text_beginning &&
+              !state->encoding->in_category(SRE_CAT_LineBreak,
+              context.text_ptr[-1]))
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_START_OF_SEARCH:
+            /* Start of search. */
+            /* <START_OF_SEARCH> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr != context.search_ptr)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
+        case SRE_OP_START_OF_STRING:
+            /* Start of string. */
+            /* <START_OF_STRING> */
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            if (context.text_ptr > context.text_beginning)
+                goto backtrack;
+            context.pattern_ptr++;
+            break;
         case SRE_OP_SUCCESS:
-            /* end of pattern */
-            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
-            state->ptr = ctx->ptr;
-            RETURN_SUCCESS;
-
-        case SRE_OP_AT:
-            /* match at given position */
-            /* <AT> <code> */
-            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_CATEGORY:
-            /* match at given category */
-            /* <CATEGORY> <code> */
-            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_ANY:
-            /* match anything (except a newline) */
-            /* <ANY> */
-            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_ANY_ALL:
-            /* match anything */
-            /* <ANY_ALL> */
-            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end)
-                RETURN_FAILURE;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN:
-            /* match set member (or non_member) */
-            /* <IN> <skip> <set> */
-            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_LITERAL_IGNORE:
-            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN_IGNORE:
-            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end
-                || !SRE_CHARSET(ctx->pattern+1,
-                                (SRE_CODE)state->lower(*ctx->ptr)))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_JUMP:
-        case SRE_OP_INFO:
-            /* jump forward */
-            /* <JUMP> <offset> */
-            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_BRANCH:
-            /* alternation */
-            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
-            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
-            LASTMARK_SAVE();
-            ctx->u.rep = state->repeat;
-            if (ctx->u.rep)
-                MARK_PUSH(ctx->lastmark);
-            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
-                if (ctx->pattern[1] == SRE_OP_LITERAL &&
-                    (ctx->ptr >= end ||
-                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
-                    continue;
-                if (ctx->pattern[1] == SRE_OP_IN &&
-                    (ctx->ptr >= end ||
-                     !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
-                    continue;
-                state->ptr = ctx->ptr;
-                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
-                if (ret) {
-                    if (ctx->u.rep)
-                        MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                if (ctx->u.rep)
-                    MARK_POP_KEEP(ctx->lastmark);
-                LASTMARK_RESTORE();
-            }
-            if (ctx->u.rep)
-                MARK_POP_DISCARD(ctx->lastmark);
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT_ONE:
-            /* match repeated sequence (maximizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MAX_REPEAT operator */
-
-            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
-            RETURN_ON_ERROR(ret);
-            DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-            ctx->count = ret;
-            ctx->ptr += ctx->count;
-
-            /* when we arrive here, count contains the number of
-               matches, and ctx->ptr points to the tail of the target
-               string.  check if the rest of the pattern matches,
-               and backtrack if not. */
-
-            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                RETURN_FAILURE;
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
-            }
-
-            LASTMARK_SAVE();
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
-                /* tail starts with a literal. skip positions where
-                   the rest of the pattern cannot possibly match */
-                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
-                for (;;) {
-                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
-                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
-                        ctx->ptr--;
-                        ctx->count--;
-                    }
-                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                        break;
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-
-                    LASTMARK_RESTORE();
-
-                    ctx->ptr--;
-                    ctx->count--;
-                }
-
-            } else {
-                /* general case */
-                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    ctx->ptr--;
-                    ctx->count--;
-                    LASTMARK_RESTORE();
-                }
-            }
-            RETURN_FAILURE;
-
-        case SRE_OP_MIN_REPEAT_ONE:
-            /* match repeated sequence (minimizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MIN_REPEAT operator */
-
-            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            if (ctx->pattern[1] == 0)
-                ctx->count = 0;
-            else {
-                /* count using pattern min as the maximum */
-                ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
-                RETURN_ON_ERROR(ret);
-                DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                if (ret < (Py_ssize_t) ctx->pattern[1])
-                    /* didn't match minimum number of times */
-                    RETURN_FAILURE;
-                /* advance past minimum matches of repeat */
-                ctx->count = ret;
-                ctx->ptr += ctx->count;
-            }
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
-
-            } else {
-                /* general case */
-                LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
-                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    state->ptr = ctx->ptr;
-                    ret = SRE_COUNT(state, ctx->pattern+3, 1);
-                    RETURN_ON_ERROR(ret);
-                    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                    if (ret == 0)
-                        break;
-                    assert(ret == 1);
-                    ctx->ptr++;
-                    ctx->count++;
-                    LASTMARK_RESTORE();
-                }
-            }
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT:
-            /* create repeat context.  all the hard work is done
-               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
-            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
-            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            /* install new repeat context */
-            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
-            if (!ctx->u.rep) {
-                PyErr_NoMemory();
-                RETURN_FAILURE;
-            }
-            ctx->u.rep->count = -1;
-            ctx->u.rep->pattern = ctx->pattern;
-            ctx->u.rep->prev = state->repeat;
-            ctx->u.rep->last_ptr = NULL;
-            state->repeat = ctx->u.rep;
-
-            state->ptr = ctx->ptr;
-            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
-            state->repeat = ctx->u.rep->prev;
-            PyObject_FREE(ctx->u.rep);
-
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-            RETURN_FAILURE;
-
-        case SRE_OP_MAX_UNTIL:
-            /* maximizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
-
-            /* FIXME: we probably need to deal with zero-width
-               matches in here... */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
-                   ctx->ptr, ctx->count));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
-            }
-
-            if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
-                state->ptr != ctx->u.rep->last_ptr) {
-                /* we may have enough matches, but if we can
-                   match another item, do so */
-                ctx->u.rep->count = ctx->count;
-                LASTMARK_SAVE();
-                MARK_PUSH(ctx->lastmark);
-                /* zero-width match protection */
-                DATA_PUSH(&ctx->u.rep->last_ptr);
-                ctx->u.rep->last_ptr = state->ptr;
-                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
-                        ctx->u.rep->pattern+3);
-                DATA_POP(&ctx->u.rep->last_ptr);
-                if (ret) {
-                    MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                MARK_POP(ctx->lastmark);
-                LASTMARK_RESTORE();
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-            }
-
-            /* cannot match more repeated items here.  make sure the
-               tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
-            RETURN_ON_SUCCESS(ret);
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_MIN_UNTIL:
-            /* minimizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
-                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
-            }
-
-            LASTMARK_SAVE();
-
-            /* see if the tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-
-            LASTMARK_RESTORE();
-
-            if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
-                RETURN_FAILURE;
-
-            ctx->u.rep->count = ctx->count;
-            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
-                    ctx->u.rep->pattern+3);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-            ctx->u.rep->count = ctx->count-1;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_GROUPREF:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end || *ctx->ptr != *p)
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
+        {
+            /* Success at the end of the pattern. */
+            /* <SUCCESS> */
+            BOOL zero_width;
+            unsigned int m;
+            SRE_CHAR* end_ptr;
+            unsigned int max_mark;
+            TRACE(("|%p|%p|%s\n", context.pattern_ptr, context.text_ptr,
+              sre_op_info[op].name));
+
+            /* Is the entire matched portion zero-width? */
+            zero_width = context.text_ptr == context.text_start;
+
+            /*
+             Reject the match if it's zero-width and we aren't allowed to
+             return zero-width matches.
+             */
+            if (zero_width && state->reject_zero_width)
+                goto backtrack;
+
+            /*
+             Find the numbered mark which matched the furthest to the right.
+             */
+            end_ptr = NULL;
+            for (m = 1; m < state->numbered_mark_count; m += 2) {
+                if (context.marks[m - 1] != NULL &&
+                    context.marks[m] >= context.marks[m - 1]) {
+                    state->lastmark = m;
+                    if (end_ptr < context.marks[m]) {
+                        state->lastindex = 1 + m / 2;
+                        end_ptr = context.marks[m];
                     }
                 }
             }
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_GROUPREF_IGNORE:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end ||
-                            state->lower(*ctx->ptr) != state->lower(*p))
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
+
+            /* Find the named mark which matched the furthest to the right. */
+            end_ptr = NULL;
+            max_mark = state->numbered_mark_count + state->named_mark_count;
+            for (m = state->numbered_mark_count + 1; m < max_mark; m += 2) {
+                if (context.marks[m - 1] != NULL &&
+                    context.marks[m] >= context.marks[m - 1]) {
+                    if (end_ptr < context.marks[m]) {
+                        state->last_named_index = 1 + m / 2;
+                        end_ptr = context.marks[m];
                     }
                 }
             }
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    ctx->pattern += ctx->pattern[1];
+
+            /* Record where the match finished. */
+            state->ptr = context.text_ptr;
+            return SRE_CLEANUP(&context, 1);
+        }
+        default:
+            /* Unknown opcode. */
+            TRACE(("|%p|%p|UNKNOWN %u\n", context.pattern_ptr, context.text_ptr,
+              context.pattern_ptr[0]));
+            return SRE_CLEANUP(&context, SRE_ERROR_ILLEGAL);
+        }
+    }
+
+backtrack:
+    /* Handle the backtracking. */
+    TRACE(("|%p|%p|BACKTRACK ", context.pattern_ptr, context.text_ptr));
+
+    /* Fetch the backtracking info. */
+    context.backtrack_item = context.backtrack_chunk->items +
+      (context.backtrack_chunk->count - 1);
+
+    op = context.backtrack_item->op;
+    switch (op) {
+    case SRE_OP_ASSERT:
+        /* Assert subpattern (+ve look-ahead/look-behind). */
+        /* <ASSERT> <skip to end> ... <END_ASSERT> */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         The subpattern has failed, so the marks have already been restored.
+
+         Restore the context and continue backtracking.
+        */
+        context.text_start = context.backtrack_item->assert.text_start;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_ASSERT_NOT:
+        /* Assert not subpattern (-ve look-ahead/look-behind). */
+        /* <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT> */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         The subpattern has failed, so the marks have already been restored.
+
+         Restore the context and continue matching.
+        */
+        context.text_start = context.backtrack_item->assert.text_start;
+        context.text_ptr = context.backtrack_item->assert.text_ptr;
+        context.pattern_ptr = context.backtrack_item->assert.pattern_ptr;
+        SRE_DISCARD_BACKTRACK(&context);
+
+        context.pattern_ptr += 1 + context.pattern_ptr[1];
+        goto advance;
+    case SRE_OP_ATOMIC:
+        /* Atomic subpattern. */
+        /* <ATOMIC> ... <END_ATOMIC> */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         The subpattern has failed, so the marks have already been restored.
+
+         Continue backtracking.
+         */
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_BRANCH:
+    {
+        /* Alternation. */
+        /*
+         <BRANCH>
+         <skip to next>
+           ...
+           <JUMP> <skip to end>
+         <skip to next>
+           ...
+           <JUMP> <skip to end>
+         0
+         */
+        SRE_CODE* skip_ptr;
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Fetch the next branch. */
+        skip_ptr = context.backtrack_item->branch.pattern_ptr;
+
+        /* Restore the context for the next branch. */
+        context.text_ptr = context.backtrack_item->branch.text_ptr;
+
+        /* Look ahead in the branch to avoid unnecessary backtracking. */
+        while (! SRE_POSSIBLE_MATCH_AHEAD(&context, skip_ptr + 1)) {
+            /* This branch can't match, so advance to the next one. */
+            skip_ptr += skip_ptr[0];
+
+            /* Is there another branch? */
+            if (skip_ptr[0] == 0) {
+                /* No more branches, so backtrack. */
+                SRE_DISCARD_BACKTRACK(&context);
+                goto backtrack;
+            }
+        }
+
+        /* Try this branch. */
+        context.pattern_ptr = skip_ptr + 1;
+
+        /*
+         Is there another branch?
+
+         There's no need to save the context if this is the last branch.
+         */
+        skip_ptr += skip_ptr[0];
+        if (skip_ptr[0] == 0)
+            /* No more branches after this one. */
+            SRE_DISCARD_BACKTRACK(&context);
+        else
+            /* Save the next branch for backtracking. */
+            context.backtrack_item->branch.pattern_ptr = skip_ptr;
+        goto advance;
+    }
+    case SRE_OP_END_ATOMIC:
+        /* Atomic subpattern. */
+        /* <ATOMIC> <skip to end> ... <END_ATOMIC> */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         The tail has failed, so restore the marks and continue backtracking.
+         */
+        memmove(context.marks, context.backtrack_item->marks,
+          context.marks_size);
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_END_REPEAT_MAX:
+    case SRE_OP_END_REPEAT_MAX_REV:
+        /* End of greedy repeat. */
+        /*
+         <REPEAT_MAX> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_MAX> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Restore the context. */
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+
+        /*
+         REPEAT_MAX prefers trying the body to trying the tail.
+
+         We've tried the body, so now we need to try the tail.
+
+         The tail expects the current loop to be this one's enclosing (outer)
+         loop.
+         */
+        current_loop = context.backtrack_item->repeat.loop->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto advance;
+    case SRE_OP_END_REPEAT_MIN:
+    case SRE_OP_END_REPEAT_MIN_REV:
+        /* Lazy repeat. */
+        /*
+         <REPEAT_MIN> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_MIN> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Restore the context. */
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+
+        /*
+         REPEAT_MIN prefers trying the tail to trying the body.
+
+         We've tried the tail, so now we need to try the body.
+
+         The body expects the current loop to be this one's.
+         */
+        current_loop = context.backtrack_item->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto advance;
+    case SRE_OP_END_REPEAT_POSS:
+    case SRE_OP_END_REPEAT_POSS_REV:
+        /* End of possessive repeat. */
+        /*
+         <REPEAT_POSS> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_POSS> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Restore the context. */
+        context.text_ptr = context.backtrack_item->repeat.text_ptr;
+        context.pattern_ptr = context.backtrack_item->repeat.pattern_ptr;
+
+        /*
+         REPEAT_POSS prefers trying the body to trying the tail.
+
+         We've tried the body, so now we need to try the tail.
+
+         The tail expects the current loop to be this one's enclosing (outer)
+         loop.
+         */
+        current_loop = context.backtrack_item->repeat.loop->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto advance;
+    case SRE_OP_FAILURE:
+        /* Failed to match. */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Permit a zero-width match next time. */
+        state->reject_zero_width = FALSE;
+        return SRE_CLEANUP(&context, 0);
+    case SRE_OP_MARK:
+    {
+        /* Text mark. */
+        /* <MARK> <numbered_index> <named_index> */
+        SRE_BACKTRACK_ITEM* item;
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         The number and name ids need to be restored in the opposite order to
+         which they were saved. This is because the name id might be the same
+         as the number id.
+         */
+        item = context.backtrack_item;
+        /* The name id. */
+        context.marks[item->mark.named_index] = item->mark.named_mark_ptr;
+        /* The number id. */
+        context.marks[item->mark.numbered_index] = item->mark.numbered_mark_ptr;
+
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    }
+    case SRE_OP_REPEAT_MAX:
+    case SRE_OP_REPEAT_MAX_REV:
+        /* Greedy repeat. */
+        /*
+         <REPEAT_MAX> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_MAX> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         REPEAT_MAX failed.
+
+         Restore 'current' loop to the enclosing loop and backtrack.
+         */
+        current_loop = context.backtrack_item->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_MIN:
+    case SRE_OP_REPEAT_MIN_REV:
+        /* Lazy repeat. */
+        /*
+         <REPEAT_MIN> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_MIN> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         REPEAT_MIN failed.
+
+         Restore 'current' loop to the enclosing loop and backtrack.
+         */
+        current_loop = context.backtrack_item->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    case SRE_OP_REPEAT_ONE_MAX:
+    case SRE_OP_REPEAT_ONE_MAX_REV:
+    {
+        /* Greedy repeat. */
+        /*
+         <REPEAT_ONE_MAX> <skip to end> <min> <max>
+             ...
+         */
+        BOOL forward = op == SRE_OP_REPEAT_ONE_MAX;
+        SRE_CODE* repeat_ptr;
+        SRE_CODE* tail;
+        SRE_BACKTRACK_ITEM* loop;
+        SRE_CHAR* start_ptr;
+        BOOL ok;
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Point to the repeat operator. */
+        repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+
+        /* Point to the tail of the pattern. */
+        tail = repeat_ptr + 1 + repeat_ptr[1];
+
+        /* The loop info is stored in the backtrack info. */
+        loop = context.backtrack_item;
+
+        /* Restore the context. */
+        start_ptr = loop->repeat.repeat_start;
+        if (forward)
+            context.text_ptr = start_ptr + loop->repeat.repeat_counter;
+        else
+            context.text_ptr = start_ptr - loop->repeat.repeat_counter;
+
+        /*
+         Release the last character we matched in the body and then unmatch down
+         to the minimum, until the tail could match.
+         */
+        if (forward) {
+            context.text_ptr--;
+            ok = SRE_UNMATCH_UNTIL_TAIL(&context, start_ptr +
+              loop->repeat.repeat_min, tail);
+        } else {
+            context.text_ptr++;
+            ok = SRE_UNMATCH_UNTIL_TAIL_REV(&context, start_ptr -
+              loop->repeat.repeat_min, tail);
+        }
+        if(!ok) {
+            /* Reached the minimum and the tail still couldn't match. */
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+
+        /* How many times has the body matched? */
+        if (forward)
+            loop->repeat.repeat_counter = context.text_ptr - start_ptr;
+        else
+            loop->repeat.repeat_counter = start_ptr - context.text_ptr;
+
+        /*
+         Now match the tail.
+
+         The tail expects the 'current' loop to be the enclosing one.
+         */
+        current_loop = loop->repeat.loop;
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_ONE_MIN:
+    case SRE_OP_REPEAT_ONE_MIN_REV:
+    {
+        /* Lazy repeat. */
+        /*
+         <REPEAT_ONE_MIN> <skip to end> <min> <max>
+             ...
+         */
+        BOOL forward = op == SRE_OP_REPEAT_ONE_MIN;
+        SRE_CODE* repeat_ptr;
+        SRE_CODE* body;
+        SRE_CODE* tail;
+        SRE_BACKTRACK_ITEM* loop;
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* max_ptr;
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /* Point to the repeat operator. */
+        repeat_ptr = context.backtrack_item->repeat.pattern_ptr;
+
+        /* Point to the body of the repeat and the tail of the pattern. */
+        body = repeat_ptr + 4;
+        tail = repeat_ptr + 1 + repeat_ptr[1];
+
+        /* The loop info is stored in the backtrack info. */
+        loop = context.backtrack_item;
+
+        /* Restore the context. */
+        start_ptr = loop->repeat.repeat_start;
+        if (forward)
+            context.text_ptr = start_ptr + loop->repeat.repeat_counter;
+        else
+            context.text_ptr = start_ptr - loop->repeat.repeat_counter;
+
+        /* Match up to the maximum, until the tail could match. */
+        if (forward)
+            max_ptr = start_ptr + context.backtrack_item->repeat.repeat_max;
+        else
+            max_ptr = start_ptr - context.backtrack_item->repeat.repeat_max;
+        if (!SRE_MATCH_MANY_UNTIL_TAIL(&context, max_ptr, body, tail)) {
+            /* Reached the maximum and the tail still couldn't match. */
+            SRE_DISCARD_BACKTRACK(&context);
+            goto backtrack;
+        }
+
+        /* How many times has the body matched? */
+        if (forward)
+            loop->repeat.repeat_counter = context.text_ptr - start_ptr;
+        else
+            loop->repeat.repeat_counter = start_ptr - context.text_ptr;
+
+        /*
+         Now match the tail.
+
+         The tail expects the 'current' loop to be the enclosing one.
+         */
+        current_loop = loop->repeat.loop;
+        context.pattern_ptr = tail;
+        goto advance;
+    }
+    case SRE_OP_REPEAT_POSS:
+    case SRE_OP_REPEAT_POSS_REV:
+        /* Possessive repeat. */
+        /*
+         <REPEAT_POSS> <skip to end> <min> <max>
+             ...
+         <END_REPEAT_POSS> <skip to start>
+         */
+        TRACE(("%s\n", sre_op_info[op].name));
+
+        /*
+         REPEAT_POSS failed.
+
+         Restore the marks, restore 'current' loop to the enclosing loop and
+         backtrack.
+        */
+        memmove(context.marks, context.backtrack_item->marks,
+          context.marks_size);
+        current_loop = context.backtrack_item->repeat.loop;
+        SRE_DISCARD_BACKTRACK(&context);
+        goto backtrack;
+    default:
+        /* Unknown opcode. */
+        TRACE(("UNKNOWN %u\n", context.backtrack_item->op));
+        return SRE_CLEANUP(&context, SRE_ERROR_ILLEGAL);
+    }
+
+    return 0;
+}
+
+LOCAL(int) SRE_SEARCH(SRE_STATE* state) {
+    SRE_CODE* repeat_ptr;
+    SRE_CODE* tail;
+    SRE_CONTEXT context;
+    int status = 0;
+
+    /*
+     If a pattern starts with "c{m,n}" where "c" matches a single character and
+     the pattern fails, then advancing by only one character before retrying
+     could be inefficient (if m < n and it failed to match m...n times when it
+     would certainly fail to match m...n-1 times!).
+     */
+    repeat_ptr = state->pattern_code;
+    if (is_repeat_one(repeat_ptr[0]) && repeat_ptr[3] == SRE_UNLIMITED_REPEATS)
+        repeat_ptr += 4;
+    else
+        repeat_ptr = NULL;
+
+    /* Skip over any marks. */
+    tail = state->pattern_code;
+    while (tail[0] == SRE_OP_MARK)
+        tail += SRE_MARK_OP_SIZE;
+
+    /*
+     If the pattern is anchored at the start of the string or the start of the
+     search then try a match instead of searching.
+     */
+    if (tail[0] == SRE_OP_START_OF_STRING ||
+      tail[0] == SRE_OP_START_OF_SEARCH) {
+        /* Where should we start the match? */
+        state->ptr = state->reverse ? (SRE_CHAR *)state->end :
+          (SRE_CHAR *)state->start;
+        state->search_ptr = state->ptr;
+        return SRE_MATCH(state);
+    }
+
+    /*
+     Initialise the context.
+
+     It's more efficient to do it here instead of each time we try a match.
+     */
+    context.state = state;
+    context.text_beginning = (SRE_CHAR *)state->beginning;
+    context.text_start = (SRE_CHAR *)state->start;
+    context.text_end = (SRE_CHAR *)state->end;
+
+    /* Point to the final newline if it's the final character. */
+    context.final_linebreak = context.text_beginning < context.text_end &&
+      state->encoding->in_category(SRE_CAT_LineBreak, context.text_end[-1]) ?
+      context.text_end - 1 : NULL;
+
+    /*
+     state->reject_zero_width might initially be set to reject an initial zero-
+     width match.
+
+     If there's no match initially then state->reject_zero_width will be
+     cleared to allow a zero-width match subsequently.
+     */
+    if (state->reverse) {
+        /*
+         We want to search backwards.
+
+         Where should we start the match?
+         */
+        context.text_ptr = (SRE_CHAR *)state->end;
+
+        /* Try a match at each position until we're successful. */
+        while (context.text_ptr >= context.text_start) {
+            TRACE(("|%p|%p|SEARCH\n", state->pattern_code, context.text_ptr));
+
+            /* Could the pattern match here? */
+            if (SRE_POSSIBLE_MATCH_AHEAD(&context, tail)) {
+                /* Try a match. */
+                state->end = state->ptr = context.text_ptr;
+                status = SRE_MATCH(state);
+                if (status != 0)
                     break;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p) {
-                        ctx->pattern += ctx->pattern[1];
-                        break;
-                    }
-                }
-            }
-            ctx->pattern += 2;
-            break;
-
-        case SRE_OP_ASSERT:
-            /* assert subpattern */
-            /* <ASSERT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr < state->beginning)
-                RETURN_FAILURE;
-            DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
-            RETURN_ON_FAILURE(ret);
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_ASSERT_NOT:
-            /* assert not subpattern */
-            /* <ASSERT_NOT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr >= state->beginning) {
-                DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_FAILURE;
-                }
-            }
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_FAILURE:
-            /* immediate failure */
-            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
-            RETURN_FAILURE;
-
-        default:
-            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[-1]));
-            RETURN_ERROR(SRE_ERROR_ILLEGAL);
-        }
-    }
-
-exit:
-    ctx_pos = ctx->last_ctx_pos;
-    jump = ctx->jump;
-    DATA_POP_DISCARD(ctx);
-    if (ctx_pos == -1)
-        return ret;
-    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-
-    switch (jump) {
-        case JUMP_MAX_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_2;
-        case JUMP_MAX_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_3;
-        case JUMP_MIN_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_2;
-        case JUMP_MIN_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_3;
-        case JUMP_BRANCH:
-            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
-            goto jump_branch;
-        case JUMP_MAX_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_1;
-        case JUMP_MIN_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_1;
-        case JUMP_REPEAT:
-            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat;
-        case JUMP_REPEAT_ONE_1:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_1;
-        case JUMP_REPEAT_ONE_2:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_2;
-        case JUMP_MIN_REPEAT_ONE:
-            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
-            goto jump_min_repeat_one;
-        case JUMP_ASSERT:
-            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert;
-        case JUMP_ASSERT_NOT:
-            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert_not;
-        case JUMP_NONE:
-            TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
-            break;
-    }
-
-    return ret; /* should never get here */
-}
-
-LOCAL(Py_ssize_t)
-SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
-{
-    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t status = 0;
-    Py_ssize_t prefix_len = 0;
-    Py_ssize_t prefix_skip = 0;
-    SRE_CODE* prefix = NULL;
-    SRE_CODE* charset = NULL;
-    SRE_CODE* overlap = NULL;
-    int flags = 0;
-
-    if (pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
-
-        flags = pattern[2];
-
-        if (pattern[3] > 1) {
-            /* adjust end point (but make sure we leave at least one
-               character in there, so literal search will work) */
-            end -= pattern[3]-1;
-            if (end <= ptr)
-                end = ptr+1;
-        }
-
-        if (flags & SRE_INFO_PREFIX) {
-            /* pattern starts with a known prefix */
-            /* <length> <skip> <prefix data> <overlap data> */
-            prefix_len = pattern[5];
-            prefix_skip = pattern[6];
-            prefix = pattern + 7;
-            overlap = prefix + prefix_len - 1;
-        } else if (flags & SRE_INFO_CHARSET)
-            /* pattern starts with a character from a known set */
-            /* <charset> */
-            charset = pattern + 5;
-
-        pattern += 1 + pattern[1];
-    }
-
-    TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
-    TRACE(("charset = %p\n", charset));
-
-#if defined(USE_FAST_SEARCH)
-    if (prefix_len > 1) {
-        /* pattern starts with a known prefix.  use the overlap
-           table to skip forward as fast as we possibly can */
-        Py_ssize_t i = 0;
-        end = (SRE_CHAR *)state->end;
-        while (ptr < end) {
-            for (;;) {
-                if ((SRE_CODE) ptr[0] != prefix[i]) {
-                    if (!i)
-                        break;
+
+                /* Is there an initial repeat? */
+                if (repeat_ptr != NULL) {
+                    /*
+                     How many characters could the initial repeat match if
+                     unlimited?
+                     */
+                    SRE_CHAR * max_ptr = context.text_ptr - repeat_ptr[3];
+                    SRE_MATCH_MANY(&context, context.text_start,
+                      repeat_ptr);
+
+                    if (context.text_ptr >= max_ptr)
+                        /*
+                         The initial repeat could have consumed all those
+                         available, but it still failed to match, so discard
+                         all of those, advance by one, and try again.
+                         */
+                        context.text_ptr--;
                     else
-                        i = overlap[i];
-                } else {
-                    if (++i == prefix_len) {
-                        /* found a potential match */
-                        TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
-                        state->start = ptr + 1 - prefix_len;
-                        state->ptr = ptr + 1 - prefix_len + prefix_skip;
-                        if (flags & SRE_INFO_LITERAL)
-                            return 1; /* we got all of it */
-                        status = SRE_MATCH(state, pattern + 2*prefix_skip);
-                        if (status != 0)
-                            return status;
-                        /* close but no cigar -- try again */
-                        i = overlap[i];
-                    }
+                        /*
+                         The initial repeat couldn't have consumed all those
+                         available, so discard until it /could/ consume all
+                         those available and try again.
+                         */
+                        context.text_ptr += repeat_ptr[3];
+                } else
+                    /* Advance and try again. */
+                    context.text_ptr--;
+            } else
+                /* Advance and try again. */
+                context.text_ptr--;
+
+            state->reject_zero_width = FALSE;
+        }
+    } else {
+        /*
+         We want to search forwards.
+
+         Where should we start the match?
+         */
+        context.text_ptr = (SRE_CHAR *)state->start;
+
+        /* Try a match at each position until we're successful. */
+        while (context.text_ptr <= context.text_end) {
+            TRACE(("|%p|%p|SEARCH\n", state->pattern_code, context.text_ptr));
+
+            /* Could the pattern match here? */
+            if (SRE_POSSIBLE_MATCH_AHEAD(&context, tail)) {
+                /* Try a match. */
+                state->start = state->ptr = context.text_ptr;
+                status = SRE_MATCH(state);
+                if (status != 0)
                     break;
-                }
-            }
-            ptr++;
-        }
-        return 0;
-    }
-#endif
-
-    if (pattern[0] == SRE_OP_LITERAL) {
-        /* pattern starts with a literal character.  this is used
-           for short prefixes, and if fast search is disabled */
-        SRE_CODE chr = pattern[1];
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && (SRE_CODE) ptr[0] != chr)
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ++ptr;
-            if (flags & SRE_INFO_LITERAL)
-                return 1; /* we got all of it */
-            status = SRE_MATCH(state, pattern + 2);
-            if (status != 0)
-                break;
-        }
-    } else if (charset) {
-        /* pattern starts with a character from a known set */
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ptr;
-            status = SRE_MATCH(state, pattern);
-            if (status != 0)
-                break;
-            ptr++;
-        }
-    } else
-        /* general case */
-        while (ptr <= end) {
-            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
-            state->start = state->ptr = ptr++;
-            status = SRE_MATCH(state, pattern);
-            if (status != 0)
-                break;
-        }
+
+                /* Is there an initial repeat? */
+                if (repeat_ptr != NULL) {
+                    /*
+                     How many characters could the initial repeat match if
+                     unlimited?
+                     */
+                    SRE_CHAR * max_ptr = context.text_ptr + repeat_ptr[3];
+                    SRE_MATCH_MANY(&context, context.text_end,
+                      repeat_ptr);
+
+                    if (context.text_ptr <= max_ptr)
+                        /*
+                         The initial repeat could have consumed all those
+                         available, but it still failed to match, so discard
+                         all of those, advance by one, and try again.
+                         */
+                        context.text_ptr++;
+                    else
+                        /*
+                         The initial repeat couldn't have consumed all those
+                         available, so discard until it /could/ consume all
+                         those available and try again.
+                         */
+                        context.text_ptr -= repeat_ptr[3];
+                } else
+                    /* Advance and try again. */
+                    context.text_ptr++;
+            } else
+                /* Advance and try again. */
+                context.text_ptr++;
+
+            state->reject_zero_width = FALSE;
+        }
+    }
 
     return status;
 }
 
-LOCAL(int)
-SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
-{
+LOCAL(BOOL) SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len) {
     /* check if given string is a literal template (i.e. no escapes) */
     while (len-- > 0)
         if (*ptr++ == '\\')
             return 0;
-    return 1;
+    return TRUE;
 }
 
 #if !defined(SRE_RECURSIVE)
@@ -1630,49 +5109,70 @@
 /* factories and destructors */
 
 /* see sre.h for object declarations */
-static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
-static PyObject*pattern_scanner(PatternObject*, PyObject*);
-
-static PyObject *
-sre_codesize(PyObject* self, PyObject *unused)
-{
+static PyObject* pattern_new_match(PatternObject*, SRE_STATE*, int);
+static PyObject* pattern_scanner(PatternObject*, PyObject*);
+
+static PyObject* sre_codesize(PyObject* self, PyObject *unused) {
     return Py_BuildValue("l", sizeof(SRE_CODE));
 }
 
-static PyObject *
-sre_getlower(PyObject* self, PyObject* args)
-{
+/* Exported function to convert a character to lowercase. */
+static PyObject* sre_getlower(PyObject* self, PyObject* args) {
     int character, flags;
     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
         return NULL;
     if (flags & SRE_FLAG_LOCALE)
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", loc_lower(character));
     if (flags & SRE_FLAG_UNICODE)
 #if defined(HAVE_UNICODE)
-        return Py_BuildValue("i", sre_lower_unicode(character));
+        return Py_BuildValue("i", uni_lower(character));
 #else
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", loc_lower(character));
 #endif
-    return Py_BuildValue("i", sre_lower(character));
-}
-
-LOCAL(void)
-state_reset(SRE_STATE* state)
-{
-    /* FIXME: dynamic! */
-    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
-
+    return Py_BuildValue("i", ascii_lower(character));
+}
+
+/* Exported function to convert a character to uppercase. */
+static PyObject* sre_getupper(PyObject* self, PyObject* args) {
+    int character, flags;
+    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
+        return NULL;
+    if (flags & SRE_FLAG_LOCALE)
+        return Py_BuildValue("i", loc_upper(character));
+    if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
+        return Py_BuildValue("i", uni_upper(character));
+#else
+        return Py_BuildValue("i", loc_upper(character));
+#endif
+    return Py_BuildValue("i", ascii_upper(character));
+}
+
+/* Exported function to convert a character to titlecase. */
+static PyObject* sre_gettitle(PyObject* self, PyObject* args) {
+    int character, flags;
+    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
+        return NULL;
+    if (flags & SRE_FLAG_LOCALE)
+        return Py_BuildValue("i", loc_upper(character));
+    if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
+        return Py_BuildValue("i", uni_title(character));
+#else
+        return Py_BuildValue("i", loc_upper(character));
+#endif
+    return Py_BuildValue("i", ascii_upper(character));
+}
+
+/* Resets the state. */
+LOCAL(void) state_reset(SRE_STATE* state) {
     state->lastmark = -1;
     state->lastindex = -1;
-
-    state->repeat = NULL;
-
-    data_stack_dealloc(state);
-}
-
-static void*
-getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
-{
+    state->last_named_index = -1;
+}
+
+static void* getstring(PyObject* string, Py_ssize_t* p_length,
+  int* p_charsize) {
     /* given a python object, return a data pointer, a length (in
        characters), and a character size.  return NULL if the object
        is not a string (or not compatible) */
@@ -1694,7 +5194,7 @@
 #endif
 
     /* get pointer to string buffer */
-    buffer = Py_TYPE(string)->tp_as_buffer;
+    buffer = string->ob_type->tp_as_buffer;
     if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
         buffer->bf_getsegcount(string, NULL) != 1) {
         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
@@ -1736,10 +5236,9 @@
     return ptr;
 }
 
-LOCAL(PyObject*)
-state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
-           Py_ssize_t start, Py_ssize_t end)
-{
+/* Initialises the state. */
+LOCAL(PyObject*) state_init(SRE_STATE* state, PatternObject* pattern,
+  PyObject* string, Py_ssize_t start, Py_ssize_t end, SRE_CODE* pattern_code) {
     /* prepare state object */
 
     Py_ssize_t length;
@@ -1748,12 +5247,34 @@
 
     memset(state, 0, sizeof(SRE_STATE));
 
+    /* Store the pattern. */
+    state->pattern_code = pattern_code;
+
+    /* Create the first chunk of backtracking items. */
+    state->backtrack_chunk =
+      (SRE_BACKTRACK_CHUNK*)PyMem_MALLOC(sizeof(SRE_BACKTRACK_CHUNK));
+    if (state->backtrack_chunk == NULL)
+        goto error;
+
+    state->backtrack_chunk->previous = NULL;
+    state->backtrack_chunk->count = 0;
+
+    /*
+     Calculate how many numbered and named marks there are.
+
+     All capture groups are numbered. Some also have a name, but there can be
+     multiple groups with the same name, so there are name ids too.
+     */
+    state->numbered_mark_count = 2 * pattern->groups;
+    state->named_mark_count = 2 * (pattern->internal_groups - pattern->groups);
+
     state->lastmark = -1;
     state->lastindex = -1;
+    state->last_named_index = -1;
 
     ptr = getstring(string, &length, &charsize);
     if (!ptr)
-        return NULL;
+        goto error;
 
     /* adjust boundaries */
     if (start < 0)
@@ -1769,48 +5290,67 @@
     state->charsize = charsize;
 
     state->beginning = ptr;
-
     state->start = (void*) ((char*) ptr + start * state->charsize);
     state->end = (void*) ((char*) ptr + end * state->charsize);
+
+    /* Whether to reject zero-width matches. */
+    state->reject_zero_width = FALSE;
 
     Py_INCREF(string);
     state->string = string;
     state->pos = start;
     state->endpos = end;
 
-    if (pattern->flags & SRE_FLAG_LOCALE)
-        state->lower = sre_lower_locale;
-    else if (pattern->flags & SRE_FLAG_UNICODE)
-#if defined(HAVE_UNICODE)
-        state->lower = sre_lower_unicode;
-#else
-        state->lower = sre_lower_locale;
-#endif
+    /*
+     What is the encoding of the text?
+
+     The term "encoding" might not be correct: here it means whether the text
+     is ASCII, locale-specific 8-bit, or Unicode.
+     */
+    if ((pattern->flags & SRE_FLAG_UNICODE) || state->charsize > 1)
+        /* We'll assume that non-8-bit text is Unicode. */
+        state->encoding = &unicode_encoding;
+    else if (pattern->flags & SRE_FLAG_LOCALE)
+        /* Locale-specific 8-bit. */
+        state->encoding = &locale_encoding;
     else
-        state->lower = sre_lower;
+        /* ASCII. */
+        state->encoding = &ascii_encoding;
+
+    /* Whether to search backwards. */
+    state->reverse = pattern->flags & SRE_FLAG_REVERSE;
 
     return string;
-}
-
-LOCAL(void)
-state_fini(SRE_STATE* state)
-{
+
+error:
+    PyMem_FREE(state->backtrack_chunk);
+    return NULL;
+}
+
+LOCAL(void) state_fini(SRE_STATE* state) {
+    /*
+     There are actually 2 versions of backtrack_chunk, 8-bit and Unicode. This
+     shouldn't be a problem because they have the same format and contain
+     pointers and an int, which are always the same size.
+     */
+    PyMem_FREE(state->backtrack_chunk);
+    state->backtrack_chunk = NULL;
+
     Py_XDECREF(state->string);
-    data_stack_dealloc(state);
 }
 
 /* calculate offset from start of string */
-#define STATE_OFFSET(state, member)\
+#define STATE_OFFSET(state, member) \
     (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
 
-LOCAL(PyObject*)
-state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
-{
+LOCAL(PyObject*) state_getslice(SRE_STATE* state, Py_ssize_t index,
+  PyObject* string, int empty) {
     Py_ssize_t i, j;
 
     index = (index - 1) * 2;
 
-    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
+    if (string == Py_None || index >= state->lastmark || !state->mark[index] ||
+      !state->mark[index + 1]) {
         if (empty)
             /* want empty string */
             i = j = 0;
@@ -1820,15 +5360,13 @@
         }
     } else {
         i = STATE_OFFSET(state, state->mark[index]);
-        j = STATE_OFFSET(state, state->mark[index+1]);
+        j = STATE_OFFSET(state, state->mark[index + 1]);
     }
 
     return PySequence_GetSlice(string, i, j);
 }
 
-static void
-pattern_error(int status)
-{
+static void pattern_error(int status) {
     switch (status) {
     case SRE_ERROR_RECURSION_LIMIT:
         PyErr_SetString(
@@ -1851,23 +5389,20 @@
     }
 }
 
-static void
-pattern_dealloc(PatternObject* self)
-{
+static void pattern_dealloc(PatternObject* self) {
     if (self->weakreflist != NULL)
-        PyObject_ClearWeakRefs((PyObject *) self);
+        PyObject_ClearWeakRefs((PyObject*)self);
     Py_XDECREF(self->pattern);
     Py_XDECREF(self->groupindex);
     Py_XDECREF(self->indexgroup);
     PyObject_DEL(self);
 }
 
-static PyObject*
-pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_match(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     SRE_STATE state;
     int status;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -1876,23 +5411,26 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
-    state.ptr = state.start;
-
-    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
+    state.ptr = state.reverse ? state.end : state.start;
+    state.search_ptr = state.ptr;
+
+    TRACE(("|%p|%p|MATCH\n", pattern_code, state.ptr));
 
     if (state.charsize == 1) {
-        status = sre_match(&state, PatternObject_GetCode(self));
+        status = sre_bmatch(&state);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_umatch(&state, PatternObject_GetCode(self));
+        status = sre_umatch(&state);
 #endif
     }
 
-    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|END\n", pattern_code, state.ptr));
     if (PyErr_Occurred())
         return NULL;
 
@@ -1901,12 +5439,11 @@
     return pattern_new_match(self, &state, status);
 }
 
-static PyObject*
-pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_search(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     SRE_STATE state;
     int status;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -1915,21 +5452,23 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
-    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|SEARCH\n", pattern_code, state.ptr));
 
     if (state.charsize == 1) {
-        status = sre_search(&state, PatternObject_GetCode(self));
+        status = sre_bsearch(&state);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_usearch(&state, PatternObject_GetCode(self));
+        status = sre_usearch(&state);
 #endif
     }
 
-    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|END\n", pattern_code, state.ptr));
 
     state_fini(&state);
 
@@ -1939,9 +5478,7 @@
     return pattern_new_match(self, &state, status);
 }
 
-static PyObject*
-call(char* module, char* function, PyObject* args)
-{
+static PyObject* call(char* module, char* function, PyObject* args) {
     PyObject* name;
     PyObject* mod;
     PyObject* func;
@@ -1967,9 +5504,7 @@
 }
 
 #ifdef USE_BUILTIN_COPY
-static int
-deepcopy(PyObject** object, PyObject* memo)
-{
+static int deepcopy(PyObject** object, PyObject* memo) {
     PyObject* copy;
 
     copy = call(
@@ -1986,9 +5521,7 @@
 }
 #endif
 
-static PyObject*
-join_list(PyObject* list, PyObject* string)
-{
+static PyObject* join_list(PyObject* list, PyObject* string) {
     /* join list elements */
 
     PyObject* joiner;
@@ -2034,14 +5567,13 @@
     return result;
 }
 
-static PyObject*
-pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_findall(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     SRE_STATE state;
     PyObject* list;
     int status;
     Py_ssize_t i, b, e;
-
+    SRE_CODE* pattern_code;
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
@@ -2050,7 +5582,9 @@
                                      &string, &start, &end))
         return NULL;
 
-    string = state_init(&state, self, string, start, end);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, start, end, pattern_code);
     if (!string)
         return NULL;
 
@@ -2061,23 +5595,22 @@
     }
 
     while (state.start <= state.end) {
-
         PyObject* item;
 
         state_reset(&state);
 
-        state.ptr = state.start;
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2089,8 +5622,13 @@
         /* don't bother to build a match object */
         switch (self->groups) {
         case 0:
-            b = STATE_OFFSET(&state, state.start);
-            e = STATE_OFFSET(&state, state.ptr);
+            if (state.reverse) {
+                b = STATE_OFFSET(&state, state.ptr);
+                e = STATE_OFFSET(&state, state.end);
+            } else {
+                b = STATE_OFFSET(&state, state.start);
+                e = STATE_OFFSET(&state, state.ptr);
+            }
             item = PySequence_GetSlice(string, b, e);
             if (!item)
                 goto error;
@@ -2105,7 +5643,7 @@
             if (!item)
                 goto error;
             for (i = 0; i < self->groups; i++) {
-                PyObject* o = state_getslice(&state, i+1, string, 1);
+                PyObject* o = state_getslice(&state, i + 1, string, 1);
                 if (!o) {
                     Py_DECREF(item);
                     goto error;
@@ -2120,11 +5658,15 @@
         if (status < 0)
             goto error;
 
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+        /*
+         Continue the search from where we left off. Forbid another zero-width
+         match at the same start position.
+         */
+        if (state.reverse)
+            state.end = state.ptr;
         else
             state.start = state.ptr;
-
+        state.reject_zero_width = TRUE;
     }
 
     state_fini(&state);
@@ -2138,9 +5680,7 @@
 }
 
 #if PY_VERSION_HEX >= 0x02020000
-static PyObject*
-pattern_finditer(PatternObject* pattern, PyObject* args)
-{
+static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args) {
     PyObject* scanner;
     PyObject* search;
     PyObject* iterator;
@@ -2161,16 +5701,17 @@
 }
 #endif
 
-static PyObject*
-pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_split(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     SRE_STATE state;
     PyObject* list;
     PyObject* item;
     int status;
+    SRE_CODE* pattern_code;
     Py_ssize_t n;
     Py_ssize_t i;
     void* last;
+    BOOL zero_width;
 
     PyObject* string;
     Py_ssize_t maxsplit = 0;
@@ -2179,7 +5720,9 @@
                                      &string, &maxsplit))
         return NULL;
 
-    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX, pattern_code);
     if (!string)
         return NULL;
 
@@ -2189,46 +5732,69 @@
         return NULL;
     }
 
+    zero_width = (self->flags & SRE_FLAG_ZEROWIDTH) != 0;
+
     n = 0;
-    last = state.start;
+
+    /* Where did the last match end? */
+    last = state.reverse ? state.end : state.start;
 
     while (!maxsplit || n < maxsplit) {
-
         state_reset(&state);
 
-        state.ptr = state.start;
+        /* Where should the search start? */
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
+            /* The search failed. */
             if (status == 0)
                 break;
             pattern_error(status);
             goto error;
         }
 
-        if (state.start == state.ptr) {
-            if (last == state.end)
-                break;
-            /* skip one character */
-            state.start = (void*) ((char*) state.ptr + state.charsize);
-            continue;
-        }
-
-        /* get segment before this match */
-        item = PySequence_GetSlice(
-            string, STATE_OFFSET(&state, last),
-            STATE_OFFSET(&state, state.start)
-            );
+        if (state.reverse) {
+            /* Zero-width match? */
+            if (state.ptr == state.end) {
+                /* Are we permitted to split on zero-width? */
+                if (!zero_width) {
+                    state.end = (void*) ((char*) state.ptr - state.charsize);
+                    continue;
+                }
+            }
+
+            /* searching backwards: get segment following this match */
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, state.end),
+                STATE_OFFSET(&state, last)
+                );
+        } else {
+            /* Zero-width match? */
+            if (state.ptr == state.start) {
+                /* Are we permitted to split on zero-width? */
+                if (!zero_width) {
+                    state.start = (void*) ((char*) state.ptr + state.charsize);
+                    continue;
+                }
+            }
+
+            /* get segment before this match */
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, last),
+                STATE_OFFSET(&state, state.start)
+                );
+        }
         if (!item)
             goto error;
         status = PyList_Append(list, item);
@@ -2238,7 +5804,7 @@
 
         /* add groups (if any) */
         for (i = 0; i < self->groups; i++) {
-            item = state_getslice(&state, i+1, string, 0);
+            item = state_getslice(&state, i + 1, string, 0);
             if (!item)
                 goto error;
             status = PyList_Append(list, item);
@@ -2249,14 +5815,52 @@
 
         n = n + 1;
 
-        last = state.start = state.ptr;
-
+        /* Remember where the search finished. */
+        last = state.ptr;
+
+        /*
+         Continue the search from where we left off.
+
+         Legacy code won't split on a zero-width match; it'll simply ignore the
+         match, advance, and try again.
+
+         Newer code with the ZEROWIDTH flag set can split on a zero-width match;
+         when it tries the next match it'll forbid another zero-width match at
+         the same start position.
+         */
+        if (state.reverse) {
+            if (zero_width) {
+                state.end = state.ptr;
+                state.reject_zero_width = TRUE;
+            } else {
+                if (state.ptr == state.end)
+                    state.end = (void*) ((char*) state.ptr - state.charsize);
+                else
+                    state.end = state.ptr;
+            }
+        } else {
+            if (zero_width) {
+                state.start = state.ptr;
+                state.reject_zero_width = TRUE;
+            } else {
+                if(state.ptr == state.start)
+                    state.start = (void*) ((char*) state.ptr + state.charsize);
+                else
+                    state.start = state.ptr;
+            }
+        }
     }
 
     /* get segment following last match (even if empty) */
-    item = PySequence_GetSlice(
-        string, STATE_OFFSET(&state, last), state.endpos
-        );
+    if (state.reverse)
+        item = PySequence_GetSlice(
+            string, state.pos, STATE_OFFSET(&state, last)
+            );
+    else
+        item = PySequence_GetSlice(
+            string, STATE_OFFSET(&state, last), state.endpos
+            );
+
     if (!item)
         goto error;
     status = PyList_Append(list, item);
@@ -2276,8 +5880,7 @@
 
 static PyObject*
 pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
-             Py_ssize_t count, Py_ssize_t subn)
-{
+  Py_ssize_t count, Py_ssize_t subn) {
     SRE_STATE state;
     PyObject* list;
     PyObject* item;
@@ -2287,9 +5890,11 @@
     void* ptr;
     int status;
     Py_ssize_t n;
-    Py_ssize_t i, b, e;
+    Py_ssize_t b;
     int bint;
     int filter_is_callable;
+    SRE_CODE* pattern_code;
+    void* last;
 
     if (PyCallable_Check(ptemplate)) {
         /* sub/subn takes either a function or a template */
@@ -2303,10 +5908,10 @@
         b = bint;
         if (ptr) {
             if (b == 1) {
-		    literal = sre_literal_template((unsigned char *)ptr, n);
+                literal = sre_bliteral_template((unsigned char*)ptr, n);
             } else {
 #if defined(HAVE_UNICODE)
-		    literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
+                literal = sre_uliteral_template((Py_UNICODE*)ptr, n);
 #endif
             }
         } else {
@@ -2329,7 +5934,9 @@
         }
     }
 
-    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
+    pattern_code = PatternObject_GetCode(self);
+
+    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX, pattern_code);
     if (!string) {
         Py_DECREF(filter);
         return NULL;
@@ -2342,48 +5949,54 @@
         return NULL;
     }
 
-    n = i = 0;
+    n = 0;
+
+    /* Where did the last match end? */
+    last = state.reverse ? state.end : state.start;
 
     while (!count || n < count) {
-
         state_reset(&state);
 
-        state.ptr = state.start;
+        /* Where should the search start? */
+        state.ptr = state.reverse ? state.end : state.start;
 
         if (state.charsize == 1) {
-            status = sre_search(&state, PatternObject_GetCode(self));
+            status = sre_bsearch(&state);
         } else {
 #if defined(HAVE_UNICODE)
-            status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state);
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
+            /* The search failed. */
             if (status == 0)
                 break;
             pattern_error(status);
             goto error;
         }
 
-        b = STATE_OFFSET(&state, state.start);
-        e = STATE_OFFSET(&state, state.ptr);
-
-        if (i < b) {
-            /* get segment before this match */
-            item = PySequence_GetSlice(string, i, b);
-            if (!item)
-                goto error;
-            status = PyList_Append(list, item);
-            Py_DECREF(item);
-            if (status < 0)
-                goto error;
-
-        } else if (i == b && i == e && n > 0)
-            /* ignore empty match on latest position */
-            goto next;
+        /* get segment before this match */
+        if (state.reverse) {
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, state.end),
+                STATE_OFFSET(&state, last)
+                );
+        } else {
+            item = PySequence_GetSlice(
+                string, STATE_OFFSET(&state, last),
+                STATE_OFFSET(&state, state.start)
+                );
+        }
+        if (!item)
+            goto error;
+        status = PyList_Append(list, item);
+        Py_DECREF(item);
+        if (status < 0)
+            goto error;
 
         if (filter_is_callable) {
             /* pass match object through filter */
@@ -2414,28 +6027,35 @@
                 goto error;
         }
 
-        i = e;
         n = n + 1;
 
-next:
-        /* move on */
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+        /* Remember where the search finished. */
+        last = state.ptr;
+
+        /*
+         Continue the search from where we left off. Forbid another zero-width
+         match at the same start position.
+         */
+        if (state.reverse)
+            state.end = state.ptr;
         else
             state.start = state.ptr;
-
+        state.reject_zero_width = TRUE;
     }
 
     /* get segment following last match */
-    if (i < state.endpos) {
-        item = PySequence_GetSlice(string, i, state.endpos);
-        if (!item)
-            goto error;
-        status = PyList_Append(list, item);
-        Py_DECREF(item);
-        if (status < 0)
-            goto error;
-    }
+    if (state.reverse)
+        item = PySequence_GetSlice(string, state.pos, STATE_OFFSET(&state,
+          last));
+    else
+        item = PySequence_GetSlice(string, STATE_OFFSET(&state, last),
+          state.endpos);
+    if (!item)
+        goto error;
+    status = PyList_Append(list, item);
+    Py_DECREF(item);
+    if (status < 0)
+        goto error;
 
     state_fini(&state);
 
@@ -2460,9 +6080,8 @@
 
 }
 
-static PyObject*
-pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_sub(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     PyObject* ptemplate;
     PyObject* string;
     Py_ssize_t count = 0;
@@ -2474,9 +6093,8 @@
     return pattern_subx(self, ptemplate, string, count, 0);
 }
 
-static PyObject*
-pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* pattern_subn(PatternObject* self, PyObject* args,
+  PyObject* kw) {
     PyObject* ptemplate;
     PyObject* string;
     Py_ssize_t count = 0;
@@ -2488,9 +6106,7 @@
     return pattern_subx(self, ptemplate, string, count, 1);
 }
 
-static PyObject*
-pattern_copy(PatternObject* self, PyObject *unused)
-{
+static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
 #ifdef USE_BUILTIN_COPY
     PatternObject* copy;
     int offset;
@@ -2516,9 +6132,7 @@
 #endif
 }
 
-static PyObject*
-pattern_deepcopy(PatternObject* self, PyObject* memo)
-{
+static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) {
 #ifdef USE_BUILTIN_COPY
     PatternObject* copy;
 
@@ -2577,20 +6191,20 @@
 
 static PyMethodDef pattern_methods[] = {
     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
-	pattern_match_doc},
+     pattern_match_doc},
     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
-	pattern_search_doc},
+     pattern_search_doc},
     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
-	pattern_sub_doc},
+     pattern_sub_doc},
     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
-	pattern_subn_doc},
+     pattern_subn_doc},
     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
-	pattern_split_doc},
+     pattern_split_doc},
     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
-	pattern_findall_doc},
+     pattern_findall_doc},
 #if PY_VERSION_HEX >= 0x02020000
     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
-	pattern_finditer_doc},
+     pattern_finditer_doc},
 #endif
     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
@@ -2598,9 +6212,7 @@
     {NULL, NULL}
 };
 
-static PyObject*
-pattern_getattr(PatternObject* self, char* name)
-{
+static PyObject* pattern_getattr(PatternObject* self, char* name) {
     PyObject* res;
 
     res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
@@ -2631,38 +6243,49 @@
     return NULL;
 }
 
-statichere PyTypeObject Pattern_Type = {
+static int _validate(PatternObject *self); /* Forward reference. */
+
+static Py_ssize_t match_length(MatchObject* self)
+{
+    return self->groups;
+}
+
+static PyObject* match_subscript(MatchObject* self, PyObject* group);
+
+static PyMappingMethods match_as_mapping = {
+    (lenfunc)match_length, /*mp_length*/
+    (binaryfunc)match_subscript, /*mp_subscript*/
+    0, /*mp_ass_subscript*/
+};
+
+static PyTypeObject Pattern_Type = {
     PyObject_HEAD_INIT(NULL)
     0, "_" SRE_MODULE ".SRE_Pattern",
     sizeof(PatternObject), sizeof(SRE_CODE),
     (destructor)pattern_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
+    0,                  /*tp_print*/
     (getattrfunc)pattern_getattr, /*tp_getattr*/
-    0,					/* tp_setattr */
-    0,					/* tp_compare */
-    0,					/* tp_repr */
-    0,					/* tp_as_number */
-    0,					/* tp_as_sequence */
-    0,					/* tp_as_mapping */
-    0,					/* tp_hash */
-    0,					/* tp_call */
-    0,					/* tp_str */
-    0,					/* tp_getattro */
-    0,					/* tp_setattro */
-    0,					/* tp_as_buffer */
-    Py_TPFLAGS_HAVE_WEAKREFS,		/* tp_flags */
-    pattern_doc,			/* tp_doc */
-    0,					/* tp_traverse */
-    0,					/* tp_clear */
-    0,					/* tp_richcompare */
-    offsetof(PatternObject, weakreflist),	/* tp_weaklistoffset */
+    0,                  /* tp_setattr */
+    0,                  /* tp_compare */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_HAVE_WEAKREFS,       /* tp_flags */
+    pattern_doc,        /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    offsetof(PatternObject, weakreflist),    /* tp_weaklistoffset */
 };
 
-static int _validate(PatternObject *self); /* Forward */
-
-static PyObject *
-_compile(PyObject* self_, PyObject* args)
-{
+static PyObject* _compile(PyObject* self_, PyObject* args) {
     /* "compile" pattern descriptor to pattern object */
 
     PatternObject* self;
@@ -2763,454 +6386,652 @@
 #define VTRACE(v)
 #endif
 
-/* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
-
-/* Extract opcode, argument, or skip count from code array */
-#define GET_OP                                          \
-    do {                                                \
-        VTRACE(("%p: ", code));                         \
-        if (code >= end) FAIL;                          \
-        op = *code++;                                   \
-        VTRACE(("%lu (op)\n", (unsigned long)op));      \
-    } while (0)
-#define GET_ARG                                         \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        arg = *code++;                                  \
-        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
-    } while (0)
-#define GET_SKIP_ADJ(adj)                               \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        skip = *code;                                   \
-        VTRACE(("%lu (skip to %p)\n",                   \
-               (unsigned long)skip, code+skip));        \
-        if (code+skip-adj < code || code+skip-adj > end)\
-            FAIL;                                       \
-        code++;                                         \
-    } while (0)
-#define GET_SKIP GET_SKIP_ADJ(0)
-
-static int
-_validate_charset(SRE_CODE *code, SRE_CODE *end)
-{
-    /* Some variables are manipulated by the macros above */
-    SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE offset;
-    int i;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
-
-        case SRE_OP_NEGATE:
-            break;
-
-        case SRE_OP_LITERAL:
-            GET_ARG;
-            break;
-
-        case SRE_OP_RANGE:
-            GET_ARG;
-            GET_ARG;
-            break;
-
-        case SRE_OP_CHARSET:
-            offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            GET_ARG; /* Number of blocks */
-            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            /* Make sure that each byte points to a valid block */
-            for (i = 0; i < 256; i++) {
-                if (((unsigned char *)code)[i] >= arg)
-                    FAIL;
+/* The info for validating a pattern. */
+typedef struct SRE_Validation {
+    unsigned int numbered_mark_count;
+    unsigned int named_mark_count;
+    unsigned int min_numbered_mark;
+    unsigned int max_numbered_mark;
+    unsigned int min_named_mark;
+    unsigned int max_named_mark;
+    unsigned int group_ref_count;
+    unsigned int max_group_ref;
+} SRE_Validation;
+
+/*
+ Validates a charset. Returns a pointer to the following op if valid or NULL if
+ invalid.
+
+ The charset might look valid yet extend off the end of the pattern; the caller
+ will check whether that's the case.
+*/
+static SRE_CODE* validate_charset(SRE_CODE* charset) {
+    /*
+     The format of a charset is explained in in_charset().
+
+     charset[0] contains the maximum character code in the charset.
+     */
+    Py_ssize_t hi_bytes = charset[0] / 256;
+    Py_ssize_t max_index = 0;
+    Py_ssize_t index;
+
+    /* Check each of the chunk indexes. */
+    for (index = 0; index <= hi_bytes; index ++) {
+        /* Get the chunk index (2 x 16-bit indexes in each 32-bit codeword). */
+        Py_ssize_t i = (charset[1 + index / 2] >> ((index % 2) * 16)) & 0xFFFF;
+
+        /*
+         If a chunk is identical to a previous one then its index is the same as
+         that one's.
+
+         If a chunk is different from any previous ones then its index is 1+ the
+         previous maximum index.
+
+         This is guaranteed.
+         */
+        if (i > max_index + 1)
+            /* Definitely invalid. */
+            return NULL;
+        if (i > max_index)
+            /* It's different from any previous ones. */
+            max_index = i;
+    }
+
+    /*
+     Return a pointer to the end of the charset. The number of chunk indexes
+     depends on the maximum character code of the charset.
+     */
+    return charset + 2 + hi_bytes / 2 + (max_index + 1) * (256 /
+      SRE_BITS_PER_CODE);
+}
+
+/*
+ Validates a set. Returns a pointer to the following op if valid or NULL if
+ invalid.
+*/
+static SRE_CODE* validate_set(SRE_CODE* pattern, SRE_CODE* end_ptr) {
+    /* Check that the set doesn't extend off the end of the pattern. */
+    SRE_CODE* set_end = pattern + pattern[0];
+    if (pattern[0] < 1 || set_end > end_ptr)
+        return NULL;
+
+    pattern++;
+
+    do {
+        SRE_OpInfo* info_ptr;
+
+        if (pattern[0] > SRE_MAX_OP)
+            /* Invalid opcode. */
+            return NULL;
+
+        /* Get the info about the opcode. */
+        info_ptr = &sre_op_info[pattern[0]];
+
+        switch (info_ptr->type) {
+        case SRE_TYPE_CATEGORY:
+            /* <category> category */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern += 2;
+            break;
+        case SRE_TYPE_CHARSET:
+        {
+            /* <charset> skip charset */
+            /*
+             Check that the charset doesn't extend off the end of the pattern.
+             */
+            SRE_CODE* end_charset = pattern + 1 + pattern[1];
+            VTRACE(("%s\n", info_ptr->name));
+            if (end_charset > end_ptr)
+                return NULL;
+
+            pattern = validate_charset(pattern + 2);
+            if (pattern != end_charset)
+                return NULL;
+            break;
+        }
+        case SRE_TYPE_LITERAL:
+            /* <literal> code */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern += 2;
+            break;
+        case SRE_TYPE_RANGE:
+            /* <range> min max */
+            /* The minimum shouldn't be greater than the maximum. */
+            VTRACE(("%s\n", info_ptr->name));
+            if (pattern[1] > pattern[2])
+                return NULL;
+
+            pattern += 3;
+            break;
+        default:
+            /* Unknown opcode type. */
+            VTRACE(("UNKNOWN\n"));
+            return NULL;
+        }
+    } while (pattern < set_end);
+
+    return pattern > set_end ? NULL : pattern;
+}
+
+/*
+ Validates a single-character op. Returns a pointer to the following op if valid
+ or NULL if invalid.
+*/
+static SRE_CODE* validate_one_pattern(SRE_CODE* pattern, SRE_CODE* end_ptr,
+  int* direction) {
+    SRE_OpInfo* info_ptr;
+
+    if (pattern[0] > SRE_MAX_OP)
+        /* Invalid opcode. */
+        return NULL;
+
+    /* Get the info about the opcode. */
+    info_ptr = &sre_op_info[pattern[0]];
+
+    /*
+     Is the direction correct? We'll reject a forwards opcode when the current
+     direction is backwards, and vice versa.
+     */
+    if (*direction != 0 && *direction != info_ptr->direction)
+        return NULL;
+
+    switch (info_ptr->type) {
+    case SRE_TYPE_CATEGORY:
+        /* <category> category */
+        VTRACE(("%s\n", info_ptr->name));
+        pattern += 2;
+        break;
+    case SRE_TYPE_CHARSET:
+    {
+        /* <charset> skip charset */
+        /*
+         Check that the charset doesn't extend off the end of the pattern.
+         */
+        SRE_CODE* end_charset = pattern + 1 + pattern[1];
+        VTRACE(("%s\n", info_ptr->name));
+        if (end_charset > end_ptr)
+            return NULL;
+
+        pattern = validate_charset(pattern + 2);
+        if (pattern != end_charset)
+            return NULL;
+        break;
+    }
+    case SRE_TYPE_LITERAL:
+        /* <literal> code */
+        VTRACE(("%s\n", info_ptr->name));
+        pattern += 2;
+        break;
+    case SRE_TYPE_RANGE:
+        /* <range> min max */
+        /* The minimum shouldn't be greater than the maximum. */
+        VTRACE(("%s\n", info_ptr->name));
+        if (pattern[1] > pattern[2])
+            return NULL;
+
+        pattern += 3;
+        break;
+    case SRE_TYPE_SET:
+        /* <set> set */
+        VTRACE(("%s\n", info_ptr->name));
+        pattern = validate_set(pattern + 1, end_ptr);
+        if (pattern == NULL)
+            return NULL;
+        break;
+    case SRE_TYPE_SIMPLE_CATEGORY:
+        /* <category> */
+        VTRACE(("%s\n", info_ptr->name));
+        pattern++;
+        break;
+    default:
+        /* Unknown opcode type. */
+        return NULL;
+    }
+
+    if (pattern > end_ptr)
+        return NULL;
+
+    /* Set the current direction. */
+    *direction = info_ptr->direction;
+
+    return pattern;
+}
+
+/*
+ Validates a subpattern. Returns a pointer to the following op if valid or NULL
+ if invalid.
+*/
+static SRE_CODE* validate_subpattern(SRE_CODE* pattern, SRE_CODE* end_ptr,
+  int* direction, SRE_Validation* validation) {
+    /* The current direction (forwards/backwards). */
+    int dir = *direction;
+
+    while (pattern < end_ptr) {
+        SRE_OpInfo* info_ptr;
+
+        VTRACE(("op %d\n", pattern[0]));
+        if (pattern[0] > SRE_MAX_OP)
+            /* Invalid opcode. */
+            return NULL;
+
+        /* Get the info about the opcode. */
+        info_ptr = &sre_op_info[pattern[0]];
+        VTRACE(("type %d\n", info_ptr->type));
+
+        /*
+         Is the direction correct? We'll reject a forwards opcode when the
+         current direction is backwards, and vice versa.
+         */
+        if (dir != 0 && info_ptr->direction != 0 && dir != info_ptr->direction) {
+            VTRACE(("wrong direction\n"));
+            return NULL;
+        }
+
+        switch (info_ptr->type) {
+        case SRE_TYPE_ASSERT:
+        {
+            /* <assert> <skip to end> ... <end_assert> */
+            SRE_CODE* tail_ptr = pattern + 1 + pattern[1];
+            int subdir = 0;
+            VTRACE(("%s\n", info_ptr->name));
+            /*
+             Validate the parameters.
+
+             We also check that the 'skip' points to the assert's end marker.
+             */
+            if (pattern[1] < 2 || tail_ptr > end_ptr || tail_ptr[-1] !=
+              info_ptr->end_marker)
+                return NULL;
+
+            /*
+             Validate the subpattern within the assert and check that it ends
+             in the right place.
+             */
+            if (validate_subpattern(pattern + 2, tail_ptr - 1, &subdir,
+              validation) != tail_ptr - 1)
+                return NULL;
+
+            pattern = tail_ptr;
+            break;
+        }
+        case SRE_TYPE_ATOMIC:
+        {
+            /* <ATOMIC> ... <END_ATOMIC> */
+            /*
+             Validate the subpattern within the atomic group.
+
+             The call should return a pointer to the END_ATOMIC, which it
+             doesn't understand.
+             */
+            SRE_CODE* ptr;
+            VTRACE(("%s\n", info_ptr->name));
+            ptr = validate_subpattern(pattern + 1, end_ptr, &dir, validation);
+            if (ptr == NULL || ptr >= end_ptr || ptr[0] != info_ptr->end_marker)
+                return NULL;
+
+            pattern = ptr + 1;
+            break;
+        }
+        case SRE_TYPE_BRANCH:
+        {
+            /*
+             <BRANCH>
+             <skip to next>
+                 ...
+             <JUMP> <skip to end>
+             <skip to next>
+                 ...
+             <JUMP> <skip to end>
+             0
+             */
+            /* All the jumps should end in the same place. */
+            SRE_CODE* skip_end_ptr = NULL;
+            VTRACE(("%s\n", info_ptr->name));
+
+            pattern++;
+
+            do {
+                SRE_CODE* next_ptr = pattern + pattern[0];
+                SRE_CODE* ptr;
+                /* The offset to the next alternative's offset. */
+                if (pattern[0] < 3 || next_ptr >= end_ptr)
+                    return NULL;
+
+                /* Validate this alternative, which stops at the jump. */
+                ptr = validate_subpattern(pattern + 1, next_ptr - 2, &dir,
+                  validation);
+                if (ptr != next_ptr - 2 || ptr[0] != SRE_OP_JUMP || ptr[1] < 1)
+                    return NULL;
+
+                /* The jump to the end. */
+                ptr += 1 + ptr[1];
+                if (skip_end_ptr == NULL)
+                    skip_end_ptr = ptr;
+                else if (ptr != skip_end_ptr)
+                    return NULL;
+
+                pattern = next_ptr;
+            } while (pattern[0] != 0);
+            pattern++;
+            break;
+        }
+        case SRE_TYPE_CATEGORY:
+            /* <category> category */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern += 2;
+            break;
+        case SRE_TYPE_CHARSET:
+        {
+            /* <charset> skip charset */
+            /* Point to the end of the charset. */
+            SRE_CODE* end_charset = pattern + 1 + pattern[1];
+            VTRACE(("%s\n", info_ptr->name));
+            if (end_charset > end_ptr)
+                return NULL;
+
+            /* Validate the charset. */
+            pattern = validate_charset(pattern + 2);
+            if (pattern != end_charset)
+                return NULL;
+            break;
+        }
+        case SRE_TYPE_GROUPREF:
+            /* <groupref> group_id */
+            VTRACE(("%s\n", info_ptr->name));
+            validation->group_ref_count++;
+            validation->max_group_ref = unsigned_max(validation->max_group_ref,
+              pattern[1]);
+            pattern += 2;
+            break;
+        case SRE_TYPE_GROUPREF_EXISTS:
+        {
+            /*
+             <GROUPREF_EXISTS> group_id <skip to code_no>
+             code_yes
+             <JUMP> <skip to end>
+             code_no
+             */
+            SRE_CODE* skip_ptr = pattern + 1 + pattern[2];
+            SRE_CODE* ptr;
+            VTRACE(("%s\n", info_ptr->name));
+            /* Locate code_no. */
+            if (pattern[2] < 2 || skip_ptr > end_ptr)
+                return NULL;
+
+            /* code_yes lies between the 'skip' and code_no. */
+            ptr = validate_subpattern(pattern + 3, skip_ptr, &dir, validation);
+
+            /*
+             'ptr' will point after code_yes and at the jump, if present.
+
+             (The jump will have been rejected by the call.)
+             */
+            /* Validate code_no, if present. */
+            if (ptr == skip_ptr - 2) {
+                if (ptr[0] != SRE_OP_JUMP || ptr[1] < 1)
+                    return NULL;
+
+                skip_ptr = ptr + 1 + ptr[1];
+                if (skip_ptr > end_ptr)
+                    return NULL;
+
+                /*
+                 code_no lies between the 'skip' and the end of the subpattern.
+                 */
+                ptr = validate_subpattern(ptr + 2, skip_ptr, &dir, validation);
+                if (ptr < skip_ptr)
+                    return NULL;
+            } else if (ptr != skip_ptr)
+                return NULL;
+
+            validation->group_ref_count++;
+            validation->max_group_ref = unsigned_max(validation->max_group_ref,
+              pattern[1]);
+            pattern = skip_ptr;
+            break;
+        }
+        case SRE_TYPE_LITERAL:
+            /* <literal> code */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern += 2;
+            break;
+        case SRE_TYPE_LITERAL_STRING:
+            /* <literal_string> length ... */
+            VTRACE(("%s\n", info_ptr->name));
+            if (pattern[1] == 0)
+                return NULL;
+            pattern += 2 + pattern[1];
+            break;
+        case SRE_TYPE_MARK:
+            /* <MARK> <numbered_id> <named_id> */
+            /*
+             The capture groups are numbered. Some also have names.
+
+             The name ids are all higher than the number ids.
+             */
+            VTRACE(("%s\n", info_ptr->name));
+            if (pattern[1] > pattern[2])
+                /* Invalid: the number id is higher than the name id. */
+                return NULL;
+
+            /* Found another mark. */
+            validation->numbered_mark_count++;
+
+            /* The lowest and highest number ids. */
+            validation->min_numbered_mark =
+              unsigned_min(validation->min_numbered_mark, pattern[1]);
+            validation->max_numbered_mark =
+              unsigned_max(validation->max_numbered_mark, pattern[1]);
+
+            if (pattern[2] > pattern[1]) {
+                /* The mark has a name id (it's higher than the number id). */
+                validation->named_mark_count++;
+
+                /* The lowest and highest name ids. */
+                validation->min_named_mark =
+                  unsigned_min(validation->min_named_mark, pattern[2]);
+                validation->max_named_mark =
+                  unsigned_max(validation->max_named_mark, pattern[2]);
             }
-            code += offset;
-            offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
-            break;
-
-        case SRE_OP_CATEGORY:
-            GET_ARG;
-            switch (arg) {
-            case SRE_CATEGORY_DIGIT:
-            case SRE_CATEGORY_NOT_DIGIT:
-            case SRE_CATEGORY_SPACE:
-            case SRE_CATEGORY_NOT_SPACE:
-            case SRE_CATEGORY_WORD:
-            case SRE_CATEGORY_NOT_WORD:
-            case SRE_CATEGORY_LINEBREAK:
-            case SRE_CATEGORY_NOT_LINEBREAK:
-            case SRE_CATEGORY_LOC_WORD:
-            case SRE_CATEGORY_LOC_NOT_WORD:
-            case SRE_CATEGORY_UNI_DIGIT:
-            case SRE_CATEGORY_UNI_NOT_DIGIT:
-            case SRE_CATEGORY_UNI_SPACE:
-            case SRE_CATEGORY_UNI_NOT_SPACE:
-            case SRE_CATEGORY_UNI_WORD:
-            case SRE_CATEGORY_UNI_NOT_WORD:
-            case SRE_CATEGORY_UNI_LINEBREAK:
-            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-                break;
-            default:
-                FAIL;
-            }
-            break;
-
+            pattern += 3;
+            break;
+        case SRE_TYPE_POSITION:
+            /* <position> */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern++;
+            break;
+        case SRE_TYPE_RANGE:
+            /* <range> min max */
+            /* The minimum shouldn't be greater than the maximum. */
+            VTRACE(("%s\n", info_ptr->name));
+            if (pattern[1] > pattern[2])
+                return NULL;
+
+            pattern += 3;
+            break;
+        case SRE_TYPE_REPEAT:
+        {
+            /*
+             <repeat> <skip to end> <min> <max>
+                 ...
+             <end_repeat> <skip to start>
+             */
+            SRE_CODE* skip_end_ptr;
+            VTRACE(("%s\n", info_ptr->name));
+            /* Validate the parameters. */
+            if (pattern[1] < 4 || pattern[2] > pattern[3])
+                return NULL;
+
+            /* Check that the 'skip' points to the repeat's end marker. */
+            skip_end_ptr = pattern + pattern[1];
+            if (skip_end_ptr + 2 > end_ptr || skip_end_ptr[0] !=
+              info_ptr->end_marker || skip_end_ptr[1] != pattern[1])
+                return NULL;
+
+            /* Validate the subpattern within the repeat. */
+            if (validate_subpattern(pattern + 4, skip_end_ptr, &dir,
+              validation) != skip_end_ptr)
+                return NULL;
+
+            pattern = skip_end_ptr + 2;
+            break;
+        }
+        case SRE_TYPE_REPEAT_ONE:
+        {
+            /* <repeat_one> <skip to end> <min> <max> ... */
+            SRE_CODE* tail_ptr;
+            VTRACE(("%s\n", info_ptr->name));
+            /* Validate the parameters. */
+            if (pattern[1] < 4 || pattern[2] > pattern[3])
+                return NULL;
+
+            /*
+             Check that the repeat doesn't extend off the end of the
+             pattern.
+             */
+            tail_ptr = pattern + 1 + pattern[1];
+            if (tail_ptr > end_ptr)
+                return NULL;
+
+            /* Validate the opcode within the repeat. */
+            if (validate_one_pattern(pattern + 4, tail_ptr, &dir) != tail_ptr)
+                return NULL;
+            pattern = tail_ptr;
+            break;
+        }
+        case SRE_TYPE_SET:
+            /* <set> set */
+            /* Validate the set. */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern = validate_set(pattern + 1, end_ptr);
+            if (pattern == NULL)
+                return NULL;
+            break;
+        case SRE_TYPE_SIMPLE_CATEGORY:
+            /* <category> */
+            VTRACE(("%s\n", info_ptr->name));
+            pattern++;
+            break;
         default:
-            FAIL;
-
-        }
-    }
-
+            /* Anything else might be meaningful to the caller. */
+            *direction = dir;
+            return pattern;
+        }
+
+        /* Record the direction. */
+        if (info_ptr->direction != 0)
+            dir = info_ptr->direction;
+    }
+
+    *direction = dir;
+
+    return pattern > end_ptr ? NULL : pattern;
+}
+
+/* Validates the pattern. */
+static int _validate(PatternObject* self) {
+    SRE_Validation validation;
+    int direction = 0;
+    SRE_CODE* end_ptr = self->code + self->codesize;
+
+    /* Initialise the validation info. */
+    validation.numbered_mark_count = 0;
+    validation.min_numbered_mark = ~(unsigned int)0;
+    validation.max_numbered_mark = 0;
+    validation.named_mark_count = 0;
+    validation.min_named_mark = ~(unsigned int)0;
+    validation.max_named_mark = 0;
+    validation.group_ref_count = 0;
+    validation.max_group_ref = 0;
+
+    /*
+     validate_subpattern() will return a pointer to the first op it doesn't
+     understand or NULL if the pattern is invalid.
+
+     It doesn't understand SRE_OP_SUCCESS (which occurs only at the end of the
+     pattern), so the result should be a pointer to that.
+     */
+    if (self->codesize < 1 || end_ptr[-1] != SRE_OP_SUCCESS ||
+      validate_subpattern(self->code, end_ptr, &direction, &validation) !=
+      end_ptr - 1)
+        goto error;
+
+    /* There should be an even number of marks (start and end of a group). */
+    if (validation.numbered_mark_count % 2 != 0 ||
+      validation.named_mark_count % 2 != 0)
+        goto error;
+
+    /*
+     The numbered marks should be in the range 0 .. numbered_mark_count - 1.
+
+     Note that it's possible for several marks to have the same number,
+     so we might need to correct numbered_mark_count.
+     */
+    if (validation.numbered_mark_count > 0) {
+        if (validation.min_numbered_mark > 0 ||
+          validation.max_numbered_mark >= validation.numbered_mark_count)
+            goto error;
+
+        validation.numbered_mark_count = validation.max_numbered_mark + 1;
+    }
+
+    /*
+     All the named marks should be in the range numbered_mark_count ..
+     numbered_mark_count + named_mark_count - 1.
+
+     Note that it's possible for several marks to have the same number.
+     */
+    if (validation.named_mark_count > 0) {
+        if (validation.min_named_mark != validation.numbered_mark_count ||
+          validation.max_named_mark >= validation.min_named_mark +
+          validation.named_mark_count)
+        goto error;
+
+        validation.named_mark_count = validation.max_named_mark -
+          validation.max_numbered_mark;
+    }
+
+    /*
+     All the group refs should be in the range 0 .. numbered_mark_count +
+     named_mark_count - 1.
+     */
+    if (validation.group_ref_count > 0 && validation.max_group_ref * 2 >=
+      validation.numbered_mark_count + validation.named_mark_count)
+        goto error;
+
+    /* Calculate the number of capture groups. */
+    self->groups = validation.numbered_mark_count / 2;
+
+    /* Calculate the number of capture groups + named capture groups. */
+    self->internal_groups = (validation.numbered_mark_count +
+      validation.named_mark_count) / 2;
+
+    VTRACE(("Success!\n"));
     return 1;
-}
-
-static int
-_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    /* Some variables are manipulated by the macros above */
-    SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE skip;
-
-    VTRACE(("code=%p, end=%p\n", code, end));
-
-    if (code > end)
-        FAIL;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
-
-        case SRE_OP_MARK:
-            /* We don't check whether marks are properly nested; the
-               sre_match() code is robust even if they don't, and the worst
-               you can get is nonsensical match results. */
-            GET_ARG;
-            if (arg > 2*groups+1) {
-                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
-                FAIL;
-            }
-            break;
-
-        case SRE_OP_LITERAL:
-        case SRE_OP_NOT_LITERAL:
-        case SRE_OP_LITERAL_IGNORE:
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            GET_ARG;
-            /* The arg is just a character, nothing to check */
-            break;
-
-        case SRE_OP_SUCCESS:
-        case SRE_OP_FAILURE:
-            /* Nothing to check; these normally end the matching process */
-            break;
-
-        case SRE_OP_AT:
-            GET_ARG;
-            switch (arg) {
-            case SRE_AT_BEGINNING:
-            case SRE_AT_BEGINNING_STRING:
-            case SRE_AT_BEGINNING_LINE:
-            case SRE_AT_END:
-            case SRE_AT_END_LINE:
-            case SRE_AT_END_STRING:
-            case SRE_AT_BOUNDARY:
-            case SRE_AT_NON_BOUNDARY:
-            case SRE_AT_LOC_BOUNDARY:
-            case SRE_AT_LOC_NON_BOUNDARY:
-            case SRE_AT_UNI_BOUNDARY:
-            case SRE_AT_UNI_NON_BOUNDARY:
-                break;
-            default:
-                FAIL;
-            }
-            break;
-
-        case SRE_OP_ANY:
-        case SRE_OP_ANY_ALL:
-            /* These have no operands */
-            break;
-
-        case SRE_OP_IN:
-        case SRE_OP_IN_IGNORE:
-            GET_SKIP;
-            /* Stop 1 before the end; we check the FAILURE below */
-            if (!_validate_charset(code, code+skip-2))
-                FAIL;
-            if (code[skip-2] != SRE_OP_FAILURE)
-                FAIL;
-            code += skip-1;
-            break;
-
-        case SRE_OP_INFO:
-            {
-                /* A minimal info field is
-                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
-                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
-                   more follows. */
-                SRE_CODE flags, min, max, i;
-                SRE_CODE *newcode;
-                GET_SKIP;
-                newcode = code+skip-1;
-                GET_ARG; flags = arg;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                /* Check that only valid flags are present */
-                if ((flags & ~(SRE_INFO_PREFIX |
-                               SRE_INFO_LITERAL |
-                               SRE_INFO_CHARSET)) != 0)
-                    FAIL;
-                /* PREFIX and CHARSET are mutually exclusive */
-                if ((flags & SRE_INFO_PREFIX) &&
-                    (flags & SRE_INFO_CHARSET))
-                    FAIL;
-                /* LITERAL implies PREFIX */
-                if ((flags & SRE_INFO_LITERAL) &&
-                    !(flags & SRE_INFO_PREFIX))
-                    FAIL;
-                /* Validate the prefix */
-                if (flags & SRE_INFO_PREFIX) {
-                    SRE_CODE prefix_len, prefix_skip;
-                    GET_ARG; prefix_len = arg;
-                    GET_ARG; prefix_skip = arg;
-                    /* Here comes the prefix string */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    code += prefix_len;
-                    /* And here comes the overlap table */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    /* Each overlap value should be < prefix_len */
-                    for (i = 0; i < prefix_len; i++) {
-                        if (code[i] >= prefix_len)
-                            FAIL;
-                    }
-                    code += prefix_len;
-                }
-                /* Validate the charset */
-                if (flags & SRE_INFO_CHARSET) {
-                    if (!_validate_charset(code, newcode-1))
-                        FAIL;
-                    if (newcode[-1] != SRE_OP_FAILURE)
-                        FAIL;
-                    code = newcode;
-                }
-                else if (code != newcode) {
-                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
-                    FAIL;
-                }
-            }
-            break;
-
-        case SRE_OP_BRANCH:
-            {
-                SRE_CODE *target = NULL;
-                for (;;) {
-                    GET_SKIP;
-                    if (skip == 0)
-                        break;
-                    /* Stop 2 before the end; we check the JUMP below */
-                    if (!_validate_inner(code, code+skip-3, groups))
-                        FAIL;
-                    code += skip-3;
-                    /* Check that it ends with a JUMP, and that each JUMP
-                       has the same target */
-                    GET_OP;
-                    if (op != SRE_OP_JUMP)
-                        FAIL;
-                    GET_SKIP;
-                    if (target == NULL)
-                        target = code+skip-1;
-                    else if (code+skip-1 != target)
-                        FAIL;
-                }
-            }
-            break;
-
-        case SRE_OP_REPEAT_ONE:
-        case SRE_OP_MIN_REPEAT_ONE:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-4, groups))
-                    FAIL;
-                code += skip-4;
-                GET_OP;
-                if (op != SRE_OP_SUCCESS)
-                    FAIL;
-            }
-            break;
-
-        case SRE_OP_REPEAT:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-3, groups))
-                    FAIL;
-                code += skip-3;
-                GET_OP;
-                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
-                    FAIL;
-            }
-            break;
-
-        case SRE_OP_GROUPREF:
-        case SRE_OP_GROUPREF_IGNORE:
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
-            break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            /* The regex syntax for this is: '(?(group)then|else)', where
-               'group' is either an integer group number or a group name,
-               'then' and 'else' are sub-regexes, and 'else' is optional. */
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
-            GET_SKIP_ADJ(1);
-            code--; /* The skip is relative to the first arg! */
-            /* There are two possibilities here: if there is both a 'then'
-               part and an 'else' part, the generated code looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skipyes>
-               ...then part...
-               JUMP
-               <skipno>
-               (<skipyes> jumps here)
-               ...else part...
-               (<skipno> jumps here)
-
-               If there is only a 'then' part, it looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skip>
-               ...then part...
-               (<skip> jumps here)
-
-               There is no direct way to decide which it is, and we don't want
-               to allow arbitrary jumps anywhere in the code; so we just look
-               for a JUMP opcode preceding our skip target.
-            */
-            if (skip >= 3 && code+skip-3 >= code &&
-                code[skip-3] == SRE_OP_JUMP)
-            {
-                VTRACE(("both then and else parts present\n"));
-                if (!_validate_inner(code+1, code+skip-3, groups))
-                    FAIL;
-                code += skip-2; /* Position after JUMP, at <skipno> */
-                GET_SKIP;
-                if (!_validate_inner(code, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            else {
-                VTRACE(("only a then part present\n"));
-                if (!_validate_inner(code+1, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            break;
-
-        case SRE_OP_ASSERT:
-        case SRE_OP_ASSERT_NOT:
-            GET_SKIP;
-            GET_ARG; /* 0 for lookahead, width for lookbehind */
-            code--; /* Back up over arg to simplify math below */
-            if (arg & 0x80000000)
-                FAIL; /* Width too large */
-            /* Stop 1 before the end; we check the SUCCESS below */
-            if (!_validate_inner(code+1, code+skip-2, groups))
-                FAIL;
-            code += skip-2;
-            GET_OP;
-            if (op != SRE_OP_SUCCESS)
-                FAIL;
-            break;
-
-        default:
-            FAIL;
-
-        }
-    }
-
-    VTRACE(("okay\n"));
-    return 1;
-}
-
-static int
-_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
-        FAIL;
-    if (groups == 0)  /* fix for simplejson */
-        groups = 100; /* 100 groups should always be safe */
-    return _validate_inner(code, end-1, groups);
-}
-
-static int
-_validate(PatternObject *self)
-{
-    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
-    {
-        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
-        return 0;
-    }
-    else
-        VTRACE(("Success!\n"));
-    return 1;
+
+error:
+    PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
+    return 0;
 }
 
 /* -------------------------------------------------------------------- */
 /* match methods */
 
-static void
-match_dealloc(MatchObject* self)
-{
+static void match_dealloc(MatchObject* self) {
     Py_XDECREF(self->regs);
     Py_XDECREF(self->string);
     Py_DECREF(self->pattern);
     PyObject_DEL(self);
 }
 
-static PyObject*
-match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
-{
-    if (index < 0 || index >= self->groups) {
+static PyObject* match_getslice_by_index(MatchObject* self, Py_ssize_t index,
+  PyObject* def, BOOL include_internal) {
+    /*
+     Internally we can access the named groups by their name id; externally we
+     can't.
+     */
+    Py_ssize_t groups = include_internal ? self->internal_groups : self->groups;
+    if (index < 0 || index >= groups) {
         /* raise IndexError if we were given a bad group number */
         PyErr_SetString(
             PyExc_IndexError,
@@ -3228,17 +7049,25 @@
     }
 
     return PySequence_GetSlice(
-        self->string, self->mark[index], self->mark[index+1]
+        self->string, self->mark[index], self->mark[index + 1]
         );
 }
 
-static Py_ssize_t
-match_getindex(MatchObject* self, PyObject* index)
-{
+static Py_ssize_t match_getindex(MatchObject* self, PyObject* index,
+  BOOL include_internal) {
     Py_ssize_t i;
 
     if (PyInt_Check(index))
-        return PyInt_AsSsize_t(index);
+    {
+        /*
+         Internally we can access the named groups by their name id; externally
+         we can't.
+         */
+        Py_ssize_t groups = include_internal ? self->internal_groups :
+          self->groups;
+        i = PyInt_AsSsize_t(index);
+        return i >= groups ? -1 : i;
+    }
 
     i = -1;
 
@@ -3255,15 +7084,13 @@
     return i;
 }
 
-static PyObject*
-match_getslice(MatchObject* self, PyObject* index, PyObject* def)
-{
-    return match_getslice_by_index(self, match_getindex(self, index), def);
-}
-
-static PyObject*
-match_expand(MatchObject* self, PyObject* ptemplate)
-{
+static PyObject* match_getslice(MatchObject* self, PyObject* index,
+  PyObject* def, BOOL include_internal) {
+    return match_getslice_by_index(self, match_getindex(self, index,
+      include_internal), def, TRUE);
+}
+
+static PyObject* match_expand(MatchObject* self, PyObject* ptemplate) {
     /* delegate to Python code */
     return call(
         SRE_PY_MODULE, "_expand",
@@ -3271,9 +7098,8 @@
         );
 }
 
-static PyObject*
-match_group(MatchObject* self, PyObject* args)
-{
+static PyObject* sre_get_match_group(MatchObject* self, PyObject* args,
+  BOOL include_internal) {
     PyObject* result;
     Py_ssize_t i, size;
 
@@ -3281,10 +7107,11 @@
 
     switch (size) {
     case 0:
-        result = match_getslice(self, Py_False, Py_None);
+        result = match_getslice(self, Py_False, Py_None, include_internal);
         break;
     case 1:
-        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
+        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None,
+          include_internal);
         break;
     default:
         /* fetch multiple items */
@@ -3293,7 +7120,7 @@
             return NULL;
         for (i = 0; i < size; i++) {
             PyObject* item = match_getslice(
-                self, PyTuple_GET_ITEM(args, i), Py_None
+                self, PyTuple_GET_ITEM(args, i), Py_None, include_internal
                 );
             if (!item) {
                 Py_DECREF(result);
@@ -3306,9 +7133,15 @@
     return result;
 }
 
-static PyObject*
-match_groups(MatchObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* match_group(MatchObject* self, PyObject* args) {
+    return sre_get_match_group(self, args, FALSE);
+}
+
+static PyObject* match_internal_group(MatchObject* self, PyObject* args) {
+    return sre_get_match_group(self, args, TRUE);
+}
+
+static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* kw) {
     PyObject* result;
     Py_ssize_t index;
 
@@ -3317,26 +7150,25 @@
     if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
         return NULL;
 
-    result = PyTuple_New(self->groups-1);
+    result = PyTuple_New(self->groups - 1);
     if (!result)
         return NULL;
 
     for (index = 1; index < self->groups; index++) {
         PyObject* item;
-        item = match_getslice_by_index(self, index, def);
+        item = match_getslice_by_index(self, index, def, FALSE);
         if (!item) {
             Py_DECREF(result);
             return NULL;
         }
-        PyTuple_SET_ITEM(result, index-1, item);
+        PyTuple_SET_ITEM(result, index - 1, item);
     }
 
     return result;
 }
 
-static PyObject*
-match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
-{
+static PyObject* match_groupdict(MatchObject* self, PyObject* args,
+ PyObject* kw) {
     PyObject* result;
     PyObject* keys;
     Py_ssize_t index;
@@ -3361,7 +7193,7 @@
         key = PyList_GET_ITEM(keys, index);
         if (!key)
             goto failed;
-        value = match_getslice(self, key, def);
+        value = match_getslice(self, key, def, FALSE);
         if (!value) {
             Py_DECREF(key);
             goto failed;
@@ -3382,18 +7214,16 @@
     return NULL;
 }
 
-static PyObject*
-match_start(MatchObject* self, PyObject* args)
-{
+static PyObject* match_start(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
-
-    if (index < 0 || index >= self->groups) {
+    index = match_getindex(self, index_, FALSE);
+
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3402,21 +7232,19 @@
     }
 
     /* mark is -1 if group is undefined */
-    return Py_BuildValue("i", self->mark[index*2]);
-}
-
-static PyObject*
-match_end(MatchObject* self, PyObject* args)
-{
+    return Py_BuildValue("i", self->mark[index * 2]);
+}
+
+static PyObject* match_end(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
-
-    if (index < 0 || index >= self->groups) {
+    index = match_getindex(self, index_, FALSE);
+
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3425,12 +7253,10 @@
     }
 
     /* mark is -1 if group is undefined */
-    return Py_BuildValue("i", self->mark[index*2+1]);
-}
-
-LOCAL(PyObject*)
-_pair(Py_ssize_t i1, Py_ssize_t i2)
-{
+    return Py_BuildValue("i", self->mark[index * 2 + 1]);
+}
+
+LOCAL(PyObject*) _pair(Py_ssize_t i1, Py_ssize_t i2) {
     PyObject* pair;
     PyObject* item;
 
@@ -3450,23 +7276,21 @@
 
     return pair;
 
-  error:
+error:
     Py_DECREF(pair);
     return NULL;
 }
 
-static PyObject*
-match_span(MatchObject* self, PyObject* args)
-{
+static PyObject* match_span(MatchObject* self, PyObject* args) {
     Py_ssize_t index;
 
     PyObject* index_ = Py_False; /* zero */
     if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
         return NULL;
 
-    index = match_getindex(self, index_);
-
-    if (index < 0 || index >= self->groups) {
+    index = match_getindex(self, index_, FALSE);
+
+    if (index < 0 || index >= self->internal_groups) {
         PyErr_SetString(
             PyExc_IndexError,
             "no such group"
@@ -3475,12 +7299,10 @@
     }
 
     /* marks are -1 if group is undefined */
-    return _pair(self->mark[index*2], self->mark[index*2+1]);
-}
-
-static PyObject*
-match_regs(MatchObject* self)
-{
+    return _pair(self->mark[index * 2], self->mark[index * 2 + 1]);
+}
+
+static PyObject* match_regs(MatchObject* self) {
     PyObject* regs;
     PyObject* item;
     Py_ssize_t index;
@@ -3490,7 +7312,7 @@
         return NULL;
 
     for (index = 0; index < self->groups; index++) {
-        item = _pair(self->mark[index*2], self->mark[index*2+1]);
+        item = _pair(self->mark[index * 2], self->mark[index * 2 + 1]);
         if (!item) {
             Py_DECREF(regs);
             return NULL;
@@ -3504,14 +7326,12 @@
     return regs;
 }
 
-static PyObject*
-match_copy(MatchObject* self, PyObject *unused)
-{
+static PyObject* match_copy(MatchObject* self, PyObject* unused) {
 #ifdef USE_BUILTIN_COPY
     MatchObject* copy;
     Py_ssize_t slots, offset;
 
-    slots = 2 * (self->pattern->groups+1);
+    slots = 2 * (self->pattern->groups + 1);
 
     copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
     if (!copy)
@@ -3525,23 +7345,21 @@
     Py_XINCREF(self->string);
     Py_XINCREF(self->regs);
 
-    memcpy((char*) copy + offset, (char*) self + offset,
+    memcpy((char*)copy + offset, (char*)self + offset,
            sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
 
-    return (PyObject*) copy;
+    return (PyObject*)copy;
 #else
     PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
     return NULL;
 #endif
 }
 
-static PyObject*
-match_deepcopy(MatchObject* self, PyObject* memo)
-{
+static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) {
 #ifdef USE_BUILTIN_COPY
     MatchObject* copy;
 
-    copy = (MatchObject*) match_copy(self);
+    copy = (MatchObject*)match_copy(self);
     if (!copy)
         return NULL;
 
@@ -3558,7 +7376,53 @@
 #endif
 }
 
+/* match[item]: int/str -> one group; slice -> tuple of the selected groups. */
+static PyObject* match_subscript(MatchObject* self, PyObject* item) {
+    if (PyIndex_Check(item) || PyString_Check(item) || PyUnicode_Check(item))
+        /* integer or string subscript */
+        return match_getslice(self, item, Py_None, FALSE);
+    else if (PySlice_Check(item)) {
+        /* slice subscript */
+        Py_ssize_t start, stop, step, slicelength, from, to;
+        PyObject* result;
+
+        /* resolve the slice against self->groups */
+        if (PySlice_GetIndicesEx((PySliceObject*)item, self->groups,
+          &start, &stop, &step, &slicelength) < 0)
+            return NULL;
+
+        /* empty slice? */
+        if (slicelength <= 0)
+            return PyTuple_New(0);
+
+        /* create the result tuple */
+        result = PyTuple_New(slicelength);
+        if (result == NULL)
+            return NULL;
+
+        /* Copy exactly slicelength captures; looping until 'from' leaves
+           the group range would ignore 'stop' and overrun the tuple. */
+        for (from = start, to = 0; to < slicelength; from += step, to++) {
+            PyObject* group = match_getslice_by_index(self, from, Py_None,
+              TRUE);
+            if (group == NULL) {
+                Py_DECREF(result);
+                return NULL;
+            }
+            PyTuple_SET_ITEM(result, to, group);
+        }
+        return result;
+    } else {
+        /* invalid subscript type */
+        PyErr_Format(PyExc_TypeError,
+          "match indices must be integers or strings, not %.200s",
+          item->ob_type->tp_name);
+        return NULL;
+    }
+}
+
 static PyMethodDef match_methods[] = {
+    {"__getitem__", (PyCFunction)match_subscript, METH_O|METH_COEXIST},
     {"group", (PyCFunction) match_group, METH_VARARGS},
     {"start", (PyCFunction) match_start, METH_VARARGS},
     {"end", (PyCFunction) match_end, METH_VARARGS},
@@ -3568,15 +7432,14 @@
     {"expand", (PyCFunction) match_expand, METH_O},
     {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
     {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
+    {"_internal_group", (PyCFunction) match_internal_group, METH_VARARGS},
     {NULL, NULL}
 };
 
-static PyObject*
-match_getattr(MatchObject* self, char* name)
-{
+static PyObject* match_getattr(MatchObject* self, char* name) {
     PyObject* res;
 
-    res = Py_FindMethod(match_methods, (PyObject*) self, name);
+    res = Py_FindMethod(match_methods, (PyObject*)self, name);
     if (res)
         return res;
 
@@ -3590,9 +7453,9 @@
     }
 
     if (!strcmp(name, "lastgroup")) {
-        if (self->pattern->indexgroup && self->lastindex >= 0) {
+        if (self->pattern->indexgroup && self->last_named_index >= 0) {
             PyObject* result = PySequence_GetItem(
-                self->pattern->indexgroup, self->lastindex
+                self->pattern->indexgroup, self->last_named_index
                 );
             if (result)
                 return result;
@@ -3638,31 +7501,49 @@
 /* FIXME: implement setattr("string", None) as a special case (to
    detach the associated string, if any */
 
-statichere PyTypeObject Match_Type = {
+static PyTypeObject Match_Type = {
     PyObject_HEAD_INIT(NULL)
     0, "_" SRE_MODULE ".SRE_Match",
     sizeof(MatchObject), sizeof(Py_ssize_t),
-    (destructor)match_dealloc, /*tp_dealloc*/
-    0, /*tp_print*/
-    (getattrfunc)match_getattr /*tp_getattr*/
+    (destructor)match_dealloc,  /*tp_dealloc*/
+    0,                  /*tp_print*/
+    (getattrfunc)match_getattr, /*tp_getattr*/
+    0,                  /* tp_setattr */
+    0,                  /* tp_compare */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    &match_as_mapping,  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_HAVE_INDEX,  /* tp_flags */
+    0,                  /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    0,                  /* tp_weaklistoffset */
+    0,                  /* tp_iter */
+    0,                  /* tp_iternext */
+    match_methods,      /* tp_methods */
 };
 
-static PyObject*
-pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
-{
+static PyObject* pattern_new_match(PatternObject* pattern, SRE_STATE* state,
+  int status) {
     /* create match object (from state object) */
-
-    MatchObject* match;
-    Py_ssize_t i, j;
-    char* base;
-    int n;
-
     if (status > 0) {
+        MatchObject* match;
+        char* base = (char*) state->beginning;
+        Py_ssize_t mark_index;
+        int charsize = state->charsize;
 
         /* create match object (with room for extra group marks) */
         /* coverity[ampersand_in_size] */
-        match = PyObject_NEW_VAR(MatchObject, &Match_Type,
-                                 2*(pattern->groups+1));
+        match = PyObject_NEW_VAR(MatchObject, &Match_Type, 2 *
+          (pattern->internal_groups + 1));
         if (!match)
             return NULL;
 
@@ -3673,36 +7554,42 @@
         match->string = state->string;
 
         match->regs = NULL;
-        match->groups = pattern->groups+1;
+        match->groups = pattern->groups + 1;
+        match->internal_groups = pattern->internal_groups + 1;
 
         /* fill in group slices */
-
-        base = (char*) state->beginning;
-        n = state->charsize;
-
-        match->mark[0] = ((char*) state->start - base) / n;
-        match->mark[1] = ((char*) state->ptr - base) / n;
-
-        for (i = j = 0; i < pattern->groups; i++, j+=2)
-            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
-                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
-                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
+        if (state->reverse) {
+            match->mark[0] = ((char*) state->ptr - base) / charsize;
+            match->mark[1] = ((char*) state->end - base) / charsize;
+        } else {
+            match->mark[0] = ((char*) state->start - base) / charsize;
+            match->mark[1] = ((char*) state->ptr - base) / charsize;
+        }
+
+        for (mark_index = 0; mark_index < pattern->internal_groups * 2;
+          mark_index += 2) {
+            if (state->mark[mark_index] != NULL && state->mark[mark_index] <=
+              state->mark[mark_index + 1]) {
+                match->mark[mark_index + 2] =
+                  ((char*) state->mark[mark_index] - base) / charsize;
+                match->mark[mark_index + 3] =
+                 ((char*) state->mark[mark_index + 1] - base) / charsize;
             } else
-                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
+                match->mark[mark_index + 2] =
+                  match->mark[mark_index + 3] = -1; /* unmatched */
+        }
 
         match->pos = state->pos;
         match->endpos = state->endpos;
 
         match->lastindex = state->lastindex;
-
-        return (PyObject*) match;
-
+        match->last_named_index = state->last_named_index;
+
+        return (PyObject*)match;
     } else if (status == 0) {
-
         /* no match */
         Py_INCREF(Py_None);
         return Py_None;
-
     }
 
     /* internal error */
@@ -3714,30 +7601,31 @@
 /* -------------------------------------------------------------------- */
 /* scanner methods (experimental) */
 
-static void
-scanner_dealloc(ScannerObject* self)
-{
+static void scanner_dealloc(ScannerObject* self) {
     state_fini(&self->state);
     Py_DECREF(self->pattern);
     PyObject_DEL(self);
 }
 
-static PyObject*
-scanner_match(ScannerObject* self, PyObject *unused)
-{
+static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
     SRE_STATE* state = &self->state;
     PyObject* match;
     int status;
 
     state_reset(state);
 
-    state->ptr = state->start;
+    /* Where should we start the match? */
+    state->ptr = state->reverse ? state->end : state->start;
+    state->search_ptr = state->ptr;
+
+    /* Clear the marks. */
+    memset(state->mark, 0, state->pattern_code[0] * sizeof(SRE_CHAR*));
 
     if (state->charsize == 1) {
-        status = sre_match(state, PatternObject_GetCode(self->pattern));
+        status = sre_bmatch(state);
     } else {
 #if defined(HAVE_UNICODE)
-        status = sre_umatch(state, PatternObject_GetCode(self->pattern));
+        status = sre_umatch(state);
 #endif
     }
     if (PyErr_Occurred())
@@ -3746,43 +7634,56 @@
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
+    if (state->reverse) {
+        if (status == 0 || state->ptr == state->end)
+            state->end = (void*) ((char*) state->ptr - state->charsize);
+        else
+            state->end = state->ptr;
+    } else {
+        if (status == 0 || state->ptr == state->start)
+            state->start = (void*) ((char*) state->ptr + state->charsize);
+        else
+            state->start = state->ptr;
+    }
+
+    return match;
+}
+
+
+static PyObject* scanner_search(ScannerObject* self, PyObject* unused) {
+    SRE_STATE* state = &self->state;
+    void * start_ptr;
+    PyObject* match;
+    int status;
+
+    state_reset(state);
+
+    /* Where should we start the match? */
+    state->ptr = state->reverse ? state->end : state->start;
+    start_ptr = state->ptr;
+
+    if (state->charsize == 1) {
+        status = sre_bsearch(state);
+    } else {
+#if defined(HAVE_UNICODE)
+        status = sre_usearch(state);
+#endif
+    }
+    if (PyErr_Occurred())
+        return NULL;
+
+    match = pattern_new_match((PatternObject*) self->pattern,
+                               state, status);
+
+    /*
+     Continue the search from where we left off. Forbid another zero-width
+     match at the same start position.
+     */
+    if (state->reverse)
+        state->end = state->ptr;
     else
         state->start = state->ptr;
-
-    return match;
-}
-
-
-static PyObject*
-scanner_search(ScannerObject* self, PyObject *unused)
-{
-    SRE_STATE* state = &self->state;
-    PyObject* match;
-    int status;
-
-    state_reset(state);
-
-    state->ptr = state->start;
-
-    if (state->charsize == 1) {
-        status = sre_search(state, PatternObject_GetCode(self->pattern));
-    } else {
-#if defined(HAVE_UNICODE)
-        status = sre_usearch(state, PatternObject_GetCode(self->pattern));
-#endif
-    }
-    if (PyErr_Occurred())
-        return NULL;
-
-    match = pattern_new_match((PatternObject*) self->pattern,
-                               state, status);
-
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
-    else
-        state->start = state->ptr;
+    state->reject_zero_width = state->ptr == start_ptr;
 
     return match;
 }
@@ -3793,12 +7694,10 @@
     {NULL, NULL}
 };
 
-static PyObject*
-scanner_getattr(ScannerObject* self, char* name)
-{
+static PyObject* scanner_getattr(ScannerObject* self, char* name) {
     PyObject* res;
 
-    res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
+    res = Py_FindMethod(scanner_methods, (PyObject*)self, name);
     if (res)
         return res;
 
@@ -3814,7 +7713,7 @@
     return NULL;
 }
 
-statichere PyTypeObject Scanner_Type = {
+static PyTypeObject Scanner_Type = {
     PyObject_HEAD_INIT(NULL)
     0, "_" SRE_MODULE ".SRE_Scanner",
     sizeof(ScannerObject), 0,
@@ -3823,9 +7722,7 @@
     (getattrfunc)scanner_getattr, /*tp_getattr*/
 };
 
-static PyObject*
-pattern_scanner(PatternObject* pattern, PyObject* args)
-{
+static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args) {
     /* create search state object */
 
     ScannerObject* self;
@@ -3833,6 +7730,8 @@
     PyObject* string;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
+    SRE_CODE* pattern_code;
+
     if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
         return NULL;
 
@@ -3841,22 +7740,27 @@
     if (!self)
         return NULL;
 
-    string = state_init(&self->state, pattern, string, start, end);
+    pattern_code = PatternObject_GetCode(pattern);
+
+    string = state_init(&self->state, pattern, string, start, end,
+      pattern_code);
     if (!string) {
         PyObject_DEL(self);
         return NULL;
     }
 
     Py_INCREF(pattern);
-    self->pattern = (PyObject*) pattern;
-
-    return (PyObject*) self;
+    self->pattern = (PyObject*)pattern;
+
+    return (PyObject*)self;
 }
 
 static PyMethodDef _functions[] = {
     {"compile", _compile, METH_VARARGS},
     {"getcodesize", sre_codesize, METH_NOARGS},
     {"getlower", sre_getlower, METH_VARARGS},
+    {"getupper", sre_getupper, METH_VARARGS},
+    {"gettitle", sre_gettitle, METH_VARARGS},
     {NULL, NULL}
 };
 
@@ -3876,7 +7780,7 @@
 
     m = Py_InitModule("_" SRE_MODULE, _functions);
     if (m == NULL)
-    	return;
+        return;
     d = PyModule_GetDict(m);
 
     x = PyInt_FromLong(SRE_MAGIC);
=== modified file Modules/sre.h
--- Modules/sre.h 2006-06-12 03:05:40 +0000
+++ Modules/sre.h 2009-03-05 19:14:43 +0000
@@ -11,19 +11,16 @@
 #ifndef SRE_INCLUDED
 #define SRE_INCLUDED
 
+typedef int BOOL;
+enum BOOL {FALSE, TRUE};
+
 #include "sre_constants.h"
-
-/* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
-#define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD
     Py_ssize_t groups; /* must be first! */
+    Py_ssize_t internal_groups; /* both numbered and named (all named are
+                                   numbered) */
     PyObject* groupindex;
     PyObject* indexgroup;
     /* compatibility */
@@ -36,6 +33,7 @@
 } PatternObject;
 
 #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
+#define PatternObject_GetCodeSize(o) (((PatternObject*)(o))->codesize)
 
 typedef struct {
     PyObject_VAR_HEAD
@@ -43,8 +41,13 @@
     PyObject* regs; /* cached list of matching spans */
     PatternObject* pattern; /* link to the regex (pattern) object */
     Py_ssize_t pos, endpos; /* current target slice */
-    Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
+    Py_ssize_t lastindex; /* last index marker seen by the engine
+                             (-1 if none) */
+    Py_ssize_t last_named_index; /* last named index marker seen by the engine
+                                    (-1 if none) */
     Py_ssize_t groups; /* number of groups (start/end marks) */
+    Py_ssize_t internal_groups; /* number of groups, both numbered and named
+                                   (all named are also numbered) */
     Py_ssize_t mark[1];
 } MatchObject;
 
@@ -53,12 +56,53 @@
 /* FIXME: <fl> shouldn't be a constant, really... */
 #define SRE_MARK_SIZE 200
 
-typedef struct SRE_REPEAT_T {
+#define SRE_BACKTRACK_CHUNK_SIZE 1024
+
+typedef struct SRE_BACKTRACK_ITEM { /* element of SRE_BACKTRACK_CHUNK.items */
+    SRE_CODE op; /* presumably selects which union member is live - confirm */
+    union /* NOTE(review): unnamed union member is not C89 - confirm OK */
+    {
+        struct {
+            void* text_start;
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+        } assert;
+        struct {
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+        } branch;
+        struct { /* one index/pointer pair each for numbered and named */
+            Py_ssize_t numbered_index;
+            void* numbered_mark_ptr;
+            Py_ssize_t named_index;
+            void* named_mark_ptr;
+        } mark;
+        struct {
+            void* text_ptr;
+            Py_ssize_t repeat_min;
+            Py_ssize_t repeat_max;
+            Py_ssize_t repeat_counter;
+            void* repeat_start;
+            struct SRE_BACKTRACK_ITEM* loop; /* Outer loop for REPEAT, parent
+                                                loop for END_REPEAT. */
+            SRE_CODE* pattern_ptr;
+        } repeat;
+    };
+    void* marks; /* Numbered and named marks. */
+} SRE_BACKTRACK_ITEM;
+
+typedef struct SRE_BACKTRACK_CHUNK { /* fixed-size array of backtrack items */
+    struct SRE_BACKTRACK_CHUNK* previous; /* link to another chunk */
+    SRE_BACKTRACK_ITEM items[SRE_BACKTRACK_CHUNK_SIZE];
     Py_ssize_t count;
-    SRE_CODE* pattern; /* points to REPEAT operator arguments */
-    void* last_ptr; /* helper to check for infinite loops */
-    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
-} SRE_REPEAT;
+} SRE_BACKTRACK_CHUNK;
+
+typedef struct SRE_ENCODING_TABLE { /* hooks; SRE_STATE.encoding points here */
+    BOOL (*in_category)(SRE_CODE category, Py_UCS4 ch); /* category test */
+    Py_UCS4 (*lower)(Py_UCS4 ch); /* the three hooks below each take one */
+    Py_UCS4 (*upper)(Py_UCS4 ch); /* Py_UCS4 and return one Py_UCS4 */
+    Py_UCS4 (*title)(Py_UCS4 ch);
+} SRE_ENCODING_TABLE;
 
 typedef struct {
     /* string pointers */
@@ -66,23 +110,26 @@
     void* beginning; /* start of original string */
     void* start; /* start of current slice */
     void* end; /* end of original string */
+    void* search_ptr; /* start of search (used by \G) */
     /* attributes for the match object */
     PyObject* string;
     Py_ssize_t pos, endpos;
     /* character size */
     int charsize;
+    BOOL reverse;
+    BOOL reject_zero_width;
     /* registers */
     Py_ssize_t lastindex;
     Py_ssize_t lastmark;
+    Py_ssize_t last_named_index;
     void* mark[SRE_MARK_SIZE];
     /* dynamically allocated stuff */
-    char* data_stack;
-    size_t data_stack_size;
-    size_t data_stack_base;
-    /* current repeat context */
-    SRE_REPEAT *repeat;
+    SRE_BACKTRACK_CHUNK* backtrack_chunk;
+    unsigned int numbered_mark_count;
+    unsigned int named_mark_count;
+    SRE_CODE* pattern_code;
     /* hooks */
-    SRE_TOLOWER_HOOK lower;
+    SRE_ENCODING_TABLE* encoding;
 } SRE_STATE;
 
 typedef struct {
=== modified file Modules/sre_constants.h
--- Modules/sre_constants.h 2003-10-17 22:13:16 +0000
+++ Modules/sre_constants.h 2009-03-04 15:26:22 +0000
@@ -11,76 +11,299 @@
  * See the _sre.c file for information on usage and redistribution.
  */
 
-#define SRE_MAGIC 20031017
+#define SRE_MAGIC 20081218
+
+/* size of a code word (must agree with SRE_BYTES_PER_CODE below, and be
+   large enough to hold a Py_UNICODE character) */
+typedef unsigned int SRE_CODE;
+
+#define SRE_BYTES_PER_CODE 4
+#define SRE_BITS_PER_CODE 32
+#define SRE_UNLIMITED_REPEATS 0xFFFFFFFF
+
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
 #define SRE_OP_ANY_ALL 3
-#define SRE_OP_ASSERT 4
-#define SRE_OP_ASSERT_NOT 5
-#define SRE_OP_AT 6
-#define SRE_OP_BRANCH 7
-#define SRE_OP_CALL 8
-#define SRE_OP_CATEGORY 9
-#define SRE_OP_CHARSET 10
-#define SRE_OP_BIGCHARSET 11
-#define SRE_OP_GROUPREF 12
-#define SRE_OP_GROUPREF_EXISTS 13
-#define SRE_OP_GROUPREF_IGNORE 14
-#define SRE_OP_IN 15
-#define SRE_OP_IN_IGNORE 16
-#define SRE_OP_INFO 17
-#define SRE_OP_JUMP 18
-#define SRE_OP_LITERAL 19
-#define SRE_OP_LITERAL_IGNORE 20
-#define SRE_OP_MARK 21
-#define SRE_OP_MAX_UNTIL 22
-#define SRE_OP_MIN_UNTIL 23
-#define SRE_OP_NOT_LITERAL 24
-#define SRE_OP_NOT_LITERAL_IGNORE 25
-#define SRE_OP_NEGATE 26
-#define SRE_OP_RANGE 27
-#define SRE_OP_REPEAT 28
-#define SRE_OP_REPEAT_ONE 29
-#define SRE_OP_SUBPATTERN 30
-#define SRE_OP_MIN_REPEAT_ONE 31
-#define SRE_AT_BEGINNING 0
-#define SRE_AT_BEGINNING_LINE 1
-#define SRE_AT_BEGINNING_STRING 2
-#define SRE_AT_BOUNDARY 3
-#define SRE_AT_NON_BOUNDARY 4
-#define SRE_AT_END 5
-#define SRE_AT_END_LINE 6
-#define SRE_AT_END_STRING 7
-#define SRE_AT_LOC_BOUNDARY 8
-#define SRE_AT_LOC_NON_BOUNDARY 9
-#define SRE_AT_UNI_BOUNDARY 10
-#define SRE_AT_UNI_NON_BOUNDARY 11
-#define SRE_CATEGORY_DIGIT 0
-#define SRE_CATEGORY_NOT_DIGIT 1
-#define SRE_CATEGORY_SPACE 2
-#define SRE_CATEGORY_NOT_SPACE 3
-#define SRE_CATEGORY_WORD 4
-#define SRE_CATEGORY_NOT_WORD 5
-#define SRE_CATEGORY_LINEBREAK 6
-#define SRE_CATEGORY_NOT_LINEBREAK 7
-#define SRE_CATEGORY_LOC_WORD 8
-#define SRE_CATEGORY_LOC_NOT_WORD 9
-#define SRE_CATEGORY_UNI_DIGIT 10
-#define SRE_CATEGORY_UNI_NOT_DIGIT 11
-#define SRE_CATEGORY_UNI_SPACE 12
-#define SRE_CATEGORY_UNI_NOT_SPACE 13
-#define SRE_CATEGORY_UNI_WORD 14
-#define SRE_CATEGORY_UNI_NOT_WORD 15
-#define SRE_CATEGORY_UNI_LINEBREAK 16
-#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17
-#define SRE_FLAG_TEMPLATE 1
-#define SRE_FLAG_IGNORECASE 2
-#define SRE_FLAG_LOCALE 4
-#define SRE_FLAG_MULTILINE 8
-#define SRE_FLAG_DOTALL 16
-#define SRE_FLAG_UNICODE 32
-#define SRE_FLAG_VERBOSE 64
-#define SRE_INFO_PREFIX 1
-#define SRE_INFO_LITERAL 2
-#define SRE_INFO_CHARSET 4
+#define SRE_OP_ANY_ALL_REV 4
+#define SRE_OP_ANY_REV 5
+#define SRE_OP_ASSERT 6
+#define SRE_OP_ASSERT_NOT 7
+#define SRE_OP_ATOMIC 8
+#define SRE_OP_BOUNDARY 9
+#define SRE_OP_BRANCH 10
+#define SRE_OP_CATEGORY 11
+#define SRE_OP_CATEGORY_REV 12
+#define SRE_OP_CHARSET 13
+#define SRE_OP_CHARSET_IGNORE 14
+#define SRE_OP_CHARSET_IGNORE_REV 15
+#define SRE_OP_CHARSET_REV 16
+#define SRE_OP_END_ASSERT 17
+#define SRE_OP_END_ASSERT_NOT 18
+#define SRE_OP_END_ATOMIC 19
+#define SRE_OP_END_OF_LINE 20
+#define SRE_OP_END_OF_STRING 21
+#define SRE_OP_END_OF_STRING_LN 22
+#define SRE_OP_END_REPEAT_MAX 23
+#define SRE_OP_END_REPEAT_MAX_REV 24
+#define SRE_OP_END_REPEAT_MIN 25
+#define SRE_OP_END_REPEAT_MIN_REV 26
+#define SRE_OP_END_REPEAT_POSS 27
+#define SRE_OP_END_REPEAT_POSS_REV 28
+#define SRE_OP_GROUPREF 29
+#define SRE_OP_GROUPREF_EXISTS 30
+#define SRE_OP_GROUPREF_IGNORE 31
+#define SRE_OP_GROUPREF_IGNORE_REV 32
+#define SRE_OP_GROUPREF_REV 33
+#define SRE_OP_JUMP 34
+#define SRE_OP_LITERAL 35
+#define SRE_OP_LITERAL_IGNORE 36
+#define SRE_OP_LITERAL_IGNORE_REV 37
+#define SRE_OP_LITERAL_REV 38
+#define SRE_OP_LITERAL_STRING 39
+#define SRE_OP_LITERAL_STRING_IGNORE 40
+#define SRE_OP_LITERAL_STRING_IGNORE_REV 41
+#define SRE_OP_LITERAL_STRING_REV 42
+#define SRE_OP_MARK 43
+#define SRE_OP_NOT_BOUNDARY 44
+#define SRE_OP_NOT_CATEGORY 45
+#define SRE_OP_NOT_CATEGORY_REV 46
+#define SRE_OP_NOT_CHARSET 47
+#define SRE_OP_NOT_CHARSET_IGNORE 48
+#define SRE_OP_NOT_CHARSET_IGNORE_REV 49
+#define SRE_OP_NOT_CHARSET_REV 50
+#define SRE_OP_NOT_LITERAL 51
+#define SRE_OP_NOT_LITERAL_IGNORE 52
+#define SRE_OP_NOT_LITERAL_IGNORE_REV 53
+#define SRE_OP_NOT_LITERAL_REV 54
+#define SRE_OP_NOT_RANGE 55
+#define SRE_OP_NOT_RANGE_IGNORE 56
+#define SRE_OP_NOT_RANGE_IGNORE_REV 57
+#define SRE_OP_NOT_RANGE_REV 58
+#define SRE_OP_NOT_SET 59
+#define SRE_OP_NOT_SET_IGNORE 60
+#define SRE_OP_NOT_SET_IGNORE_REV 61
+#define SRE_OP_NOT_SET_REV 62
+#define SRE_OP_RANGE 63
+#define SRE_OP_RANGE_IGNORE 64
+#define SRE_OP_RANGE_IGNORE_REV 65
+#define SRE_OP_RANGE_REV 66
+#define SRE_OP_REPEAT_MAX 67
+#define SRE_OP_REPEAT_MAX_REV 68
+#define SRE_OP_REPEAT_MIN 69
+#define SRE_OP_REPEAT_MIN_REV 70
+#define SRE_OP_REPEAT_ONE_MAX 71
+#define SRE_OP_REPEAT_ONE_MAX_REV 72
+#define SRE_OP_REPEAT_ONE_MIN 73
+#define SRE_OP_REPEAT_ONE_MIN_REV 74
+#define SRE_OP_REPEAT_ONE_POSS 75
+#define SRE_OP_REPEAT_ONE_POSS_REV 76
+#define SRE_OP_REPEAT_POSS 77
+#define SRE_OP_REPEAT_POSS_REV 78
+#define SRE_OP_SET 79
+#define SRE_OP_SET_IGNORE 80
+#define SRE_OP_SET_IGNORE_REV 81
+#define SRE_OP_SET_REV 82
+#define SRE_OP_START_OF_LINE 83
+#define SRE_OP_START_OF_SEARCH 84
+#define SRE_OP_START_OF_STRING 85
+#define SRE_OP_SUBPATTERN 86
+#define SRE_MAX_OP 86
+
+#define SRE_FLAG_TEMPLATE 0x1
+#define SRE_FLAG_IGNORECASE 0x2
+#define SRE_FLAG_LOCALE 0x4
+#define SRE_FLAG_MULTILINE 0x8
+#define SRE_FLAG_DOTALL 0x10
+#define SRE_FLAG_UNICODE 0x20
+#define SRE_FLAG_VERBOSE 0x40
+#define SRE_FLAG_REVERSE 0x100
+#define SRE_FLAG_ZEROWIDTH 0x200
+
+#define SRE_INFO_PREFIX 0x1
+#define SRE_INFO_LITERAL 0x2
+#define SRE_INFO_CHARSET 0x4
+
+#define SRE_UNI_CAT_Lu 0x1
+#define SRE_UNI_CAT_Ll 0x2
+#define SRE_UNI_CAT_Lt 0x3
+#define SRE_UNI_CAT_Mn 0x4
+#define SRE_UNI_CAT_Mc 0x5
+#define SRE_UNI_CAT_Me 0x6
+#define SRE_UNI_CAT_Nd 0x7
+#define SRE_UNI_CAT_Nl 0x8
+#define SRE_UNI_CAT_No 0x9
+#define SRE_UNI_CAT_Zs 0xA
+#define SRE_UNI_CAT_Zl 0xB
+#define SRE_UNI_CAT_Zp 0xC
+#define SRE_UNI_CAT_Cc 0xD
+#define SRE_UNI_CAT_Cf 0xE
+#define SRE_UNI_CAT_Cs 0xF
+#define SRE_UNI_CAT_Co 0x10
+#define SRE_UNI_CAT_Lm 0x12
+#define SRE_UNI_CAT_Lo 0x13
+#define SRE_UNI_CAT_Pc 0x14
+#define SRE_UNI_CAT_Pd 0x15
+#define SRE_UNI_CAT_Ps 0x16
+#define SRE_UNI_CAT_Pe 0x17
+#define SRE_UNI_CAT_Pi 0x18
+#define SRE_UNI_CAT_Pf 0x19
+#define SRE_UNI_CAT_Po 0x1A
+#define SRE_UNI_CAT_Sm 0x1B
+#define SRE_UNI_CAT_Sc 0x1C
+#define SRE_UNI_CAT_Sk 0x1D
+#define SRE_UNI_CAT_So 0x1E
+#define SRE_UNI_CAT_L 0x20
+#define SRE_UNI_CAT_M 0x21
+#define SRE_UNI_CAT_N 0x22
+#define SRE_UNI_CAT_Z 0x23
+#define SRE_UNI_CAT_C 0x24
+#define SRE_UNI_CAT_P 0x25
+#define SRE_UNI_CAT_S 0x26
+
+#define SRE_CAT_Alpha 0x27
+#define SRE_CAT_Alnum 0x28
+#define SRE_CAT_ASCII 0x29
+#define SRE_CAT_Blank 0x2A
+#define SRE_CAT_Cntrl 0x2B
+#define SRE_CAT_Digit 0x2C
+#define SRE_CAT_Graph 0x2D
+#define SRE_CAT_LineBreak 0x2E
+#define SRE_CAT_Lower 0x2F
+#define SRE_CAT_Print 0x30
+#define SRE_CAT_Punct 0x31
+#define SRE_CAT_Space 0x32
+#define SRE_CAT_Upper 0x33
+#define SRE_CAT_Word 0x34
+#define SRE_CAT_XDigit 0x35
+
+#define SRE_CAT_MASK_C 0x0001E000 /* bits 13-16: Cc Cf Cs Co */
+#define SRE_CAT_MASK_L 0x000C000E /* bits 1-3, 18-19: Lu Ll Lt Lm Lo */
+#define SRE_CAT_MASK_M 0x00000070 /* bits 4-6: Mn Mc Me */
+#define SRE_CAT_MASK_N 0x00000380 /* bits 7-9: Nd Nl No */
+#define SRE_CAT_MASK_P 0x07F00000 /* bits 20-26: Pc Pd Ps Pe Pi Pf Po */
+#define SRE_CAT_MASK_S 0x78000000 /* bits 27-30: Sm Sc Sk So */
+#define SRE_CAT_MASK_Z 0x00001C00 /* bits 10-12: Zs Zl Zp */
+
+#define SRE_CAT_MASK_Alnum 0x000C008E /* MASK_L plus bit 7 (Nd) */
+#define SRE_CAT_MASK_Alpha 0x000C000E /* same bits as MASK_L */
+#define SRE_CAT_MASK_Graph 0x7FFC03FE /* MASK_L | M | N | P | S */
+#define SRE_CAT_MASK_Print 0x7FFC1FFE /* MASK_Graph | MASK_Z */
+#define SRE_CAT_MASK_Punct 0x7FF00000 /* MASK_P | MASK_S */
+#define SRE_CAT_MASK_Word 0x001C03FE /* MASK_L | M | N plus bit 20 (Pc) */
+
+/* info for operator validation */
+typedef struct SRE_OpInfo {
+    char* name; /* operator name without the SRE_OP_ prefix */
+    int type; /* presumably an SRE_TYPE_* value (table uses 0..16) */
+    int direction; /* 1, -1 (the *_REV operators) or 0 in sre_op_info */
+    int end_marker; /* an SRE_OP_END_* opcode, or 0 */
+} SRE_OpInfo;
+
+#define SRE_TYPE_INVALID 0
+#define SRE_TYPE_ASSERT 1
+#define SRE_TYPE_ATOMIC 2
+#define SRE_TYPE_BRANCH 3
+#define SRE_TYPE_CATEGORY 4
+#define SRE_TYPE_CHARSET 5
+#define SRE_TYPE_GROUPREF 6
+#define SRE_TYPE_GROUPREF_EXISTS 7
+#define SRE_TYPE_LITERAL 8
+#define SRE_TYPE_LITERAL_STRING 9
+#define SRE_TYPE_MARK 10
+#define SRE_TYPE_POSITION 11
+#define SRE_TYPE_RANGE 12
+#define SRE_TYPE_REPEAT 13
+#define SRE_TYPE_REPEAT_ONE 14
+#define SRE_TYPE_SET 15
+#define SRE_TYPE_SIMPLE_CATEGORY 16
+
+static SRE_OpInfo sre_op_info[] = { /* entry i describes opcode value i */
+    {"FAILURE", SRE_TYPE_INVALID, 0, 0},
+    {"SUCCESS", SRE_TYPE_INVALID, 0, 0},
+    {"ANY", SRE_TYPE_SIMPLE_CATEGORY, 1, 0},
+    {"ANY_ALL", SRE_TYPE_SIMPLE_CATEGORY, 1, 0},
+    {"ANY_ALL_REV", SRE_TYPE_SIMPLE_CATEGORY, -1, 0},
+    {"ANY_REV", SRE_TYPE_SIMPLE_CATEGORY, -1, 0},
+    {"ASSERT", SRE_TYPE_ASSERT, 0, SRE_OP_END_ASSERT},
+    {"ASSERT_NOT", SRE_TYPE_ASSERT, 0, SRE_OP_END_ASSERT_NOT},
+    {"ATOMIC", SRE_TYPE_ATOMIC, 0, SRE_OP_END_ATOMIC},
+    {"BOUNDARY", SRE_TYPE_POSITION, 0, 0},
+    {"BRANCH", SRE_TYPE_BRANCH, 0, 0},
+    {"CATEGORY", SRE_TYPE_CATEGORY, 1, 0},
+    {"CATEGORY_REV", SRE_TYPE_CATEGORY, -1, 0},
+    {"CHARSET", SRE_TYPE_CHARSET, 1, 0},
+    {"CHARSET_IGNORE", SRE_TYPE_CHARSET, 1, 0},
+    {"CHARSET_IGNORE_REV", SRE_TYPE_CHARSET, -1, 0},
+    {"CHARSET_REV", SRE_TYPE_CHARSET, -1, 0},
+    {"END_ASSERT", SRE_TYPE_INVALID, 0, 0},
+    {"END_ASSERT_NOT", SRE_TYPE_INVALID, 0, 0},
+    {"END_ATOMIC", SRE_TYPE_INVALID, 0, 0},
+    {"END_OF_LINE", SRE_TYPE_POSITION, 0, 0},
+    {"END_OF_STRING", SRE_TYPE_POSITION, 0, 0},
+    {"END_OF_STRING_LN", SRE_TYPE_POSITION, 0, 0},
+    {"END_REPEAT_MAX", SRE_TYPE_INVALID, 1, 0},
+    {"END_REPEAT_MAX_REV", SRE_TYPE_INVALID, -1, 0},
+    {"END_REPEAT_MIN", SRE_TYPE_INVALID, 1, 0},
+    {"END_REPEAT_MIN_REV", SRE_TYPE_INVALID, -1, 0},
+    {"END_REPEAT_POSS", SRE_TYPE_INVALID, 1, 0},
+    {"END_REPEAT_POSS_REV", SRE_TYPE_INVALID, -1, 0},
+    {"GROUPREF", SRE_TYPE_GROUPREF, 1, 0},
+    {"GROUPREF_EXISTS", SRE_TYPE_GROUPREF_EXISTS, 0, 0},
+    {"GROUPREF_IGNORE", SRE_TYPE_GROUPREF, 1, 0},
+    {"GROUPREF_IGNORE_REV", SRE_TYPE_GROUPREF, -1, 0},
+    {"GROUPREF_REV", SRE_TYPE_GROUPREF, -1, 0},
+    {"JUMP", SRE_TYPE_INVALID, 0, 0},
+    {"LITERAL", SRE_TYPE_LITERAL, 1, 0},
+    {"LITERAL_IGNORE", SRE_TYPE_LITERAL, 1, 0},
+    {"LITERAL_IGNORE_REV", SRE_TYPE_LITERAL, -1, 0},
+    {"LITERAL_REV", SRE_TYPE_LITERAL, -1, 0},
+    {"LITERAL_STRING", SRE_TYPE_LITERAL_STRING, 1, 0},
+    {"LITERAL_STRING_IGNORE", SRE_TYPE_LITERAL_STRING, 1, 0},
+    {"LITERAL_STRING_IGNORE_REV", SRE_TYPE_LITERAL_STRING, -1, 0},
+    {"LITERAL_STRING_REV", SRE_TYPE_LITERAL_STRING, -1, 0},
+    {"MARK", SRE_TYPE_MARK, 0, 0},
+    {"NOT_BOUNDARY", SRE_TYPE_POSITION, 0, 0},
+    {"NOT_CATEGORY", SRE_TYPE_CATEGORY, 1, 0},
+    {"NOT_CATEGORY_REV", SRE_TYPE_CATEGORY, -1, 0},
+    {"NOT_CHARSET", SRE_TYPE_CHARSET, 1, 0},
+    {"NOT_CHARSET_IGNORE", SRE_TYPE_CHARSET, 1, 0},
+    {"NOT_CHARSET_IGNORE_REV", SRE_TYPE_CHARSET, -1, 0},
+    {"NOT_CHARSET_REV", SRE_TYPE_CHARSET, -1, 0},
+    {"NOT_LITERAL", SRE_TYPE_LITERAL, 1, 0},
+    {"NOT_LITERAL_IGNORE", SRE_TYPE_LITERAL, 1, 0},
+    {"NOT_LITERAL_IGNORE_REV", SRE_TYPE_LITERAL, -1, 0},
+    {"NOT_LITERAL_REV", SRE_TYPE_LITERAL, -1, 0},
+    {"NOT_RANGE", SRE_TYPE_RANGE, 1, 0},
+    {"NOT_RANGE_IGNORE", SRE_TYPE_RANGE, 1, 0},
+    {"NOT_RANGE_IGNORE_REV", SRE_TYPE_RANGE, -1, 0},
+    {"NOT_RANGE_REV", SRE_TYPE_RANGE, -1, 0},
+    {"NOT_SET", SRE_TYPE_SET, 1, 0},
+    {"NOT_SET_IGNORE", SRE_TYPE_SET, 1, 0},
+    {"NOT_SET_IGNORE_REV", SRE_TYPE_SET, -1, 0},
+    {"NOT_SET_REV", SRE_TYPE_SET, -1, 0},
+    {"RANGE", SRE_TYPE_RANGE, 1, 0},
+    {"RANGE_IGNORE", SRE_TYPE_RANGE, 1, 0},
+    {"RANGE_IGNORE_REV", SRE_TYPE_RANGE, -1, 0},
+    {"RANGE_REV", SRE_TYPE_RANGE, -1, 0},
+    {"REPEAT_MAX", SRE_TYPE_REPEAT, 1, SRE_OP_END_REPEAT_MAX},
+    {"REPEAT_MAX_REV", SRE_TYPE_REPEAT, -1, SRE_OP_END_REPEAT_MAX}, /* NOTE(review): not _REV? */
+    {"REPEAT_MIN", SRE_TYPE_REPEAT, 1, SRE_OP_END_REPEAT_MIN},
+    {"REPEAT_MIN_REV", SRE_TYPE_REPEAT, -1, SRE_OP_END_REPEAT_MIN},
+    {"REPEAT_ONE_MAX", SRE_TYPE_REPEAT_ONE, 1, 0},
+    {"REPEAT_ONE_MAX_REV", SRE_TYPE_REPEAT_ONE, -1, 0},
+    {"REPEAT_ONE_MIN", SRE_TYPE_REPEAT_ONE, 1, 0},
+    {"REPEAT_ONE_MIN_REV", SRE_TYPE_REPEAT_ONE, -1, 0},
+    {"REPEAT_ONE_POSS", SRE_TYPE_REPEAT_ONE, 1, 0},
+    {"REPEAT_ONE_POSS_REV", SRE_TYPE_REPEAT_ONE, -1, 0},
+    {"REPEAT_POSS", SRE_TYPE_REPEAT, 1, SRE_OP_END_REPEAT_POSS},
+    {"REPEAT_POSS_REV", SRE_TYPE_REPEAT, -1, SRE_OP_END_REPEAT_POSS},
+    {"SET", SRE_TYPE_SET, 1, 0},
+    {"SET_IGNORE", SRE_TYPE_SET, 1, 0},
+    {"SET_IGNORE_REV", SRE_TYPE_SET, -1, 0},
+    {"SET_REV", SRE_TYPE_SET, -1, 0},
+    {"START_OF_LINE", SRE_TYPE_POSITION, 0, 0},
+    {"START_OF_SEARCH", SRE_TYPE_POSITION, 0, 0},
+    {"START_OF_STRING", SRE_TYPE_POSITION, 0, 0},
+    {"SUBPATTERN", SRE_TYPE_INVALID, 0, 0},
+};