=== modified file Lib/sre_constants.py
--- Lib/sre_constants.py 2004-08-25 02:22:30 +0000
+++ Lib/sre_constants.py 2009-04-16 14:53:11 +0000
@@ -11,13 +11,16 @@
 
 """Internal support module for sre"""
 
+import operator
+
 # update when constants are added or removed
 
-MAGIC = 20031017
+MAGIC = 20090329
 
 # max code word in this release
 
-MAXREPEAT = 65535
+CODESIZE = 4
+MAXREPEAT = (1 << (CODESIZE * 8)) - 1
 
 # SRE standard exception (access as sre.error)
 # should this really be here?
@@ -25,181 +28,104 @@
 class error(Exception):
     pass
 
-# operators
-
-FAILURE = "failure"
-SUCCESS = "success"
-
-ANY = "any"
-ANY_ALL = "any_all"
-ASSERT = "assert"
-ASSERT_NOT = "assert_not"
-AT = "at"
-BIGCHARSET = "bigcharset"
-BRANCH = "branch"
-CALL = "call"
-CATEGORY = "category"
-CHARSET = "charset"
-GROUPREF = "groupref"
-GROUPREF_IGNORE = "groupref_ignore"
-GROUPREF_EXISTS = "groupref_exists"
-IN = "in"
-IN_IGNORE = "in_ignore"
-INFO = "info"
-JUMP = "jump"
-LITERAL = "literal"
-LITERAL_IGNORE = "literal_ignore"
-MARK = "mark"
-MAX_REPEAT = "max_repeat"
-MAX_UNTIL = "max_until"
-MIN_REPEAT = "min_repeat"
-MIN_UNTIL = "min_until"
-NEGATE = "negate"
-NOT_LITERAL = "not_literal"
-NOT_LITERAL_IGNORE = "not_literal_ignore"
-RANGE = "range"
-REPEAT = "repeat"
-REPEAT_ONE = "repeat_one"
-SUBPATTERN = "subpattern"
-MIN_REPEAT_ONE = "min_repeat_one"
-
-# positions
-AT_BEGINNING = "at_beginning"
-AT_BEGINNING_LINE = "at_beginning_line"
-AT_BEGINNING_STRING = "at_beginning_string"
-AT_BOUNDARY = "at_boundary"
-AT_NON_BOUNDARY = "at_non_boundary"
-AT_END = "at_end"
-AT_END_LINE = "at_end_line"
-AT_END_STRING = "at_end_string"
-AT_LOC_BOUNDARY = "at_loc_boundary"
-AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
-AT_UNI_BOUNDARY = "at_uni_boundary"
-AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
-
-# categories
-CATEGORY_DIGIT = "category_digit"
-CATEGORY_NOT_DIGIT = "category_not_digit"
-CATEGORY_SPACE = "category_space"
-CATEGORY_NOT_SPACE = "category_not_space"
-CATEGORY_WORD = "category_word"
-CATEGORY_NOT_WORD = "category_not_word"
-CATEGORY_LINEBREAK = "category_linebreak"
-CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
-CATEGORY_LOC_WORD = "category_loc_word"
-CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
-CATEGORY_UNI_DIGIT = "category_uni_digit"
-CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
-CATEGORY_UNI_SPACE = "category_uni_space"
-CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
-CATEGORY_UNI_WORD = "category_uni_word"
-CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
-CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
-CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
-
-OPCODES = [
-
-    # failure=0 success=1 (just because it looks better that way :-)
-    FAILURE, SUCCESS,
-
-    ANY, ANY_ALL,
-    ASSERT, ASSERT_NOT,
-    AT,
-    BRANCH,
-    CALL,
-    CATEGORY,
-    CHARSET, BIGCHARSET,
-    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
-    IN, IN_IGNORE,
-    INFO,
-    JUMP,
-    LITERAL, LITERAL_IGNORE,
-    MARK,
-    MAX_UNTIL,
-    MIN_UNTIL,
-    NOT_LITERAL, NOT_LITERAL_IGNORE,
-    NEGATE,
-    RANGE,
-    REPEAT,
-    REPEAT_ONE,
-    SUBPATTERN,
-    MIN_REPEAT_ONE
-
-]
-
-ATCODES = [
-    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
-    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
-    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
-    AT_UNI_NON_BOUNDARY
-]
-
-CHCODES = [
-    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
-    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
-    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
-    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
-    CATEGORY_UNI_NOT_LINEBREAK
-]
-
-def makedict(list):
-    d = {}
-    i = 0
-    for item in list:
-        d[item] = i
-        i = i + 1
-    return d
-
-OPCODES = makedict(OPCODES)
-ATCODES = makedict(ATCODES)
-CHCODES = makedict(CHCODES)
-
-# replacement operations for "ignore case" mode
-OP_IGNORE = {
-    GROUPREF: GROUPREF_IGNORE,
-    IN: IN_IGNORE,
-    LITERAL: LITERAL_IGNORE,
-    NOT_LITERAL: NOT_LITERAL_IGNORE
-}
-
-AT_MULTILINE = {
-    AT_BEGINNING: AT_BEGINNING_LINE,
-    AT_END: AT_END_LINE
-}
-
-AT_LOCALE = {
-    AT_BOUNDARY: AT_LOC_BOUNDARY,
-    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
-}
-
-AT_UNICODE = {
-    AT_BOUNDARY: AT_UNI_BOUNDARY,
-    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
-}
-
-CH_LOCALE = {
-    CATEGORY_DIGIT: CATEGORY_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_LOC_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
-}
-
-CH_UNICODE = {
-    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
-    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
-    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
-    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
-    CATEGORY_WORD: CATEGORY_UNI_WORD,
-    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
-    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
-    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
-}
+# list of operators
+# the fields are op_code, op_type, terminator
+operators = """
+FAILURE - -
+SUCCESS - -
+
+ANY SIMPLE -
+ANY_ALL SIMPLE -
+ASSERT ASSERT END_ASSERT
+ASSERT_NOT ASSERT END_ASSERT_NOT
+BEGINNING_LINE SIMPLE -
+BEGINNING_STRING SIMPLE -
+BOUNDARY SIMPLE -
+BRANCH BRANCH -
+CATEGORY CATEGORY -
+CHARSET CHARSET -
+CHARSET_IGNORE CHARSET -
+END_LINE SIMPLE -
+END_STRING SIMPLE -
+END_STRING_LINE SIMPLE -
+GROUPREF GROUPREF -
+GROUPREF_EXISTS GROUPREF_EXISTS -
+GROUPREF_IGNORE GROUPREF -
+JUMP - -
+LITERAL LITERAL -
+LITERAL_IGNORE LITERAL -
+MARK MARK -
+MAX_REPEAT REPEAT END_MAX_REPEAT
+MAX_REPEAT_ONE REPEAT_ONE -
+MIN_REPEAT REPEAT END_MIN_REPEAT
+MIN_REPEAT_ONE REPEAT_ONE -
+NOT_BOUNDARY SIMPLE -
+NOT_CATEGORY CATEGORY -
+NOT_CHARSET CHARSET -
+NOT_CHARSET_IGNORE CHARSET -
+NOT_LITERAL LITERAL -
+NOT_LITERAL_IGNORE LITERAL -
+NOT_RANGE RANGE -
+NOT_RANGE_IGNORE RANGE -
+NOT_SET SET -
+NOT_SET_IGNORE SET -
+RANGE RANGE -
+RANGE_IGNORE RANGE -
+SET SET -
+SET_IGNORE SET -
+SUBPATTERN
+"""
+
+# namespace for operators
+class OP(object):
+    pass
+
+# build list of operators
+op_list = []
+
+for op in operators.splitlines():
+    if op:
+        parts = op.split()
+        
+        # add to OP namespace
+        setattr(OP, parts[0], parts[0])
+        
+        if len(parts) == 3:
+            # it's an operator for the regex engine
+            name, op_type, terminator = parts
+            op_list.append((name, op_type, terminator))
+            if terminator != "-":
+                op_list.append((terminator, "-", "-"))
+
+op_list = op_list[ : 2] + sorted(op_list[2 : ])
+
+# build dict of opcode values (an operator's index in op_list is its opcode)
+OPCODES = dict((entry[0], value) for value, entry in enumerate(op_list))
+
+# terminators were only appended to op_list, so add them to the OP namespace
+for op in OPCODES:
+    setattr(OP, op, op)
+
+# dict for obtaining 'ignore' operator
+OP_IGNORE = {}
+for op in OPCODES:
+    if op.endswith("_IGNORE"):
+        OP_IGNORE[op[ : -7]] = op
+    else:
+        OP_IGNORE.setdefault(op, op)
+
+# dict for obtaining 'not' operator
+OP_NOT = {}
+for op in OPCODES:
+    if op.startswith("NOT_"):
+        OP_NOT[op[4 : ]] = op
+    else:
+        OP_NOT.setdefault(op, op)
+
+# categories of character
+CATEGORIES = "Alpha Alnum Digit LineBreak Space Word"
+CATEGORIES = dict((name.lower(), value) for value, name in
+  enumerate(CATEGORIES.split()))
 
 # flags
 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
@@ -211,18 +137,9 @@
 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
 SRE_FLAG_DEBUG = 128 # debugging
 
-# flags for INFO primitive
-SRE_INFO_PREFIX = 1 # has prefix
-SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
-SRE_INFO_CHARSET = 4 # pattern starts with character from given set
-
 if __name__ == "__main__":
-    def dump(f, d, prefix):
-        items = d.items()
-        items.sort(key=lambda a: a[1])
-        for k, v in items:
-            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
-    f = open("sre_constants.h", "w")
+    # create C header file
+    f = open("sre_constants.h", "wb")
     f.write("""\
 /*
  * Secret Labs' Regular Expression Engine
@@ -240,11 +157,35 @@
 """)
 
     f.write("#define SRE_MAGIC %d\n" % MAGIC)
-
-    dump(f, OPCODES, "SRE_OP")
-    dump(f, ATCODES, "SRE")
-    dump(f, CHCODES, "SRE")
-
+    f.write("\n")
+
+    f.write("""\
+/* size of a code word (must be unsigned short or larger, and
+   large enough to hold a Py_UNICODE character) */
+
+""")
+    if CODESIZE == 4:
+        f.write("#define SRE_CODE Py_UCS4\n")
+    else:
+        f.write("#define SRE_CODE unsigned short\n")  # end line before next #define
+    f.write("#define SRE_MAXREPEAT 0x%X\n" % MAXREPEAT)
+
+    # make #defines for operators
+    f.write("\n")
+    for name, op_type, terminator in op_list:
+        f.write("#define SRE_OP_%s %d\n" % (name, OPCODES[name]))
+    f.write("#define SRE_MAXOP %d\n" % max(OPCODES.values()))
+
+    # make #defines for categories
+    f.write("\n")
+    cat_list = CATEGORIES.items()
+    cat_list.sort(key=operator.itemgetter(1))
+    for name, value in cat_list:
+        f.write("#define SRE_CAT_%s %d\n" % (name.upper(), value))
+    f.write("#define SRE_MAXCAT %d\n" % cat_list[-1][1])
+
+    # make #defines for flags
+    f.write("\n")
     f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
     f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
     f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
@@ -253,9 +194,28 @@
     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
 
-    f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
-    f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
-    f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
+    # make #defines for op_types
+    op_type_list = sorted(set(entry[1] for entry in op_list))
+    op_type_list = [(name, value) for value, name in enumerate(op_type_list)]
+    f.write("\n")
+    for name, value in op_type_list:
+        if name != "-":
+            f.write("#define SRE_TYPE_%s %d\n" % (name, value))
+
+    # make opcode info table
+    optypes = dict(op_type_list)
+
+    f.write("\n")
+    f.write("typedef struct {\n")
+    f.write("    int type;\n")
+    f.write("    SRE_CODE terminator;\n")
+    f.write("} SRE_OP_INFO;\n")
+    f.write("\n")
+    f.write("SRE_OP_INFO sre_op_info[] = {\n")
+    for name, op_type, terminator in op_list:
+        f.write("    {%d, %d}, /* %s */\n" % (optypes[op_type],
+          OPCODES.get(terminator, 0), name))
+    f.write("};\n")
 
     f.close()
     print "done"
=== modified file Lib/sre_compile.py
--- Lib/sre_compile.py 2008-10-14 22:37:18 +0000
+++ Lib/sre_compile.py 2009-03-25 23:45:47 +0000
@@ -15,96 +15,29 @@
 from sre_constants import *
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
-
-if _sre.CODESIZE == 2:
-    MAXCODE = 65535
-else:
-    MAXCODE = 0xFFFFFFFFL
-
-def _identityfunction(x):
-    return x
-
-_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
-_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
-_SUCCESS_CODES = set([SUCCESS, FAILURE])
-_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
+assert _sre.CODESIZE == CODESIZE, "SRE module mismatch"
+
+_LITERAL_CODES = set([OP.LITERAL, OP.NOT_LITERAL])
+_REPEATING_CODES = set([OP.MIN_REPEAT, OP.MAX_REPEAT])
+_ASSERT_CODES = set([OP.ASSERT, OP.ASSERT_NOT])
+_SET_CODES = set([OP.SET, OP.NOT_SET])
+_POSITION_CODES = set([OP.BEGINNING_STRING, OP.BOUNDARY, OP.END_STRING,
+  OP.NOT_BOUNDARY])
+_CATEGORY_CODES = set([OP.CATEGORY, OP.NOT_CATEGORY])
+_CHARSET_CODES = set([OP.CHARSET, OP.NOT_CHARSET])
+_RANGE_CODES = set([OP.NOT_RANGE, OP.RANGE])
 
 def _compile(code, pattern, flags):
     # internal: compile a (sub)pattern
     emit = code.append
     _len = len
-    LITERAL_CODES = _LITERAL_CODES
-    REPEATING_CODES = _REPEATING_CODES
-    SUCCESS_CODES = _SUCCESS_CODES
-    ASSERT_CODES = _ASSERT_CODES
     for op, av in pattern:
-        if op in LITERAL_CODES:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                emit(_sre.getlower(av, flags))
-            else:
-                emit(OPCODES[op])
-                emit(av)
-        elif op is IN:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-                def fixup(literal, flags=flags):
-                    return _sre.getlower(literal, flags)
-            else:
-                emit(OPCODES[op])
-                fixup = _identityfunction
-            skip = _len(code); emit(0)
-            _compile_charset(av, flags, code, fixup)
-            code[skip] = _len(code) - skip
-        elif op is ANY:
+        if op == OP.ANY:
             if flags & SRE_FLAG_DOTALL:
-                emit(OPCODES[ANY_ALL])
-            else:
-                emit(OPCODES[ANY])
-        elif op in REPEATING_CODES:
-            if flags & SRE_FLAG_TEMPLATE:
-                raise error, "internal: unsupported template operator"
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
-            elif _simple(av) and op is not REPEAT:
-                if op is MAX_REPEAT:
-                    emit(OPCODES[REPEAT_ONE])
-                else:
-                    emit(OPCODES[MIN_REPEAT_ONE])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                emit(OPCODES[SUCCESS])
-                code[skip] = _len(code) - skip
-            else:
-                emit(OPCODES[REPEAT])
-                skip = _len(code); emit(0)
-                emit(av[0])
-                emit(av[1])
-                _compile(code, av[2], flags)
-                code[skip] = _len(code) - skip
-                if op is MAX_REPEAT:
-                    emit(OPCODES[MAX_UNTIL])
-                else:
-                    emit(OPCODES[MIN_UNTIL])
-        elif op is SUBPATTERN:
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2)
-            # _compile_info(code, av[1], flags)
-            _compile(code, av[1], flags)
-            if av[0]:
-                emit(OPCODES[MARK])
-                emit((av[0]-1)*2+1)
-        elif op in SUCCESS_CODES:
-            emit(OPCODES[op])
-        elif op in ASSERT_CODES:
+                emit(OPCODES[OP.ANY_ALL])
+            else:
+                emit(OPCODES[OP.ANY])
+        elif op in _ASSERT_CODES:
             emit(OPCODES[op])
             skip = _len(code); emit(0)
             if av[0] >= 0:
@@ -115,24 +48,17 @@
                     raise error, "look-behind requires fixed-width pattern"
                 emit(lo) # look behind
             _compile(code, av[1], flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is CALL:
-            emit(OPCODES[op])
-            skip = _len(code); emit(0)
-            _compile(code, av, flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = _len(code) - skip
-        elif op is AT:
-            emit(OPCODES[op])
+            code[skip] = _len(code) - (skip - 1)
+            if op == OP.ASSERT:
+                emit(OPCODES[OP.END_ASSERT])
+            else:
+                emit(OPCODES[OP.END_ASSERT_NOT])
+        elif op == OP.BEGINNING_LINE:
             if flags & SRE_FLAG_MULTILINE:
-                av = AT_MULTILINE.get(av, av)
-            if flags & SRE_FLAG_LOCALE:
-                av = AT_LOCALE.get(av, av)
-            elif flags & SRE_FLAG_UNICODE:
-                av = AT_UNICODE.get(av, av)
-            emit(ATCODES[av])
-        elif op is BRANCH:
+                emit(OPCODES[op])
+            else:
+                emit(OPCODES[OP.BEGINNING_STRING])
+        elif op == OP.BRANCH:
             emit(OPCODES[op])
             tail = []
             tailappend = tail.append
@@ -140,146 +66,143 @@
                 skip = _len(code); emit(0)
                 # _compile_info(code, av, flags)
                 _compile(code, av, flags)
-                emit(OPCODES[JUMP])
+                emit(OPCODES[OP.JUMP])
                 tailappend(_len(code)); emit(0)
                 code[skip] = _len(code) - skip
             emit(0) # end of branch
-            for tail in tail:
-                code[tail] = _len(code) - tail
-        elif op is CATEGORY:
-            emit(OPCODES[op])
-            if flags & SRE_FLAG_LOCALE:
-                av = CH_LOCALE[av]
-            elif flags & SRE_FLAG_UNICODE:
-                av = CH_UNICODE[av]
-            emit(CHCODES[av])
-        elif op is GROUPREF:
+            for skip in tail:
+                code[skip] = _len(code) - (skip - 1)
+        elif op in _CATEGORY_CODES:
+            emit(OPCODES[op])
+            emit(av)
+        elif op in _CHARSET_CODES:
+            skip = _len(code); emit(0)
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            _compile_charset(av, code)
+            code[skip] = _len(code) - (skip - 1)
+        elif op == OP.END_LINE:
+            if flags & SRE_FLAG_MULTILINE:
+                emit(OPCODES[op])
+            else:
+                emit(OPCODES[OP.END_STRING_LINE])
+        elif op == OP.GROUPREF:
             if flags & SRE_FLAG_IGNORECASE:
                 emit(OPCODES[OP_IGNORE[op]])
             else:
                 emit(OPCODES[op])
             emit(av-1)
-        elif op is GROUPREF_EXISTS:
+        elif op == OP.GROUPREF_EXISTS:
             emit(OPCODES[op])
             emit(av[0]-1)
             skipyes = _len(code); emit(0)
             _compile(code, av[1], flags)
             if av[2]:
-                emit(OPCODES[JUMP])
+                emit(OPCODES[OP.JUMP])
                 skipno = _len(code); emit(0)
-                code[skipyes] = _len(code) - skipyes + 1
-                _compile(code, av[2], flags)
-                code[skipno] = _len(code) - skipno
-            else:
-                code[skipyes] = _len(code) - skipyes + 1
+                code[skipyes] = _len(code) - skipyes + 2
+                _compile(code, av[2], flags)
+                code[skipno] = _len(code) - skipno + 1
+            else:
+                code[skipyes] = _len(code) - skipyes + 2
+        elif op in _LITERAL_CODES:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(av)
+        elif op in _POSITION_CODES:
+            emit(OPCODES[op])
+        elif op in _REPEATING_CODES:
+            if flags & SRE_FLAG_TEMPLATE:
+                raise error, "internal: unsupported template operator"
+                emit(OPCODES[OP.REPEAT])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(OPCODES[OP.SUCCESS])
+                code[skip] = _len(code) - skip
+            elif _simple(av):
+                if op == OP.MAX_REPEAT:
+                    emit(OPCODES[OP.MAX_REPEAT_ONE])
+                else:
+                    emit(OPCODES[OP.MIN_REPEAT_ONE])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                code[skip] = _len(code) - (skip - 1)
+            else:
+                emit(OPCODES[op])
+                skip = _len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                offset = _len(code) - (skip - 1)
+                code[skip] = offset
+                if op == OP.MAX_REPEAT:
+                    emit(OPCODES[OP.END_MAX_REPEAT])
+                else:
+                    emit(OPCODES[OP.END_MIN_REPEAT])
+                emit(offset)
+        elif op in _RANGE_CODES:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(av[0])
+            emit(av[1])
+        elif op in _SET_CODES:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            skip = _len(code); emit(0)
+            _compile_set(av, code)
+            code[skip] = _len(code) - (skip - 1)
+        elif op == OP.SUBPATTERN:
+            if av[0]:
+                emit(OPCODES[OP.MARK])
+                emit((av[0]-1)*2)
+            # _compile_info(code, av[1], flags)
+            _compile(code, av[1], flags)
+            if av[0]:
+                emit(OPCODES[OP.MARK])
+                emit((av[0]-1)*2+1)
+        elif op == OP.SUCCESS:
+            emit(OPCODES[op])
         else:
             raise ValueError, ("unsupported operand type", op)
 
-def _compile_charset(charset, flags, code, fixup=None):
+def _compile_set(set, code):
     # compile charset subprogram
     emit = code.append
-    if fixup is None:
-        fixup = _identityfunction
-    for op, av in _optimize_charset(charset, fixup):
+    for op, av in set:
         emit(OPCODES[op])
-        if op is NEGATE:
-            pass
-        elif op is LITERAL:
-            emit(fixup(av))
-        elif op is RANGE:
-            emit(fixup(av[0]))
-            emit(fixup(av[1]))
-        elif op is CHARSET:
-            code.extend(av)
-        elif op is BIGCHARSET:
-            code.extend(av)
-        elif op is CATEGORY:
-            if flags & SRE_FLAG_LOCALE:
-                emit(CHCODES[CH_LOCALE[av]])
-            elif flags & SRE_FLAG_UNICODE:
-                emit(CHCODES[CH_UNICODE[av]])
-            else:
-                emit(CHCODES[av])
+        if op in (OP.CATEGORY, OP.NOT_CATEGORY):
+            emit(av)
+        elif op == OP.CHARSET:
+            skip = len(code); emit(0)  # placeholder, patched below
+            _compile_charset(av, code)
+            code[skip] = len(code) - (skip - 1)
+        elif op == OP.LITERAL:
+            emit(av)
+        elif op == OP.RANGE:
+            emit(av[0])
+            emit(av[1])
         else:
             raise error, "internal: unsupported set operator"
-    emit(OPCODES[FAILURE])
-
-def _optimize_charset(charset, fixup):
-    # internal: optimize character set
-    out = []
-    outappend = out.append
-    charmap = [0]*256
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                outappend((op, av))
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in range(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could append to charmap tail
-                return charset # cannot compress
-    except IndexError:
-        # character set contains unicode characters
-        return _optimize_unicode(charset, fixup)
-    # compress character map
-    i = p = n = 0
-    runs = []
-    runsappend = runs.append
-    for c in charmap:
-        if c:
-            if n == 0:
-                p = i
-            n = n + 1
-        elif n:
-            runsappend((p, n))
-            n = 0
-        i = i + 1
-    if n:
-        runsappend((p, n))
-    if len(runs) <= 2:
-        # use literal/range
-        for p, n in runs:
-            if n == 1:
-                outappend((LITERAL, p))
-            else:
-                outappend((RANGE, (p, p+n-1)))
-        if len(out) < len(charset):
-            return out
-    else:
-        # use bitmap
-        data = _mk_bitmap(charmap)
-        outappend((CHARSET, data))
-        return out
-    return charset
-
-def _mk_bitmap(bits):
-    data = []
-    dataappend = data.append
-    if _sre.CODESIZE == 2:
-        start = (1, 0)
-    else:
-        start = (1L, 0L)
-    m, v = start
-    for c in bits:
-        if c:
-            v = v + m
-        m = m + m
-        if m > MAXCODE:
-            dataappend(v)
-            m, v = start
-    return data
-
-# To represent a big charset, first a bitmap of all characters in the
-# set is constructed. Then, this bitmap is sliced into chunks of 256
-# characters, duplicate chunks are eliminated, and each chunk is
-# given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
-# of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+
+# To represent a charset, first a bitmap of all characters in the set is
+# constructed. Then, this bitmap is sliced into chunks of 256
+# characters, duplicate chunks are eliminated, and each chunk is given a
+# number. In the compiled expression, the charset is represented by a
+# codeword sequence, consisting of one word for the maximum character
+# code, a sequence of codewords of chunk numbers (2 per codeword)
+# indexed by their original chunk position, and a sequence of chunks.
 
 # Compression is normally good: in a typical charset, large ranges of
 # Unicode will be either completely excluded (e.g. if only cyrillic
@@ -287,182 +210,37 @@
 # subranges of Kanji match). These ranges will be represented by
 # chunks of all one-bits or all zero-bits.
 
-# Matching can be also done efficiently: the more significant byte of
-# the Unicode character is an index into the chunk number, and the
-# less significant byte is a bit index in the chunk (just like the
-# CHARSET matching).
-
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
-# of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
-# in particular, that negated charsets cannot be represented as
-# bigcharsets.
-
-def _optimize_unicode(charset, fixup):
-    try:
-        import array
-    except ImportError:
-        return charset
-    charmap = [0]*65536
-    negate = 0
-    try:
-        for op, av in charset:
-            if op is NEGATE:
-                negate = 1
-            elif op is LITERAL:
-                charmap[fixup(av)] = 1
-            elif op is RANGE:
-                for i in xrange(fixup(av[0]), fixup(av[1])+1):
-                    charmap[i] = 1
-            elif op is CATEGORY:
-                # XXX: could expand category
-                return charset # cannot compress
-    except IndexError:
-        # non-BMP characters
-        return charset
-    if negate:
-        if sys.maxunicode != 65535:
-            # XXX: negation does not work with big charsets
-            return charset
-        for i in xrange(65536):
-            charmap[i] = not charmap[i]
-    comps = {}
-    mapping = [0]*256
-    block = 0
-    data = []
-    for i in xrange(256):
-        chunk = tuple(charmap[i*256:(i+1)*256])
-        new = comps.setdefault(chunk, block)
-        mapping[i] = new
-        if new == block:
-            block = block + 1
-            data = data + _mk_bitmap(chunk)
-    header = [block]
-    if _sre.CODESIZE == 2:
-        code = 'H'
-    else:
-        code = 'I'
-    # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tostring()
-    # Convert byte array to word array
-    mapping = array.array(code, mapping)
-    assert mapping.itemsize == _sre.CODESIZE
-    header = header + mapping.tolist()
-    data[0:0] = header
-    return [(BIGCHARSET, data)]
+# Matching can also be done efficiently: the more significant bytes of
+# the Unicode character are an index into the chunk numbers, and the
+# least significant byte is a bit index into the chunk.
+
+def _compile_charset(charset, code):
+    # internal: emit a (max_code, indexes, subsets) charset into code
+    emit = code.append
+    bits_per_code = CODESIZE * 8
+    max_code, indexes, subsets = charset
+
+    # maximum character code
+    emit(max_code)
+
+    # indexes to subsets, two per codeword (first in the low half);
+    # the trailing [0] pads the last pair when len(indexes) is odd
+    shift = bits_per_code // 2
+    for lo, hi in zip(indexes[0 : : 2], indexes[1 : : 2] + [0]):
+        emit(lo | (hi << shift))
+
+    # subsets themselves, split into codewords, low bits first
+    for s in subsets:
+        for i in range(256 // bits_per_code):
+            emit(s & MAXREPEAT)
+            s >>= bits_per_code
 
 def _simple(av):
     # check if av is a "simple" operator
     lo, hi = av[2].getwidth()
     if lo == 0 and hi == MAXREPEAT:
         raise error, "nothing to repeat"
-    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
-
-def _compile_info(code, pattern, flags):
-    # internal: compile an info block.  in the current version,
-    # this contains min/max pattern width, and an optional literal
-    # prefix or a character map
-    lo, hi = pattern.getwidth()
-    if lo == 0:
-        return # not worth it
-    # look for a literal prefix
-    prefix = []
-    prefixappend = prefix.append
-    prefix_skip = 0
-    charset = [] # not used
-    charsetappend = charset.append
-    if not (flags & SRE_FLAG_IGNORECASE):
-        # look for literal prefix
-        for op, av in pattern.data:
-            if op is LITERAL:
-                if len(prefix) == prefix_skip:
-                    prefix_skip = prefix_skip + 1
-                prefixappend(av)
-            elif op is SUBPATTERN and len(av[1]) == 1:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    prefixappend(av)
-                else:
-                    break
-            else:
-                break
-        # if no prefix, look for charset prefix
-        if not prefix and pattern.data:
-            op, av = pattern.data[0]
-            if op is SUBPATTERN and av[1]:
-                op, av = av[1][0]
-                if op is LITERAL:
-                    charsetappend((op, av))
-                elif op is BRANCH:
-                    c = []
-                    cappend = c.append
-                    for p in av[1]:
-                        if not p:
-                            break
-                        op, av = p[0]
-                        if op is LITERAL:
-                            cappend((op, av))
-                        else:
-                            break
-                    else:
-                        charset = c
-            elif op is BRANCH:
-                c = []
-                cappend = c.append
-                for p in av[1]:
-                    if not p:
-                        break
-                    op, av = p[0]
-                    if op is LITERAL:
-                        cappend((op, av))
-                    else:
-                        break
-                else:
-                    charset = c
-            elif op is IN:
-                charset = av
-##     if prefix:
-##         print "*** PREFIX", prefix, prefix_skip
-##     if charset:
-##         print "*** CHARSET", charset
-    # add an info block
-    emit = code.append
-    emit(OPCODES[INFO])
-    skip = len(code); emit(0)
-    # literal flag
-    mask = 0
-    if prefix:
-        mask = SRE_INFO_PREFIX
-        if len(prefix) == prefix_skip == len(pattern.data):
-            mask = mask + SRE_INFO_LITERAL
-    elif charset:
-        mask = mask + SRE_INFO_CHARSET
-    emit(mask)
-    # pattern length
-    if lo < MAXCODE:
-        emit(lo)
-    else:
-        emit(MAXCODE)
-        prefix = prefix[:MAXCODE]
-    if hi < MAXCODE:
-        emit(hi)
-    else:
-        emit(0)
-    # add literal prefix
-    if prefix:
-        emit(len(prefix)) # length
-        emit(prefix_skip) # skip
-        code.extend(prefix)
-        # generate overlap table
-        table = [-1] + ([0]*len(prefix))
-        for i in xrange(len(prefix)):
-            table[i+1] = table[i]+1
-            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
-                table[i+1] = table[table[i+1]-1]+1
-        code.extend(table[1:]) # don't store first entry
-    elif charset:
-        _compile_charset(charset, flags, code)
-    code[skip] = len(code) - skip
+    return lo == hi == 1 and av[2][0][0] != OP.SUBPATTERN
 
 try:
     unicode
@@ -482,13 +260,10 @@
     flags = p.pattern.flags | flags
     code = []
 
-    # compile info block
-    _compile_info(code, p, flags)
-
     # compile the pattern
     _compile(code, p.data, flags)
 
-    code.append(OPCODES[SUCCESS])
+    code.append(OPCODES[OP.SUCCESS])
 
     return code
 
=== modified file Lib/sre_parse.py
--- Lib/sre_parse.py 2008-10-14 22:37:18 +0000
+++ Lib/sre_parse.py 2009-03-25 17:24:46 +0000
@@ -27,27 +27,30 @@
 WHITESPACE = set(" \t\n\r\v\f")
 
 ESCAPES = {
-    r"\a": (LITERAL, ord("\a")),
-    r"\b": (LITERAL, ord("\b")),
-    r"\f": (LITERAL, ord("\f")),
-    r"\n": (LITERAL, ord("\n")),
-    r"\r": (LITERAL, ord("\r")),
-    r"\t": (LITERAL, ord("\t")),
-    r"\v": (LITERAL, ord("\v")),
-    r"\\": (LITERAL, ord("\\"))
+    r"\a": (OP.LITERAL, ord("\a")),
+    r"\b": (OP.LITERAL, ord("\b")),
+    r"\f": (OP.LITERAL, ord("\f")),
+    r"\n": (OP.LITERAL, ord("\n")),
+    r"\r": (OP.LITERAL, ord("\r")),
+    r"\t": (OP.LITERAL, ord("\t")),
+    r"\v": (OP.LITERAL, ord("\v")),
+    r"\\": (OP.LITERAL, ord("\\"))
 }
 
-CATEGORIES = {
-    r"\A": (AT, AT_BEGINNING_STRING), # start of string
-    r"\b": (AT, AT_BOUNDARY),
-    r"\B": (AT, AT_NON_BOUNDARY),
-    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
-    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
-    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
-    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
-    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
-    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
-    r"\Z": (AT, AT_END_STRING), # end of string
+CLASS_ESCAPES = {
+    r"\d": (OP.CATEGORY, CATEGORIES["digit"]),
+    r"\D": (OP.NOT_CATEGORY, CATEGORIES["digit"]),
+    r"\s": (OP.CATEGORY, CATEGORIES["space"]),
+    r"\S": (OP.NOT_CATEGORY, CATEGORIES["space"]),
+    r"\w": (OP.CATEGORY, CATEGORIES["word"]),
+    r"\W": (OP.NOT_CATEGORY, CATEGORIES["word"]),
+}
+
+POSITION_ESCAPES = {
+    r"\A": (OP.BEGINNING_STRING, None), # start of string
+    r"\b": (OP.BOUNDARY, None),
+    r"\B": (OP.NOT_BOUNDARY, None),
+    r"\Z": (OP.END_STRING, None), # end of string
 }
 
 FLAGS = {
@@ -98,12 +101,12 @@
         seqtypes = type(()), type([])
         for op, av in self.data:
             print level*"  " + op,; nl = 0
-            if op == "in":
+            if op == OP.SET:
                 # member sublanguage
                 print; nl = 1
                 for op, a in av:
                     print (level+1)*"  " + op, a
-            elif op == "branch":
+            elif op == OP.BRANCH:
                 print; nl = 1
                 i = 0
                 for a in av[1]:
@@ -142,10 +145,12 @@
         if self.width:
             return self.width
         lo = hi = 0L
-        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
-        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
+        UNITCODES = set([OP.ANY, OP.CATEGORY, OP.CHARSET, OP.LITERAL,
+          OP.NOT_CATEGORY, OP.NOT_CHARSET, OP.NOT_LITERAL, OP.NOT_RANGE,
+          OP.NOT_SET, OP.RANGE, OP.SET])
+        REPEATCODES = set([OP.MIN_REPEAT, OP.MAX_REPEAT])
         for op, av in self.data:
-            if op is BRANCH:
+            if op is OP.BRANCH:
                 i = sys.maxint
                 j = 0
                 for av in av[1]:
@@ -154,11 +159,7 @@
                     j = max(j, h)
                 lo = lo + i
                 hi = hi + j
-            elif op is CALL:
-                i, j = av.getwidth()
-                lo = lo + i
-                hi = hi + j
-            elif op is SUBPATTERN:
+            elif op == OP.SUBPATTERN:
                 i, j = av[1].getwidth()
                 lo = lo + i
                 hi = hi + j
@@ -169,7 +170,7 @@
             elif op in UNITCODES:
                 lo = lo + 1
                 hi = hi + 1
-            elif op == SUCCESS:
+            elif op == OP.SUCCESS:
                 break
         self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
         return self.width
@@ -224,10 +225,7 @@
 
 def _class_escape(source, escape):
     # handle escape code inside character class
-    code = ESCAPES.get(escape)
-    if code:
-        return code
-    code = CATEGORIES.get(escape)
+    code = ESCAPES.get(escape) or CLASS_ESCAPES.get(escape)
     if code:
         return code
     try:
@@ -239,27 +237,25 @@
             escape = escape[2:]
             if len(escape) != 2:
                 raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, int(escape, 16) & 0xff
+            return OP.LITERAL, int(escape, 16) & 0xff
         elif c in OCTDIGITS:
             # octal escape (up to three digits)
             while source.next in OCTDIGITS and len(escape) < 4:
                 escape = escape + source.get()
             escape = escape[1:]
-            return LITERAL, int(escape, 8) & 0xff
+            return OP.LITERAL, int(escape, 8) & 0xff
         elif c in DIGITS:
             raise error, "bogus escape: %s" % repr(escape)
         if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            return OP.LITERAL, ord(escape[1])
     except ValueError:
         pass
     raise error, "bogus escape: %s" % repr(escape)
 
 def _escape(source, escape, state):
     # handle escape code in expression
-    code = CATEGORIES.get(escape)
-    if code:
-        return code
-    code = ESCAPES.get(escape)
+    code = (POSITION_ESCAPES.get(escape) or ESCAPES.get(escape) or
+      CLASS_ESCAPES.get(escape))
     if code:
         return code
     try:
@@ -270,12 +266,12 @@
                 escape = escape + source.get()
             if len(escape) != 4:
                 raise ValueError
-            return LITERAL, int(escape[2:], 16) & 0xff
+            return OP.LITERAL, int(escape[2:], 16) & 0xff
         elif c == "0":
             # octal escape
             while source.next in OCTDIGITS and len(escape) < 4:
                 escape = escape + source.get()
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return OP.LITERAL, int(escape[1:], 8) & 0xff
         elif c in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
@@ -284,16 +280,16 @@
                     source.next in OCTDIGITS):
                     # got three octal digits; this is an octal escape
                     escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    return OP.LITERAL, int(escape[1:], 8) & 0xff
             # not an octal escape, so this is a group reference
             group = int(escape[1:])
             if group < state.groups:
                 if not state.checkgroup(group):
                     raise error, "cannot refer to open group"
-                return GROUPREF, group
+                return OP.GROUPREF, group
             raise ValueError
         if len(escape) == 2:
-            return LITERAL, ord(escape[1])
+            return OP.LITERAL, ord(escape[1])
     except ValueError:
         pass
     raise error, "bogus escape: %s" % repr(escape)
@@ -342,7 +338,7 @@
 
     # check if the branch can be replaced by a character set
     for item in items:
-        if len(item) != 1 or item[0][0] != LITERAL:
+        if len(item) != 1 or item[0][0] != OP.LITERAL:
             break
     else:
         # we can store this as a character set instead of a
@@ -351,10 +347,10 @@
         setappend = set.append
         for item in items:
             setappend(item[0])
-        subpatternappend((IN, set))
+        subpatternappend((OP.SET, set))
         return subpattern
 
-    subpattern.append((BRANCH, (None, items)))
+    subpattern.append((OP.BRANCH, (None, items)))
     return subpattern
 
 def _parse_sub_cond(source, state, condgroup):
@@ -368,13 +364,15 @@
     if source.next and not source.match(")", 0):
         raise error, "pattern not properly closed"
     subpattern = SubPattern(state)
-    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
+    subpattern.append((OP.GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
     return subpattern
 
 _PATTERNENDERS = set("|)")
 _ASSERTCHARS = set("=!<")
 _LOOKBEHINDASSERTCHARS = set("=!")
-_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
+_REPEATCODES = set([OP.MIN_REPEAT, OP.MAX_REPEAT])
+_POSITIONCODES = set([OP.BEGINNING_LINE, OP.BEGINNING_STRING, OP.BOUNDARY,
+  OP.END_LINE, OP.END_STRING, OP.END_STRING_LINE, OP.NOT_BOUNDARY])
 
 def _parse(source, state):
     # parse a simple pattern
@@ -410,7 +408,7 @@
                 continue
 
         if this and this[0] not in SPECIAL_CHARS:
-            subpatternappend((LITERAL, ord(this)))
+            subpatternappend((OP.LITERAL, ord(this)))
 
         elif this == "[":
             # character set
@@ -418,56 +416,55 @@
             setappend = set.append
 ##          if sourcematch(":"):
 ##              pass # handle character classes
-            if sourcematch("^"):
-                setappend((NEGATE, None))
+            negate = sourcematch("^")
             # check remaining characters
-            start = set[:]
             while 1:
                 this = sourceget()
-                if this == "]" and set != start:
+                if this == "]" and set:
                     break
                 elif this and this[0] == "\\":
                     code1 = _class_escape(source, this)
                 elif this:
-                    code1 = LITERAL, ord(this)
+                    code1 = OP.LITERAL, ord(this)
                 else:
                     raise error, "unexpected end of regular expression"
                 if sourcematch("-"):
                     # potential range
                     this = sourceget()
                     if this == "]":
-                        if code1[0] is IN:
-                            code1 = code1[1][0]
                         setappend(code1)
-                        setappend((LITERAL, ord("-")))
+                        setappend((OP.LITERAL, ord("-")))
                         break
                     elif this:
                         if this[0] == "\\":
                             code2 = _class_escape(source, this)
                         else:
-                            code2 = LITERAL, ord(this)
-                        if code1[0] != LITERAL or code2[0] != LITERAL:
+                            code2 = OP.LITERAL, ord(this)
+                        if code1[0] != OP.LITERAL or code2[0] != OP.LITERAL:
                             raise error, "bad character range"
                         lo = code1[1]
                         hi = code2[1]
                         if hi < lo:
                             raise error, "bad character range"
-                        setappend((RANGE, (lo, hi)))
+                        setappend((OP.RANGE, (lo, hi)))
                     else:
                         raise error, "unexpected end of regular expression"
                 else:
-                    if code1[0] is IN:
-                        code1 = code1[1][0]
                     setappend(code1)
 
             # XXX: <fl> should move set optimization to compiler!
-            if _len(set)==1 and set[0][0] is LITERAL:
-                subpatternappend(set[0]) # optimization
-            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
-                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
+            if _len(set)==1 and set[0][0] == OP.LITERAL:
+                # optimization
+                if negate:
+                    subpatternappend((OP.NOT_LITERAL, set[0][1]))
+                else:
+                    subpatternappend(set[0])
             else:
                 # XXX: <fl> should add charmap optimization here
-                subpatternappend((IN, set))
+                if negate:
+                    subpatternappend((OP.NOT_SET, set))
+                else:
+                    subpatternappend((OP.SET, set))
 
         elif this and this[0] in REPEAT_CHARS:
             # repeat previous item
@@ -480,7 +477,7 @@
                 min, max = 1, MAXREPEAT
             elif this == "{":
                 if source.next == "}":
-                    subpatternappend((LITERAL, ord(this)))
+                    subpatternappend((OP.LITERAL, ord(this)))
                     continue
                 here = source.tell()
                 min, max = 0, MAXREPEAT
@@ -493,7 +490,7 @@
                 else:
                     hi = lo
                 if not sourcematch("}"):
-                    subpatternappend((LITERAL, ord(this)))
+                    subpatternappend((OP.LITERAL, ord(this)))
                     source.seek(here)
                     continue
                 if lo:
@@ -509,17 +506,17 @@
                 item = subpattern[-1:]
             else:
                 item = None
-            if not item or (_len(item) == 1 and item[0][0] == AT):
+            if not item or (_len(item) == 1 and item[0][0] in _POSITIONCODES):
                 raise error, "nothing to repeat"
             if item[0][0] in REPEATCODES:
                 raise error, "multiple repeat"
             if sourcematch("?"):
-                subpattern[-1] = (MIN_REPEAT, (min, max, item))
+                subpattern[-1] = (OP.MIN_REPEAT, (min, max, item))
             else:
-                subpattern[-1] = (MAX_REPEAT, (min, max, item))
+                subpattern[-1] = (OP.MAX_REPEAT, (min, max, item))
 
         elif this == ".":
-            subpatternappend((ANY, None))
+            subpatternappend((OP.ANY, None))
 
         elif this == "(":
             group = 1
@@ -558,7 +555,7 @@
                         gid = state.groupdict.get(name)
                         if gid is None:
                             raise error, "unknown group name"
-                        subpatternappend((GROUPREF, gid))
+                        subpatternappend((OP.GROUPREF, gid))
                         continue
                     else:
                         char = sourceget()
@@ -590,9 +587,9 @@
                     if not sourcematch(")"):
                         raise error, "unbalanced parenthesis"
                     if char == "=":
-                        subpatternappend((ASSERT, (dir, p)))
+                        subpatternappend((OP.ASSERT, (dir, p)))
                     else:
-                        subpatternappend((ASSERT_NOT, (dir, p)))
+                        subpatternappend((OP.ASSERT_NOT, (dir, p)))
                     continue
                 elif sourcematch("("):
                     # conditional backreference group
@@ -635,7 +632,7 @@
                     raise error, "unbalanced parenthesis"
                 if group is not None:
                     state.closegroup(group)
-                subpatternappend((SUBPATTERN, (group, p)))
+                subpatternappend((OP.SUBPATTERN, (group, p)))
             else:
                 while 1:
                     char = sourceget()
@@ -646,10 +643,10 @@
                     raise error, "unknown extension"
 
         elif this == "^":
-            subpatternappend((AT, AT_BEGINNING))
+            subpatternappend((OP.BEGINNING_LINE, None))
 
         elif this == "$":
-            subpattern.append((AT, AT_END))
+            subpattern.append((OP.END_LINE, None))
 
         elif this and this[0] == "\\":
             code = _escape(source, this, state)
@@ -696,10 +693,10 @@
     p = []
     a = p.append
     def literal(literal, p=p, pappend=a):
-        if p and p[-1][0] is LITERAL:
-            p[-1] = LITERAL, p[-1][1] + literal
+        if p and p[-1][0] == OP.LITERAL:
+            p[-1] = OP.LITERAL, p[-1][1] + literal
         else:
-            pappend((LITERAL, literal))
+            pappend((OP.LITERAL, literal))
     sep = source[:0]
     if type(sep) is type(""):
         makechar = chr
@@ -735,7 +732,7 @@
                         index = pattern.groupindex[name]
                     except KeyError:
                         raise IndexError, "unknown group name"
-                a((MARK, index))
+                a((OP.MARK, index))
             elif c == "0":
                 if s.next in OCTDIGITS:
                     this = this + sget()
@@ -752,7 +749,7 @@
                         isoctal = True
                         literal(makechar(int(this[1:], 8) & 0xff))
                 if not isoctal:
-                    a((MARK, int(this[1:])))
+                    a((OP.MARK, int(this[1:])))
             else:
                 try:
                     this = makechar(ESCAPES[this][1])
@@ -767,7 +764,7 @@
     groupsappend = groups.append
     literals = [None] * len(p)
     for c, s in p:
-        if c is MARK:
+        if c == OP.MARK:
             groupsappend((i, s))
             # literal[i] is already None
         else:
=== modified file Lib/re.py
--- Lib/re.py 2009-03-02 05:13:57 +0000
+++ Lib/re.py 2009-03-25 23:48:11 +0000
@@ -292,7 +292,7 @@
 
 class Scanner:
     def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
+        from sre_constants import OP
         self.lexicon = lexicon
         # combine phrases into a compound pattern
         p = []
@@ -300,10 +300,10 @@
         s.flags = flags
         for phrase, action in lexicon:
             p.append(sre_parse.SubPattern(s, [
-                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
+                (OP.SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
                 ]))
         s.groups = len(p)+1
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+        p = sre_parse.SubPattern(s, [(OP.BRANCH, (None, p))])
         self.scanner = sre_compile.compile(p)
     def scan(self, string):
         result = []
=== modified file Modules/_sre.c
--- Modules/_sre.c 2008-09-10 14:27:00 +0000
+++ Modules/_sre.c 2009-04-11 13:35:15 +0000
@@ -4,24 +4,25 @@
  * regular expression matching engine
  *
  * partial history:
- * 1999-10-24 fl  created (based on existing template matcher code)
- * 2000-03-06 fl  first alpha, sort of
- * 2000-08-01 fl  fixes for 1.6b1
- * 2000-08-07 fl  use PyOS_CheckStack() if available
- * 2000-09-20 fl  added expand method
- * 2001-03-20 fl  lots of fixes for 2.1b2
- * 2001-04-15 fl  export copyright as Python attribute, not global
- * 2001-04-28 fl  added __copy__ methods (work in progress)
- * 2001-05-14 fl  fixes for 1.5.2 compatibility
- * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
- * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
- * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
- * 2001-10-21 fl  added sub/subn primitive
- * 2001-10-24 fl  added finditer primitive (for 2.2 only)
- * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
- * 2002-11-09 fl  fixed empty sub/subn return type
- * 2003-04-18 mvl fully support 4-byte codes
- * 2003-10-17 gn  implemented non recursive scheme
+ * 1999-10-24 fl   created (based on existing template matcher code)
+ * 2000-03-06 fl   first alpha, sort of
+ * 2000-08-01 fl   fixes for 1.6b1
+ * 2000-08-07 fl   use PyOS_CheckStack() if available
+ * 2000-09-20 fl   added expand method
+ * 2001-03-20 fl   lots of fixes for 2.1b2
+ * 2001-04-15 fl   export copyright as Python attribute, not global
+ * 2001-04-28 fl   added __copy__ methods (work in progress)
+ * 2001-05-14 fl   fixes for 1.5.2 compatibility
+ * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis)
+ * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller)
+ * 2001-10-20 fl   added split primitive; reenable unicode for 1.6/2.0/2.1
+ * 2001-10-21 fl   added sub/subn primitive
+ * 2001-10-24 fl   added finditer primitive (for 2.2 only)
+ * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum)
+ * 2002-11-09 fl   fixed empty sub/subn return type
+ * 2003-04-18 mvl  fully support 4-byte codes
+ * 2003-10-17 gn   implemented non recursive scheme
+ * 2009-03-28 mrab major reworking
  *
  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
  *
@@ -68,9 +69,6 @@
 /* -------------------------------------------------------------------- */
 /* optional features */
 
-/* enables fast searching */
-#define USE_FAST_SEARCH
-
 /* enables aggressive inlining (always on for Visual C) */
 #undef USE_INLINE
 
@@ -97,7 +95,6 @@
 /* error codes */
 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
 #define SRE_ERROR_STATE -2 /* illegal state */
-#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
 #define SRE_ERROR_MEMORY -9 /* out of memory */
 #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
 
@@ -110,192 +107,599 @@
 /* -------------------------------------------------------------------- */
 /* search engine state */
 
-/* default character predicates (run sre_chars.py to regenerate tables) */
-
-#define SRE_DIGIT_MASK 1
-#define SRE_SPACE_MASK 2
-#define SRE_LINEBREAK_MASK 4
-#define SRE_ALNUM_MASK 8
-#define SRE_WORD_MASK 16
-
-/* FIXME: this assumes ASCII.  create tables in init_sre() instead */
-
-static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
-25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
-0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
-
-static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
-61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
-108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
-106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-120, 121, 122, 123, 124, 125, 126, 127 };
-
-#define SRE_IS_DIGIT(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
-#define SRE_IS_SPACE(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
-#define SRE_IS_LINEBREAK(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
-#define SRE_IS_ALNUM(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
-#define SRE_IS_WORD(ch)\
-    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
-
-static unsigned int sre_lower(unsigned int ch)
-{
-    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
-}
-
-/* locale-specific character predicates */
-/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
- * warnings when c's type supports only numbers < N+1 */
-#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
-#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
-#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
-#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
-#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
-
-static unsigned int sre_lower_locale(unsigned int ch)
-{
-    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
-}
-
-/* unicode-specific character predicates */
-
-#if defined(HAVE_UNICODE)
-
-#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
-#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
-#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
-#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
-#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
-
-static unsigned int sre_lower_unicode(unsigned int ch)
-{
-    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
-}
-
-#endif
-
-LOCAL(int)
-sre_category(SRE_CODE category, unsigned int ch)
-{
-    switch (category) {
-
-    case SRE_CATEGORY_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_WORD:
-        return SRE_IS_WORD(ch);
-    case SRE_CATEGORY_NOT_WORD:
-        return !SRE_IS_WORD(ch);
-    case SRE_CATEGORY_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-
-    case SRE_CATEGORY_LOC_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_LOC_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
-
-#if defined(HAVE_UNICODE)
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_UNI_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_UNI_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_UNI_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_UNI_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_UNI_IS_LINEBREAK(ch);
-#else
-    case SRE_CATEGORY_UNI_DIGIT:
-        return SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_NOT_DIGIT:
-        return !SRE_IS_DIGIT(ch);
-    case SRE_CATEGORY_UNI_SPACE:
-        return SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_NOT_SPACE:
-        return !SRE_IS_SPACE(ch);
-    case SRE_CATEGORY_UNI_WORD:
-        return SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_NOT_WORD:
-        return !SRE_LOC_IS_WORD(ch);
-    case SRE_CATEGORY_UNI_LINEBREAK:
-        return SRE_IS_LINEBREAK(ch);
-    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-        return !SRE_IS_LINEBREAK(ch);
-#endif
-    }
+#define SRE_BITS_PER_CODE (sizeof(SRE_CODE) * 8)
+
+#define SRE_MARK_OP_SIZE 2
+
+/* ascii-specific */
+
+#define SRE_ASCII_MAX 0x7F
+
+#define SRE_UPPER_MASK 0x1
+#define SRE_LOWER_MASK 0x2
+#define SRE_DIGIT_MASK 0x4
+#define SRE_UNDERSCORE_MASK 0x8
+#define SRE_LINEBREAK_MASK 0x10
+#define SRE_SPACE_MASK 0x20
+/* alpha = upper | lower */
+#define SRE_ALPHA_MASK 0x3
+/* alnum = alpha | digit */
+#define SRE_ALNUM_MASK 0x7
+/* word = alnum | underscore */
+#define SRE_WORD_MASK 0xF
+
+/* SRE_*_MASK bits per ascii code; space is only \t \n \v \f \r and ' ' */
+static char ascii_category[128] = {
+     0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+     0x0, 0x20, 0x30, 0x20, 0x20, 0x20,  0x0,  0x0,
+     0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+     0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+    0x20,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+     0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+     0x4,  0x4,  0x4,  0x4,  0x4,  0x4,  0x4,  0x4,
+     0x4,  0x4,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
+     0x0,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,
+     0x1,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,
+     0x1,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,  0x1,
+     0x1,  0x1,  0x1,  0x0,  0x0,  0x0,  0x0,  0x8,
+     0x0,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,
+     0x2,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,
+     0x2,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,  0x2,
+     0x2,  0x2,  0x2,  0x0,  0x0,  0x0,  0x0,  0x0,
+};
+
+/* tests an ascii code against an SRE_CAT_* category; FALSE if ch > SRE_ASCII_MAX */
+static BOOL ascii_in_category(SRE_CODE category, SRE_CODE ch) {
+    if (ch > SRE_ASCII_MAX)
+        /* outside the ascii range */
+        return FALSE;
+    /* each category is a bit mask tested against the ascii_category entry */
+    switch(category)
+    {
+    case SRE_CAT_ALPHA:
+        return (ascii_category[ch] & SRE_ALPHA_MASK) != 0;
+    case SRE_CAT_ALNUM:
+        return (ascii_category[ch] & SRE_ALNUM_MASK) != 0;
+    case SRE_CAT_DIGIT:
+        return (ascii_category[ch] & SRE_DIGIT_MASK) != 0;
+    case SRE_CAT_LINEBREAK:
+        return (ascii_category[ch] & SRE_LINEBREAK_MASK) != 0;
+    case SRE_CAT_SPACE:
+        return (ascii_category[ch] & SRE_SPACE_MASK) != 0;
+    case SRE_CAT_WORD:
+        return (ascii_category[ch] & SRE_WORD_MASK) != 0;
+    default: /* unrecognised category */
+        return FALSE;
+    }
+}
+
+/* converts an uppercase ascii character to lowercase; anything else is unchanged */
+static BOOL ascii_lower(SRE_CODE ch) {
+    if (ch <= SRE_ASCII_MAX && (ascii_category[ch] & SRE_UPPER_MASK) != 0)
+        return ch ^ 0x20;
+    /* NOTE(review): returns a character code although declared BOOL -- confirm */
+    return ch;
+}
+
+/* converts a lowercase ascii character to uppercase; anything else is unchanged */
+static BOOL ascii_upper(SRE_CODE ch) {
+    if (ch <= SRE_ASCII_MAX && (ascii_category[ch] & SRE_LOWER_MASK) != 0)
+        return ch ^ 0x20;
+    /* NOTE(review): returns a character code although declared BOOL -- confirm */
+    return ch;
+}
+
+/* the handlers for ascii characters (SRE_EncodingTable is declared elsewhere) */
+static SRE_EncodingTable ascii_encoding = {
+    ascii_in_category, /* category membership test */
+    ascii_lower, /* lowercase conversion */
+    ascii_upper, /* uppercase conversion */
+    ascii_upper, /* titlecase for ascii is the same as uppercase */
+};
+
+/* locale-specific */
+
+#define SRE_LOCALE_MAX 0xFF
+
+/* tests a locale character against an SRE_CAT_* category; FALSE if ch > SRE_LOCALE_MAX */
+static BOOL locale_in_category(SRE_CODE category, SRE_CODE ch) {
+    if (ch > SRE_LOCALE_MAX)
+        /* outside the locale range */
+        return FALSE;
+    /* <ctype.h> tests return any nonzero value, so normalise them with != 0 */
+    switch(category)
+    {
+    case SRE_CAT_ALPHA:
+        return isalpha(ch) != 0;
+    case SRE_CAT_ALNUM:
+        return isalnum(ch) != 0;
+    case SRE_CAT_DIGIT:
+        return isdigit(ch) != 0;
+    case SRE_CAT_LINEBREAK:
+        return ch == '\n';
+    case SRE_CAT_SPACE:
+        return isspace(ch) != 0;
+    case SRE_CAT_WORD:
+        return ch == '_' || isalnum(ch) != 0;
+    default: /* unrecognised category */
+        return FALSE;
+    }
+}
+
+/* lowercases ch with tolower() when ch <= SRE_LOCALE_MAX; otherwise unchanged */
+static BOOL locale_lower(SRE_CODE ch) {
+    if (ch <= SRE_LOCALE_MAX)
+        return tolower(ch);
+    /* NOTE(review): returns a character code although declared BOOL -- confirm */
+    return ch;
+}
+
+/* uppercases ch with toupper() when ch <= SRE_LOCALE_MAX; otherwise unchanged */
+static BOOL locale_upper(SRE_CODE ch) {
+    if (ch <= SRE_LOCALE_MAX)
+        return toupper(ch);
+    /* NOTE(review): returns a character code although declared BOOL -- confirm */
+    return ch;
+}
+
+/* the handlers for locale characters (SRE_EncodingTable is declared elsewhere) */
+static SRE_EncodingTable locale_encoding = {
+    locale_in_category, /* category membership test */
+    locale_lower, /* lowercase conversion */
+    locale_upper, /* uppercase conversion */
+    locale_upper, /* titlecase for locale is the same as uppercase */
+};
+
+/* unicode-specific */
+
+/* tests a unicode character against an SRE_CAT_* category; result is 0 or 1 */
+static BOOL unicode_in_category(SRE_CODE category, SRE_CODE ch) {
+    switch(category)
+    {
+    case SRE_CAT_ALPHA:
+        return Py_UNICODE_ISALPHA((Py_UNICODE)ch) != 0;
+    case SRE_CAT_ALNUM:
+        return Py_UNICODE_ISALNUM((Py_UNICODE)ch) != 0;
+    case SRE_CAT_DIGIT:
+        return Py_UNICODE_ISDIGIT((Py_UNICODE)ch) != 0;
+    case SRE_CAT_LINEBREAK:
+        return ch == '\n';
+    case SRE_CAT_SPACE:
+        return Py_UNICODE_ISSPACE((Py_UNICODE)ch) != 0;
+    case SRE_CAT_WORD:
+        return ch == '_' || Py_UNICODE_ISALNUM((Py_UNICODE)ch) != 0;
+    default: /* unrecognised category */
+        return FALSE;
+    }
+}
+
+/* unicode lowercase of ch. NOTE(review): returns a code point as BOOL -- confirm */
+static BOOL unicode_lower(SRE_CODE ch) {
+    return Py_UNICODE_TOLOWER((Py_UNICODE)ch);
+}
+
+/* unicode uppercase of ch. NOTE(review): returns a code point as BOOL -- confirm */
+static BOOL unicode_upper(SRE_CODE ch) {
+    return Py_UNICODE_TOUPPER((Py_UNICODE)ch);
+}
+
+/* unicode titlecase of ch. NOTE(review): returns a code point as BOOL -- confirm */
+static BOOL unicode_title(SRE_CODE ch) {
+    return Py_UNICODE_TOTITLE((Py_UNICODE)ch);
+}
+
+/* the handlers for unicode characters (SRE_EncodingTable is declared elsewhere) */
+static SRE_EncodingTable unicode_encoding = {
+    unicode_in_category, /* category membership test */
+    unicode_lower, /* lowercase conversion */
+    unicode_upper, /* uppercase conversion */
+    unicode_title, /* titlecase conversion */
+};
+
+/* helpers */
+
+/* smaller of two unsigned ints */
+LOCAL(unsigned int) unsigned_min(unsigned int x, unsigned int y) {
+    return y < x ? y : x;
+}
+
+/* smaller of two signed ints */
+LOCAL(int) signed_min(int x, int y) {
+    return y < x ? y : x;
+}
+
+/* larger of two signed ints */
+LOCAL(int) signed_max(int x, int y) {
+    return y > x ? y : x;
+}
+
+/* copies state->mark onto the mark-chunk stack; returns 0 or SRE_ERROR_MEMORY */
+LOCAL(int) push_marks(SRE_STATE* state) {
+    SRE_MarkChunk* chunk;
+    int mark_pos = 0;
+
+    chunk = state->mark_chunk;
+
+    while (mark_pos < state->mark_count) {
+        int n;
+
+        /* is this chunk already full? */
+        if (chunk->count >= SRE_MARK_CHUNK_SIZE) {
+            /* is there another chunk available? */
+            if (chunk->next == NULL) {
+                SRE_MarkChunk* new_chunk;
+
+                /* create a new chunk */
+                new_chunk = PyMem_MALLOC(sizeof(SRE_MarkChunk));
+                if (new_chunk == NULL)
+                    return SRE_ERROR_MEMORY;
+
+                /* link in the new chunk */
+                new_chunk->previous = chunk;
+                new_chunk->next = NULL;
+                chunk->next = new_chunk;
+            }
+
+            chunk = chunk->next;
+            chunk->count = 0;
+        }
+
+        /* store the marks that will fit in this chunk (min() is not standard C) */
+        n = signed_min(state->mark_count - mark_pos, SRE_MARK_CHUNK_SIZE -
+          chunk->count);
+        memmove(chunk->items + chunk->count, state->mark + mark_pos, n *
+          sizeof(void*));
+
+        chunk->count += n;
+        mark_pos += n;
+    }
+
+    state->mark_chunk = chunk;
+
     return 0;
 }
 
-/* helpers */
-
-static void
-data_stack_dealloc(SRE_STATE* state)
-{
-    if (state->data_stack) {
-        PyMem_FREE(state->data_stack);
-        state->data_stack = NULL;
-    }
-    state->data_stack_size = state->data_stack_base = 0;
-}
-
-static int
-data_stack_grow(SRE_STATE* state, Py_ssize_t size)
-{
-    Py_ssize_t minsize, cursize;
-    minsize = state->data_stack_base+size;
-    cursize = state->data_stack_size;
-    if (cursize < minsize) {
-        void* stack;
-        cursize = minsize+minsize/4+1024;
-        TRACE(("allocate/grow stack %d\n", cursize));
-        stack = PyMem_REALLOC(state->data_stack, cursize);
-        if (!stack) {
-            data_stack_dealloc(state);
-            return SRE_ERROR_MEMORY;
+/* pops the newest state->mark_count stacked marks back into state->mark */
+LOCAL(void) pop_marks(SRE_STATE* state) {
+    SRE_MarkChunk* chunk;
+    int mark_count = state->mark_count; /* marks still to be restored */
+
+    chunk = state->mark_chunk;
+
+    while (mark_count > 0) {
+        int n;
+
+        /* pop what this chunk holds (assumes enough marks were pushed) */
+        n = signed_min(mark_count, chunk->count);
+        mark_count -= n;
+        chunk->count -= n;
+        memmove(state->mark + mark_count, chunk->items + chunk->count, n *
+          sizeof(void*));
+
+        /* step back to the previous chunk if it's empty and not the first chunk */
+        if (chunk->count == 0 && chunk->previous != NULL)
+            chunk = chunk->previous;
+    }
+
+    state->mark_chunk = chunk;
+}
+
+/* drops the newest mark_count stacked marks without copying them anywhere */
+LOCAL(void) discard_marks(SRE_STATE* state, int mark_count) {
+    SRE_MarkChunk* chunk;
+
+    chunk = state->mark_chunk;
+
+    while (mark_count > 0) {
+        int n;
+
+        /* discard what this chunk holds (assumes enough marks were pushed) */
+        n = signed_min(mark_count, chunk->count);
+        mark_count -= n;
+        chunk->count -= n;
+
+        /* step back to the previous chunk if it's empty and not the first chunk */
+        if (chunk->count == 0 && chunk->previous != NULL)
+            chunk = chunk->previous;
+    }
+
+    state->mark_chunk = chunk;
+}
+
+/* pushes a backtrack point for op; returns 0 or SRE_ERROR_MEMORY */
+LOCAL(int) push_backtrack(SRE_STATE* state, SRE_CODE op) {
+    SRE_BacktrackChunk* chunk;
+
+    chunk = state->backtrack_chunk;
+
+    /* is there space for another backtrack point in this chunk? */
+    if (chunk->count >= SRE_BACKTRACK_CHUNK_SIZE) {
+        /* is there another chunk available? */
+        if (chunk->next == NULL) {
+            SRE_BacktrackChunk* new_chunk;
+
+            /* create a new chunk */
+            new_chunk = PyMem_MALLOC(sizeof(SRE_BacktrackChunk));
+            if (new_chunk == NULL)
+                return SRE_ERROR_MEMORY;
+
+            /* link in the new chunk */
+            new_chunk->previous = chunk;
+            new_chunk->next = NULL;
+            chunk->next = new_chunk;
         }
-        state->data_stack = (char *)stack;
-        state->data_stack_size = cursize;
-    }
+
+        chunk = chunk->next;
+        chunk->count = 0; /* start the next chunk empty */
+
+        state->backtrack_chunk = chunk;
+    }
+
+    /* take the next slot; state->backtrack_item is left pointing at it */
+    state->backtrack_item = &chunk->items[chunk->count++];
+    state->backtrack_item->op = op;
+    state->backtrack_item->mark_count = 0; /* no marks recorded for it yet */
+
     return 0;
 }
 
+/* makes state->backtrack_item refer to the newest stacked backtrack point */
+LOCAL(void) top_backtrack(SRE_STATE* state) {
+    SRE_BacktrackChunk* chunk = state->backtrack_chunk;
+    state->backtrack_item = chunk->items + (chunk->count - 1);
+}
+
+/* drops the newest backtrack point along with the marks counted against it */
+LOCAL(void) discard_backtrack(SRE_STATE* state) {
+    SRE_BacktrackChunk* chunk;
+
+    chunk = state->backtrack_chunk;
+
+    --chunk->count; /* items[count] is now the point being dropped */
+
+    /* discard any marks for this backtrack point */
+    discard_marks(state, chunk->items[chunk->count].mark_count);
+
+    /* step back to the previous chunk if it's empty and not the first chunk */
+    if (chunk->count == 0 && chunk->previous != NULL)
+        chunk = chunk->previous;
+
+    state->backtrack_chunk = chunk; /* state->backtrack_item is not updated */
+}
+
+/* discards backtrack points until one with the given op is on top (kept) */
+LOCAL(void) discard_backtrack_until(SRE_STATE* state, SRE_CODE op) {
+    SRE_BacktrackChunk* chunk;
+    int total_mark_count = 0;
+
+    chunk = state->backtrack_chunk;
+
+    /* discard until the given op is on top (assumes such a point exists) */
+    while (chunk->items[chunk->count - 1].op != op) {
+        --chunk->count;
+
+        /* count how many marks we should also discard */
+        total_mark_count += chunk->items[chunk->count].mark_count;
+
+        /* step back to the previous chunk if it's empty and not the first chunk */
+        if (chunk->count == 0 && chunk->previous != NULL)
+            chunk = chunk->previous;
+    }
+
+    state->backtrack_chunk = chunk;
+    state->backtrack_item = &chunk->items[chunk->count - 1];
+
+    /* discard, in one go, the marks of every point that was dropped */
+    discard_marks(state, total_mark_count);
+}
+
+/* empties the backtrack stack by rewinding to its (cleared) first chunk */
+LOCAL(void) cleanup_backtrack(SRE_STATE* state) {
+    state->first_backtrack_chunk->count = 0;
+    state->backtrack_chunk = state->first_backtrack_chunk;
+}
+
+/* empties the mark stack by rewinding to its (cleared) first chunk */
+LOCAL(void) cleanup_marks(SRE_STATE* state) {
+    state->first_mark_chunk->count = 0;
+    state->mark_chunk = state->first_mark_chunk;
+}
+
+/* resets the backtrack and mark stacks, then hands result back unchanged */
+LOCAL(int) cleanup(SRE_STATE* state, int result) {
+    cleanup_backtrack(state);
+    cleanup_marks(state);
+
+    return result; /* lets callers write "return cleanup(state, x);" */
+}
+
+/*
+  checks whether 2 characters are the same, ignoring case
+
+  they are deemed the same if their lowercase, uppercase or titlecase forms
+  (as given by the state's encoding table) agree; testing all three caters for
+  differing case handling between locales, e.g. dotted/dotless I/i in Turkish
+ */
+LOCAL(BOOL) same_char_ignore(SRE_STATE* state, SRE_CODE ch_1, SRE_CODE ch_2) {
+    SRE_EncodingTable* encoding = state->encoding;
+
+    return encoding->lower(ch_1) == encoding->lower(ch_2) ||
+      encoding->upper(ch_1) == encoding->upper(ch_2) || encoding->title(ch_1)
+      == encoding->title(ch_2);
+}
+
+/*
+  checks whether a character is in the given charset
+
+  the charset format (pattern_ptr[0] onwards) is:
+
+  <CHARSET> skip_to_end max_char index... subset...
+ */
+LOCAL(BOOL) in_charset(SRE_CODE ch, SRE_CODE* pattern_ptr) {
+    SRE_CODE hi_bytes;
+    SRE_CODE lo_byte;
+    SRE_CODE index;
+    SRE_CODE* subset_ptr;
+    SRE_CODE bit_flags;
+
+    /* is the character greater than the maximum character code (max_char)? */
+    if (ch > pattern_ptr[2])
+        return FALSE;
+
+    /* split the character code into its low byte and everything above it */
+    hi_bytes = ch >> 8;
+    lo_byte = ch & 0xFF;
+
+    /*
+      the charset is split into subsets, each a bitmap of 256 characters
+
+      which subset holds this character? the index list starts at codeword 3
+      and packs 2 indexes per codeword: even hi_bytes in the low half, odd in
+      the high half
+     */
+    index = pattern_ptr[3 + hi_bytes / 2];
+    index >>= (hi_bytes % 2) * (SRE_BITS_PER_CODE / 2);
+    index &= (1 << (SRE_BITS_PER_CODE / 2)) - 1;
+
+    /* point to the subset: skip op, skip_to_end, max_char and the indexes */
+    subset_ptr = pattern_ptr + 4 + (pattern_ptr[2] >> 9) + index * (256 /
+      SRE_BITS_PER_CODE);
+
+    /* get the codeword containing the bitflags */
+    bit_flags = subset_ptr[lo_byte / SRE_BITS_PER_CODE];
+
+    /* return the value of the appropriate bitflag as a BOOL */
+    return ((bit_flags >> (lo_byte % SRE_BITS_PER_CODE)) & 0x1) != 0;
+}
+
+/*
+  checks whether a character is in the given charset, ignoring case
+
+  the charset format is:
+
+  <CHARSET_IGNORE> skip_to_end max_char index... subset...
+
+  TRUE if the lowercase, uppercase or titlecase form of ch is in the charset;
+  trying all three caters for different case handling in different locales,
+  for example dotted and dotless I/i in Turkish
+ */
+LOCAL(BOOL) in_charset_ignore(SRE_STATE* state, SRE_CODE ch, SRE_CODE*
+  pattern_ptr) {
+    SRE_EncodingTable* encoding = state->encoding;
+
+    return in_charset(encoding->lower(ch), pattern_ptr) ||
+      in_charset(encoding->upper(ch), pattern_ptr) ||
+      in_charset(encoding->title(ch), pattern_ptr);
+}
+
+/* TRUE when ch lies within min..max, both ends included */
+LOCAL(BOOL) in_range(SRE_CODE ch, SRE_CODE min, SRE_CODE max) {
+    return min <= ch && ch <= max;
+}
+
+/*
+  checks whether a character is in the given range, ignoring case
+
+  TRUE if the lowercase, uppercase or titlecase form of ch is in min..max;
+  trying all three caters for different case handling in different locales,
+  for example dotted and dotless I/i in Turkish
+ */
+LOCAL(BOOL) in_range_ignore(SRE_STATE* state, SRE_CODE ch, SRE_CODE min,
+  SRE_CODE max) {
+    SRE_EncodingTable* encoding = state->encoding;
+
+    return in_range(encoding->lower(ch), min, max) ||
+      in_range(encoding->upper(ch), min, max) || in_range(encoding->title(ch),
+      min, max);
+}
+
+/*
+  checks whether a character is in the given set: TRUE as soon as one member
+  (CATEGORY, CHARSET, LITERAL, NOT_CATEGORY or RANGE) accepts it, else FALSE
+
+  the set format is:
+  <SET> skip_to_end pattern
+ */
+LOCAL(BOOL) in_set(SRE_STATE* state, SRE_CODE ch, SRE_CODE* pattern_ptr) {
+    SRE_EncodingTable* encoding = state->encoding;
+    SRE_CODE* end_ptr = pattern_ptr + pattern_ptr[1];
+
+    pattern_ptr += 2; /* step over the set op and skip_to_end */
+
+    while (pattern_ptr < end_ptr) {
+        switch(pattern_ptr[0]) {
+        case SRE_OP_CATEGORY:
+            /* <CATEGORY> <code> */
+            if (encoding->in_category(pattern_ptr[1], ch))
+                return TRUE;
+
+            pattern_ptr += 2;
+            break;
+        case SRE_OP_CHARSET:
+            /* <CHARSET> <skip_to_tail> <charset> */
+            if (in_charset(ch, pattern_ptr))
+                return TRUE;
+
+            pattern_ptr += pattern_ptr[1]; /* the skip is relative to the op */
+            break;
+        case SRE_OP_LITERAL:
+            /* <LITERAL> <code> */
+            if (ch == pattern_ptr[1])
+                return TRUE;
+
+            pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CATEGORY:
+            /* <NOT_CATEGORY> <code> */
+            if (!encoding->in_category(pattern_ptr[1], ch))
+                return TRUE;
+
+            pattern_ptr += 2;
+            break;
+        case SRE_OP_RANGE:
+            /* <RANGE> <min> <max> */
+            if (in_range(ch, pattern_ptr[1], pattern_ptr[2]))
+                return TRUE;
+
+            pattern_ptr += 3;
+            break;
+        default:
+            return FALSE; /* unknown member op: treated as no match */
+        }
+    }
+
+    return FALSE; /* no member accepted the character */
+}
+
+/*
+  checks whether a character is in the given set, ignoring case
+
+  the set format is:
+
+  <SET_IGNORE> skip_to_end pattern
+
+  TRUE if the lowercase, uppercase or titlecase form of ch is in the set;
+  trying all three caters for different case handling in different locales,
+  for example dotted and dotless I/i in Turkish
+ */
+LOCAL(BOOL) in_set_ignore(SRE_STATE* state, SRE_CODE ch, SRE_CODE* pattern_ptr)
+  {
+    SRE_EncodingTable* encoding = state->encoding;
+
+    return in_set(state, encoding->lower(ch), pattern_ptr) || in_set(state,
+      encoding->upper(ch), pattern_ptr) || in_set(state, encoding->title(ch),
+      pattern_ptr);
+}
+
 /* generate 8-bit version */
 
 #define SRE_CHAR unsigned char
-#define SRE_AT sre_at
-#define SRE_COUNT sre_count
-#define SRE_CHARSET sre_charset
-#define SRE_INFO sre_info
+#define SRE_PCHAR(PTR,OFS) ((unsigned char*)(PTR))[OFS]
+#define SRE_AT_BOUNDARY at_boundary
+#define SRE_TRY_MATCH try_match
 #define SRE_MATCH sre_match
-#define SRE_MATCH_CONTEXT sre_match_context
 #define SRE_SEARCH sre_search
 #define SRE_LITERAL_TEMPLATE sre_literal_template
-
 #if defined(HAVE_UNICODE)
 
 #define SRE_RECURSIVE
@@ -305,22 +709,18 @@
 #undef SRE_LITERAL_TEMPLATE
 #undef SRE_SEARCH
 #undef SRE_MATCH
-#undef SRE_MATCH_CONTEXT
-#undef SRE_INFO
-#undef SRE_CHARSET
-#undef SRE_COUNT
-#undef SRE_AT
+#undef SRE_TRY_MATCH
+#undef SRE_AT_BOUNDARY
+#undef SRE_PCHAR
 #undef SRE_CHAR
 
-/* generate 16-bit unicode version */
+/* generate unicode version */
 
 #define SRE_CHAR Py_UNICODE
-#define SRE_AT sre_uat
-#define SRE_COUNT sre_ucount
-#define SRE_CHARSET sre_ucharset
-#define SRE_INFO sre_uinfo
+#define SRE_PCHAR(PTR,OFS) ((Py_UNICODE*)(PTR))[OFS]
+#define SRE_AT_BOUNDARY uat_boundary
+#define SRE_TRY_MATCH utry_match
 #define SRE_MATCH sre_umatch
-#define SRE_MATCH_CONTEXT sre_umatch_context
 #define SRE_SEARCH sre_usearch
 #define SRE_LITERAL_TEMPLATE sre_uliteral_template
 #endif
@@ -330,1286 +730,1900 @@
 /* -------------------------------------------------------------------- */
 /* String matching engine */
 
-/* the following section is compiled twice, with different character
-   settings */
-
-LOCAL(int)
-SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
-{
-    /* check if pointer is at given position */
-
-    Py_ssize_t thisp, thatp;
-
-    switch (at) {
-
-    case SRE_AT_BEGINNING:
-    case SRE_AT_BEGINNING_STRING:
-        return ((void*) ptr == state->beginning);
-
-    case SRE_AT_BEGINNING_LINE:
-        return ((void*) ptr == state->beginning ||
-                SRE_IS_LINEBREAK((int) ptr[-1]));
-
-    case SRE_AT_END:
-        return (((void*) (ptr+1) == state->end &&
-                 SRE_IS_LINEBREAK((int) ptr[0])) ||
-                ((void*) ptr == state->end));
-
-    case SRE_AT_END_LINE:
-        return ((void*) ptr == state->end ||
-                SRE_IS_LINEBREAK((int) ptr[0]));
-
-    case SRE_AT_END_STRING:
-        return ((void*) ptr == state->end);
-
-    case SRE_AT_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-    case SRE_AT_LOC_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_LOC_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-#if defined(HAVE_UNICODE)
-    case SRE_AT_UNI_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_UNI_NON_BOUNDARY:
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-#endif
-
-    }
-
-    return 0;
-}
-
-LOCAL(int)
-SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
-{
-    /* check if character is a member of the given set */
-
-    int ok = 1;
-
-    for (;;) {
-        switch (*set++) {
-
-        case SRE_OP_FAILURE:
-            return !ok;
-
-        case SRE_OP_LITERAL:
-            /* <LITERAL> <code> */
-            if (ch == set[0])
-                return ok;
-            set++;
-            break;
-
-        case SRE_OP_CATEGORY:
-            /* <CATEGORY> <code> */
-            if (sre_category(set[0], (int) ch))
-                return ok;
-            set += 1;
-            break;
-
-        case SRE_OP_CHARSET:
-            if (sizeof(SRE_CODE) == 2) {
-                /* <CHARSET> <bitmap> (16 bits per code word) */
-                if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
-                    return ok;
-                set += 16;
-            }
-            else {
-                /* <CHARSET> <bitmap> (32 bits per code word) */
-                if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
-                    return ok;
-                set += 8;
-            }
-            break;
-
-        case SRE_OP_RANGE:
-            /* <RANGE> <lower> <upper> */
-            if (set[0] <= ch && ch <= set[1])
-                return ok;
-            set += 2;
-            break;
-
-        case SRE_OP_NEGATE:
-            ok = !ok;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
-        {
-            Py_ssize_t count, block;
-            count = *(set++);
-
-            if (sizeof(SRE_CODE) == 2) {
-                block = ((unsigned char*)set)[ch >> 8];
-                set += 128;
-                if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
-                    return ok;
-                set += count*16;
-            }
-            else {
-                /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
-                 * warnings when c's type supports only numbers < N+1 */
-                if (!(ch & ~65535))
-                    block = ((unsigned char*)set)[ch >> 8];
-                else
-                    block = -1;
-                set += 64;
-                if (block >=0 &&
-                    (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
-                    return ok;
-                set += count*8;
-            }
-            break;
-        }
-
-        default:
-            /* internal error -- there's not much we can do about it
-               here, so let's just pretend it didn't match... */
-            return 0;
-        }
-    }
-}
-
 LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
 
-LOCAL(Py_ssize_t)
-SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
-{
-    SRE_CODE chr;
-    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t i;
-
-    /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
-        end = ptr + maxcount;
-
-    switch (pattern[0]) {
-
-    case SRE_OP_IN:
-        /* repeated set */
-        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
-        while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
-            ptr++;
-        break;
-
+/* TRUE if just one of the chars either side of text_ptr is a word char */
+LOCAL(BOOL) SRE_AT_BOUNDARY(SRE_STATE* state) {
+    SRE_EncodingTable* encoding = state->encoding;
+    /* positions outside text_start..text_end count as non-word */
+    BOOL before = state->text_ptr > state->text_start &&
+      encoding->in_category(SRE_CAT_WORD, SRE_PCHAR(state->text_ptr, -1));
+
+    BOOL after = state->text_ptr < state->text_end &&
+      encoding->in_category(SRE_CAT_WORD, SRE_PCHAR(state->text_ptr, 0));
+
+    return before != after;
+}
+
+/* quick test of the first non-MARK op; TRUE means a match _could_ occur */
+LOCAL(BOOL) SRE_TRY_MATCH(SRE_STATE* state, SRE_CODE* pattern_ptr) {
+    SRE_EncodingTable* encoding = state->encoding;
+    SRE_CHAR* text_ptr = state->text_ptr;
+
+    /* skip over any marks */
+    while (pattern_ptr[0] == SRE_OP_MARK)
+        pattern_ptr += SRE_MARK_OP_SIZE;
+
+    switch(pattern_ptr[0]) {
     case SRE_OP_ANY:
-        /* repeated dot wildcard. */
-        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
-        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
-            ptr++;
-        break;
-
+        /* any character except linebreak */
+        /* <ANY> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !encoding->in_category(SRE_CAT_LINEBREAK, text_ptr[0]);
     case SRE_OP_ANY_ALL:
-        /* repeated dot wildcard.  skip to the end of the target
-           string, and backtrack from there */
-        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
-        ptr = end;
-        break;
-
+        /* any character at all */
+        /* <ANY_ALL> */
+        return text_ptr < (SRE_CHAR*)state->slice_end;
+    case SRE_OP_BEGINNING_LINE:
+        /* beginning of line */
+        /* <BEGINNING_LINE> */
+        return text_ptr <= (SRE_CHAR*)state->text_start ||
+          encoding->in_category(SRE_CAT_LINEBREAK, text_ptr[-1]);
+    case SRE_OP_BEGINNING_STRING:
+        /* beginning of string */
+        /* <BEGINNING_STRING> */
+        return text_ptr <= (SRE_CHAR*)state->text_start;
+    case SRE_OP_BOUNDARY:
+        /* boundary between word and non-word */
+        /* <BOUNDARY> */
+        return SRE_AT_BOUNDARY(state);
+    case SRE_OP_CATEGORY:
+        /* character in category */
+        /* <CATEGORY> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          encoding->in_category(pattern_ptr[1], text_ptr[0]);
+    case SRE_OP_CHARSET:
+        /* character in charset */
+        /* <CHARSET> <skip_to_tail> <charset> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          in_charset(text_ptr[0], pattern_ptr);
+    case SRE_OP_CHARSET_IGNORE:
+        /* character in charset, ignoring case */
+        /* <CHARSET_IGNORE> <skip_to_tail> <charset> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          in_charset_ignore(state, text_ptr[0], pattern_ptr);
+    case SRE_OP_END_LINE:
+        /* end of line */
+        /* <END_LINE> */
+        return text_ptr >= (SRE_CHAR*)state->text_end ||
+          state->encoding->in_category(SRE_CAT_LINEBREAK, text_ptr[0]);
+    case SRE_OP_END_STRING:
+        /* end of string */
+        /* <END_STRING> */
+        return text_ptr >= (SRE_CHAR*)state->text_end;
+    case SRE_OP_END_STRING_LINE:
+        /* end of string or final linebreak at end of string */
+        /* <END_STRING_LINE> */
+        return text_ptr >= (SRE_CHAR*)state->text_end || text_ptr ==
+          state->final_linebreak;
     case SRE_OP_LITERAL:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr == chr)
-            ptr++;
-        break;
-
+        /* this character */
+        /* <LITERAL> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && (SRE_CODE)text_ptr[0]
+          == pattern_ptr[1];
     case SRE_OP_LITERAL_IGNORE:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
-            ptr++;
-        break;
-
+        /* this character, ignoring case */
+        /* <LITERAL_IGNORE> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          same_char_ignore(state, text_ptr[0], pattern_ptr[1]);
+    case SRE_OP_NOT_BOUNDARY:
+        /* not at a boundary between word and non-word */
+        /* <NOT_BOUNDARY> */
+        return !SRE_AT_BOUNDARY(state);
+    case SRE_OP_NOT_CATEGORY:
+        /* character not in category */
+        /* <NOT_CATEGORY> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !encoding->in_category(pattern_ptr[1], text_ptr[0]);
+    case SRE_OP_NOT_CHARSET:
+        /* character not in charset */
+        /* <NOT_CHARSET> <skip_to_tail> <charset> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !in_charset(text_ptr[0], pattern_ptr);
+    case SRE_OP_NOT_CHARSET_IGNORE:
+        /* character not in charset, ignoring case */
+        /* <NOT_CHARSET_IGNORE> <skip_to_tail> <charset> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !in_charset_ignore(state, text_ptr[0], pattern_ptr);
     case SRE_OP_NOT_LITERAL:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) *ptr != chr)
-            ptr++;
-        break;
-
+        /* any character except this */
+        /* <NOT_LITERAL> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && (SRE_CODE)text_ptr[0]
+          != pattern_ptr[1];
     case SRE_OP_NOT_LITERAL_IGNORE:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
-            ptr++;
-        break;
-
+        /* any character except this, ignoring case */
+        /* <NOT_LITERAL_IGNORE> <code> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !same_char_ignore(state, text_ptr[0], pattern_ptr[1]);
+    case SRE_OP_NOT_RANGE:
+        /* character not in range */
+        /* <NOT_RANGE> <min> <max> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && !in_range(text_ptr[0],
+          pattern_ptr[1], pattern_ptr[2]);
+    case SRE_OP_NOT_RANGE_IGNORE:
+        /* character not in range, ignoring case */
+        /* <NOT_RANGE_IGNORE> <min> <max> */
+        return text_ptr < (SRE_CHAR*)state->slice_end &&
+          !in_range_ignore(state, text_ptr[0], pattern_ptr[1], pattern_ptr[2]);
+    case SRE_OP_NOT_SET:
+        /* character not in set */
+        /* <NOT_SET> <skip_to_tail> <set> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && !in_set(state,
+          text_ptr[0], pattern_ptr);
+    case SRE_OP_NOT_SET_IGNORE:
+        /* character not in set, ignoring case */
+        /* <NOT_SET_IGNORE> <skip_to_tail> <set> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && !in_set_ignore(state,
+          text_ptr[0], pattern_ptr);
+    case SRE_OP_RANGE:
+        /* character in range */
+        /* <RANGE> <min> <max> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && in_range(text_ptr[0],
+          pattern_ptr[1], pattern_ptr[2]);
+    case SRE_OP_RANGE_IGNORE:
+        /* character in range, ignoring case */
+        /* <RANGE_IGNORE> <min> <max> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && in_range_ignore(state,
+          text_ptr[0], pattern_ptr[1], pattern_ptr[2]);
+    case SRE_OP_SET:
+        /* character in set */
+        /* <SET> <skip_to_tail> <set> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && in_set(state,
+          text_ptr[0], pattern_ptr);
+    case SRE_OP_SET_IGNORE:
+        /* character in set, ignoring case */
+        /* <SET_IGNORE> <skip_to_tail> <set> */
+        return text_ptr < (SRE_CHAR*)state->slice_end && in_set_ignore(state,
+          text_ptr[0], pattern_ptr);
     default:
-        /* repeated single character pattern */
-        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
-        while ((SRE_CHAR*) state->ptr < end) {
-            i = SRE_MATCH(state, pattern);
-            if (i < 0)
-                return i;
-            if (!i)
-                break;
-        }
-        TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
-               (SRE_CHAR*) state->ptr - ptr));
-        return (SRE_CHAR*) state->ptr - ptr;
-    }
-
-    TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
-    return ptr - (SRE_CHAR*) state->ptr;
-}
-
-#if 0 /* not used in this release */
-LOCAL(int)
-SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
-{
-    /* check if an SRE_OP_INFO block matches at the current position.
-       returns the number of SRE_CODE objects to skip if successful, 0
-       if no match */
-
-    SRE_CHAR* end = state->end;
-    SRE_CHAR* ptr = state->ptr;
-    Py_ssize_t i;
-
-    /* check minimal length */
-    if (pattern[3] && (end - ptr) < pattern[3])
-        return 0;
-
-    /* check known prefix */
-    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
-        /* <length> <skip> <prefix data> <overlap data> */
-        for (i = 0; i < pattern[5]; i++)
-            if ((SRE_CODE) ptr[i] != pattern[7 + i])
-                return 0;
-        return pattern[0] + 2 * pattern[6];
-    }
-    return pattern[0];
-}
-#endif
-
-/* The macros below should be used to protect recursive SRE_MATCH()
- * calls that *failed* and do *not* return immediately (IOW, those
- * that will backtrack). Explaining:
- *
- * - Recursive SRE_MATCH() returned true: that's usually a success
- *   (besides atypical cases like ASSERT_NOT), therefore there's no
- *   reason to restore lastmark;
- *
- * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
- *   is returning to the caller: If the current SRE_MATCH() is the
- *   top function of the recursion, returning false will be a matching
- *   failure, and it doesn't matter where lastmark is pointing to.
- *   If it's *not* the top function, it will be a recursive SRE_MATCH()
- *   failure by itself, and the calling SRE_MATCH() will have to deal
- *   with the failure by the same rules explained here (it will restore
- *   lastmark by itself if necessary);
- *
- * - Recursive SRE_MATCH() returned false, and will continue the
- *   outside 'for' loop: must be protected when breaking, since the next
- *   OP could potentially depend on lastmark;
- *
- * - Recursive SRE_MATCH() returned false, and will be called again
- *   inside a local for/while loop: must be protected between each
- *   loop iteration, since the recursive SRE_MATCH() could do anything,
- *   and could potentially depend on lastmark.
- *
- * For more information, check the discussion at SF patch #712900.
+        return TRUE; /* ops not tested here are assumed able to match */
+    }
+}
+
+/*
+  check if string matches the given pattern.  returns <0 for error, 0 for
+  failure, and 1 for success
  */
-#define LASTMARK_SAVE()     \
-    do { \
-        ctx->lastmark = state->lastmark; \
-        ctx->lastindex = state->lastindex; \
-    } while (0)
-#define LASTMARK_RESTORE()  \
-    do { \
-        state->lastmark = ctx->lastmark; \
-        state->lastindex = ctx->lastindex; \
-    } while (0)
-
-#define RETURN_ERROR(i) do { return i; } while(0)
-#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
-#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
-
-#define RETURN_ON_ERROR(i) \
-    do { if (i < 0) RETURN_ERROR(i); } while (0)
-#define RETURN_ON_SUCCESS(i) \
-    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
-#define RETURN_ON_FAILURE(i) \
-    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
-
-#define SFY(x) #x
-
-#define DATA_STACK_ALLOC(state, type, ptr) \
-do { \
-    alloc_pos = state->data_stack_base; \
-    TRACE(("allocating %s in %d (%d)\n", \
-           SFY(type), alloc_pos, sizeof(type))); \
-    if (state->data_stack_size < alloc_pos+sizeof(type)) { \
-        int j = data_stack_grow(state, sizeof(type)); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    ptr = (type*)(state->data_stack+alloc_pos); \
-    state->data_stack_base += sizeof(type); \
-} while (0)
-
-#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
-do { \
-    TRACE(("looking up %s at %d\n", SFY(type), pos)); \
-    ptr = (type*)(state->data_stack+pos); \
-} while (0)
-
-#define DATA_STACK_PUSH(state, data, size) \
-do { \
-    TRACE(("copy data in %p to %d (%d)\n", \
-           data, state->data_stack_base, size)); \
-    if (state->data_stack_size < state->data_stack_base+size) { \
-        int j = data_stack_grow(state, size); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
-    } \
-    memcpy(state->data_stack+state->data_stack_base, data, size); \
-    state->data_stack_base += size; \
-} while (0)
-
-#define DATA_STACK_POP(state, data, size, discard) \
-do { \
-    TRACE(("copy data to %p from %d (%d)\n", \
-           data, state->data_stack_base-size, size)); \
-    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
-    if (discard) \
-        state->data_stack_base -= size; \
-} while (0)
-
-#define DATA_STACK_POP_DISCARD(state, size) \
-do { \
-    TRACE(("discard data from %d (%d)\n", \
-           state->data_stack_base-size, size)); \
-    state->data_stack_base -= size; \
-} while(0)
-
-#define DATA_PUSH(x) \
-    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
-#define DATA_POP(x) \
-    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
-#define DATA_POP_DISCARD(x) \
-    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
-#define DATA_ALLOC(t,p) \
-    DATA_STACK_ALLOC(state, t, p)
-#define DATA_LOOKUP_AT(t,p,pos) \
-    DATA_STACK_LOOKUP_AT(state,t,p,pos)
-
-#define MARK_PUSH(lastmark) \
-    do if (lastmark > 0) { \
-        i = lastmark; /* ctx->lastmark may change if reallocated */ \
-        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
-    } while (0)
-#define MARK_POP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
-    } while (0)
-#define MARK_POP_KEEP(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
-    } while (0)
-#define MARK_POP_DISCARD(lastmark) \
-    do if (lastmark > 0) { \
-        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
-    } while (0)
-
-#define JUMP_NONE            0
-#define JUMP_MAX_UNTIL_1     1
-#define JUMP_MAX_UNTIL_2     2
-#define JUMP_MAX_UNTIL_3     3
-#define JUMP_MIN_UNTIL_1     4
-#define JUMP_MIN_UNTIL_2     5
-#define JUMP_MIN_UNTIL_3     6
-#define JUMP_REPEAT          7
-#define JUMP_REPEAT_ONE_1    8
-#define JUMP_REPEAT_ONE_2    9
-#define JUMP_MIN_REPEAT_ONE  10
-#define JUMP_BRANCH          11
-#define JUMP_ASSERT          12
-#define JUMP_ASSERT_NOT      13
-
-#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
-    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
-    nextctx->last_ctx_pos = ctx_pos; \
-    nextctx->jump = jumpvalue; \
-    nextctx->pattern = nextpattern; \
-    ctx_pos = alloc_pos; \
-    ctx = nextctx; \
-    goto entrance; \
-    jumplabel: \
-    while (0) /* gcc doesn't like labels at end of scopes */ \
-
-typedef struct {
-    Py_ssize_t last_ctx_pos;
-    Py_ssize_t jump;
-    SRE_CHAR* ptr;
-    SRE_CODE* pattern;
-    Py_ssize_t count;
-    Py_ssize_t lastmark;
-    Py_ssize_t lastindex;
-    union {
-        SRE_CODE chr;
-        SRE_REPEAT* rep;
-    } u;
-} SRE_MATCH_CONTEXT;
-
-/* check if string matches the given pattern.  returns <0 for
-   error, 0 for failure, and 1 for success */
 LOCAL(Py_ssize_t)
 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 {
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
-    Py_ssize_t alloc_pos, ctx_pos = -1;
-    Py_ssize_t i, ret = 0;
-    Py_ssize_t jump;
     unsigned int sigcount=0;
-
-    SRE_MATCH_CONTEXT* ctx;
-    SRE_MATCH_CONTEXT* nextctx;
-
-    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
-
-    DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
-    ctx->last_ctx_pos = -1;
-    ctx->jump = JUMP_NONE;
-    ctx->pattern = pattern;
-    ctx_pos = alloc_pos;
-
-entrance:
-
-    ctx->ptr = (SRE_CHAR *)state->ptr;
-
-    if (ctx->pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> ... */
-        if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
-            TRACE(("reject (got %d chars, need %d)\n",
-                   (end - ctx->ptr), ctx->pattern[3]));
-            RETURN_FAILURE;
-        }
-        ctx->pattern += ctx->pattern[1] + 1;
-    }
-
+    int result;
+    SRE_BacktrackItem* current_repeat = NULL;
+    SRE_CODE op;
+    SRE_EncodingTable* encoding = state->encoding;
+
+    TRACE(("|%p|%p|ENTER\n", pattern, state->text_ptr));
+
+    /* clear the marks */
+    memset(state->mark, 0, state->mark_count * sizeof(state->mark[0]));
+    state->lastindex = -1;
+
+    /* failure backtrack point in case the entire pattern fails */
+    result = push_backtrack(state, SRE_OP_FAILURE);
+    if (result != 0)
+        return cleanup(state, result);
+
+    state->pattern_ptr = pattern;
+
+advance:
+    /* advance through the pattern */
     for (;;) {
+        /* should we abort the matching? */
         ++sigcount;
         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
-            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
-
-        switch (*ctx->pattern++) {
-
-        case SRE_OP_MARK:
-            /* set mark */
-            /* <MARK> <gid> */
-            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            if (i & 1)
-                state->lastindex = i/2 + 1;
-            if (i > state->lastmark) {
-                /* state->lastmark is the highest valid index in the
-                   state->mark array.  If it is increased by more than 1,
-                   the intervening marks must be set to NULL to signal
-                   that these marks have not been encountered. */
-                Py_ssize_t j = state->lastmark + 1;
-                while (j < i)
-                    state->mark[j++] = NULL;
-                state->lastmark = i;
+            return cleanup(state, SRE_ERROR_INTERRUPTED);
+
+        /* try the next opcode */
+        op = state->pattern_ptr[0];
+        switch (op) {
+        case SRE_OP_SUCCESS:
+        {
+            int i;
+            /* end of pattern */
+            /* <SUCCESS> */
+            TRACE(("|%p|%p|SUCCESS\n", state->pattern_ptr, state->text_ptr));
+
+            state->lastmark = -1;
+
+            /* clear any marks that aren't valid captures */
+            for (i = 0; i < state->mark_count / 2; i++) {
+                SRE_CHAR* group_start = state->mark[i * 2];
+                SRE_CHAR* group_end = state->mark[i * 2 + 1];
+                if (group_start == NULL || group_start > group_end) {
+                    /* not a capture, so clear the marks */
+                    state->mark[i * 2] = NULL;
+                    state->mark[i * 2 + 1] = NULL;
+                } else
+                    state->lastmark = i * 2 + 1;
             }
-            state->mark[i] = ctx->ptr;
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_LITERAL:
-            /* match literal string */
-            /* <LITERAL> <code> */
-            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_NOT_LITERAL:
-            /* match anything that is not literal character */
-            /* <NOT_LITERAL> <code> */
-            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_SUCCESS:
-            /* end of pattern */
-            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
-            state->ptr = ctx->ptr;
-            RETURN_SUCCESS;
-
-        case SRE_OP_AT:
-            /* match at given position */
-            /* <AT> <code> */
-            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            break;
-
+
+            if (state->lastindex >= 0)
+                state->lastindex = state->lastindex / 2 + 1;
+
+            return cleanup(state, 1);
+        }
+        case SRE_OP_ANY:
+            /* any character except linebreak */
+            /* <ANY> */
+            TRACE(("|%p|%p|ANY\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end ||
+              encoding->in_category(SRE_CAT_LINEBREAK,
+              SRE_PCHAR(state->text_ptr, 0)))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_ANY_ALL:
+            /* any character */
+            /* <ANY_ALL> */
+            TRACE(("|%p|%p|ANY_ALL\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end)
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_ASSERT:
+        {
+            /* assert subpattern */
+            /* <ASSERT> <skip_to_end> <back> pattern <END_ASSERT> */
+            SRE_CHAR* ptr;
+            TRACE(("|%p|%p|ASSERT %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+
+            /* where does the lookaround start matching? */
+            ptr = (SRE_CHAR*)state->text_ptr - state->pattern_ptr[2];
+            if (ptr < (SRE_CHAR*)state->text_start)
+                /*
+                  the subpattern wants to start matching before the start of
+                  the text, so the positive lookbehind fails
+                 */
+                goto backtrack;
+
+            /* push the state for backtracking */
+            result = push_backtrack(state, op);
+            if (result != 0)
+                return cleanup(state, result);
+
+            state->backtrack_item->assert.slice_start = state->slice_start;
+            state->backtrack_item->assert.slice_end = state->slice_end;
+            state->backtrack_item->assert.text_ptr = state->text_ptr;
+            state->backtrack_item->assert.pattern_ptr = state->pattern_ptr;
+
+            /* push the marks */
+            result = push_marks(state);
+            if (result != 0)
+                return cleanup(state, result);
+
+            /* a lookaround can access the entire text */
+            state->slice_start = state->text_start;
+            state->slice_end = state->text_end;
+            state->text_ptr = ptr;
+
+            /* try the body */
+            state->pattern_ptr += 3;
+            break;
+        }
+        case SRE_OP_ASSERT_NOT:
+        {
+            /* assert not subpattern */
+            /* <ASSERT_NOT> <skip_to_end> <back> pattern <END_ASSERT_NOT> */
+            SRE_CHAR* ptr;
+            TRACE(("|%p|%p|ASSERT_NOT %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+
+            /* where does the lookaround start matching? */
+            ptr = (SRE_CHAR*)state->text_ptr - state->pattern_ptr[2];
+            if (ptr < (SRE_CHAR*)state->text_start)
+                /*
+                  the subpattern wants to start matching before the start of
+                  the text, so the negative lookbehind succeeds
+
+                  try the tail
+                 */
+                state->pattern_ptr += state->pattern_ptr[1] + 1;
+            else {
+                /* push the state for backtracking */
+                result = push_backtrack(state, op);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                state->backtrack_item->assert.slice_start = state->slice_start;
+                state->backtrack_item->assert.slice_end = state->slice_end;
+                state->backtrack_item->assert.text_ptr = state->text_ptr;
+                state->backtrack_item->assert.pattern_ptr = state->pattern_ptr;
+
+                /* push the marks */
+                result = push_marks(state);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                /* a lookaround can access the entire text */
+                state->slice_start = state->text_start;
+                state->slice_end = state->text_end;
+                state->text_ptr = ptr;
+
+                /* try the body */
+                state->pattern_ptr += 3;
+            }
+            break;
+        }
+        case SRE_OP_BEGINNING_LINE:
+            /* beginning of line */
+            /* <BEGINNING_LINE> */
+            TRACE(("|%p|%p|BEGINNING_LINE\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr > state->text_start &&
+              !encoding->in_category(SRE_CAT_LINEBREAK,
+              SRE_PCHAR(state->text_ptr, -1)))
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_BEGINNING_STRING:
+            /* beginning of string */
+            /* <BEGINNING_STRING> */
+            TRACE(("|%p|%p|BEGINNING_STRING\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr > state->text_start)
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_BOUNDARY:
+            /* boundary between word and non-word */
+            /* <BOUNDARY> */
+            TRACE(("|%p|%p|BOUNDARY\n", state->pattern_ptr, state->text_ptr));
+            if (!SRE_AT_BOUNDARY(state))
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_BRANCH:
+        {
+            /* alternation */
+            /* <BRANCH> [<skip> code <JUMP> <skip_to_tail>]+ <0> */
+            SRE_CODE* skip_ptr;
+            TRACE(("|%p|%p|BRANCH\n", state->pattern_ptr, state->text_ptr));
+
+            /* point to the skip of the first branch */
+            skip_ptr = state->pattern_ptr + 1;
+
+            /* look for a branch which could match */
+            while (!SRE_TRY_MATCH(state, skip_ptr + 1)) {
+                /* try the next branch */
+                skip_ptr += skip_ptr[0];
+                /* if there are no more branches then backtrack */
+                if (skip_ptr[0] == 0)
+                    goto backtrack;
+            }
+
+            /* we'll try this branch */
+            state->pattern_ptr = skip_ptr + 1;
+
+            /* point to the _next_ skip */
+            skip_ptr += skip_ptr[0];
+
+            /* if the skip isn't 0 then there's another branch */
+            if (skip_ptr[0] != 0)
+            {
+                /* push the state for backtracking */
+                result = push_backtrack(state, op);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                state->backtrack_item->branch.text_ptr = state->text_ptr;
+                state->backtrack_item->branch.skip_ptr = skip_ptr;
+            }
+            break;
+        }
         case SRE_OP_CATEGORY:
-            /* match at given category */
+            /* character in category */
             /* <CATEGORY> <code> */
-            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
-                   ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_ANY:
-            /* match anything (except a newline) */
-            /* <ANY> */
-            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
-                RETURN_FAILURE;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_ANY_ALL:
-            /* match anything */
-            /* <ANY_ALL> */
-            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end)
-                RETURN_FAILURE;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN:
-            /* match set member (or non_member) */
-            /* <IN> <skip> <set> */
-            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_LITERAL_IGNORE:
-            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
-                   ctx->pattern, ctx->ptr, *ctx->pattern));
-            if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
-                RETURN_FAILURE;
-            ctx->pattern++;
-            ctx->ptr++;
-            break;
-
-        case SRE_OP_IN_IGNORE:
-            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
-            if (ctx->ptr >= end
-                || !SRE_CHARSET(ctx->pattern+1,
-                                (SRE_CODE)state->lower(*ctx->ptr)))
-                RETURN_FAILURE;
-            ctx->pattern += ctx->pattern[0];
-            ctx->ptr++;
-            break;
-
+            TRACE(("|%p|%p|CATEGORY %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end ||
+              !encoding->in_category(state->pattern_ptr[1],
+              SRE_PCHAR(state->text_ptr, 0)))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_CHARSET:
+            /* character in charset */
+            /* <CHARSET> <skip_to_tail> <charset> */
+            TRACE(("|%p|%p|CHARSET\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end ||
+              !in_charset(SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_CHARSET_IGNORE:
+            /* character in charset, ignoring case */
+            /* <CHARSET_IGNORE> <skip_to_tail> <charset> */
+            TRACE(("|%p|%p|CHARSET_IGNORE\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr >= state->slice_end ||
+              !in_charset_ignore(state, SRE_PCHAR(state->text_ptr, 0),
+              state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_END_ASSERT:
+        {
+            /* assert subpattern */
+            /* <ASSERT> <skip> <back> pattern <END_ASSERT> */
+            TRACE(("|%p|%p|END_ASSERT\n", state->pattern_ptr, state->text_ptr));
+
+            /* discard any backtracking in the subpattern */
+            discard_backtrack_until(state, SRE_OP_ASSERT);
+
+            /* pop the marks */
+            pop_marks(state);
+
+            /* pop the state */
+            state->slice_start = state->backtrack_item->assert.slice_start;
+            state->slice_end = state->backtrack_item->assert.slice_end;
+            state->text_ptr = state->backtrack_item->assert.text_ptr;
+
+            discard_backtrack(state);
+
+            /*
+              the subpattern of the positive lookaround succeeded, so the
+              lookaround succeeded
+             */
+            ++state->pattern_ptr;
+            break;
+        }
+        case SRE_OP_END_ASSERT_NOT:
+        {
+            /* assert not subpattern */
+            /* <ASSERT_NOT> <skip> <back> pattern <END_ASSERT_NOT> */
+            TRACE(("|%p|%p|END_ASSERT_NOT\n", state->pattern_ptr,
+              state->text_ptr));
+
+            /* discard any backtracking in the subpattern */
+            discard_backtrack_until(state, SRE_OP_ASSERT_NOT);
+
+            /* pop the marks */
+            pop_marks(state);
+
+            /* pop the state */
+            state->slice_start = state->backtrack_item->assert.slice_start;
+            state->slice_end = state->backtrack_item->assert.slice_end;
+
+            discard_backtrack(state);
+
+            /*
+              the subpattern of the negative lookaround succeeded, so the
+              lookaround failed
+             */
+            goto backtrack;
+        }
+        case SRE_OP_END_LINE:
+            /* end of line */
+            /* <END_LINE> */
+            TRACE(("|%p|%p|END_LINE\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr < state->text_end &&
+              !encoding->in_category(SRE_CAT_LINEBREAK,
+              SRE_PCHAR(state->text_ptr, 0)))
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_END_MAX_REPEAT:
+        {
+            /* greedy repeat */
+            /*
+              <MAX_REPEAT> <skip_to_end> <min> <max> pattern <END_MAX_REPEAT>
+              <skip_to_start>
+             */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            BOOL consumed;
+            BOOL try_body;
+            BOOL try_tail;
+            SRE_BacktrackItem* outer_repeat;
+            SRE_BacktrackItem* inner_repeat;
+            TRACE(("|%p|%p|END_MAX_REPEAT\n", state->pattern_ptr,
+              state->text_ptr));
+
+            /* point to various parts of the pattern */
+            end_repeat_ptr = state->pattern_ptr;
+            repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = end_repeat_ptr + 2;
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            inner_repeat = current_repeat;
+            outer_repeat = inner_repeat->repeat.repeat;
+
+            /* just matched the body */
+            ++inner_repeat->repeat.count;
+
+            /*
+              normally we expect the body of the repeat to match at least one
+              character in each iteration, so that gives us an upper limit to
+              the number of repeats
+             */
+            min_rep = inner_repeat->repeat.min;
+            max_rep = unsigned_min(inner_repeat->repeat.max,
+              inner_repeat->repeat.count + available);
+
+            /*
+              trying the body takes precedence over trying the tail
+
+              if the body didn't consume any characters then it would just
+              match at the same position repeatedly until the maximum was
+              reached, at which point the tail would be tried
+
+              in such a case we can save work by going straight to trying the
+              tail
+             */
+            consumed = state->text_ptr != inner_repeat->repeat.start_ptr;
+            try_body = inner_repeat->repeat.count < max_rep && consumed &&
+              SRE_TRY_MATCH(state, body_ptr);
+            try_tail = (inner_repeat->repeat.count >= min_rep || !consumed) &&
+              SRE_TRY_MATCH(state, tail_ptr);
+
+            if (!try_body && !try_tail) {
+                /* neither the body nor the tail could match */
+                --inner_repeat->repeat.count;
+                goto backtrack;
+            }
+
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                      both the body and the tail could match, but the body
+                      takes precedence
+                     */
+
+                    /*
+                      push the state for matching the tail in case the body
+                      fails
+                     */
+                    result = push_backtrack(state, SRE_OP_END_MAX_REPEAT);
+                    if (result != 0)
+                        return cleanup(state, result);
+
+                    state->backtrack_item->repeat.start_ptr =
+                      inner_repeat->repeat.start_ptr;
+                    state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                    state->backtrack_item->repeat.count =
+                      inner_repeat->repeat.count;
+                    state->backtrack_item->repeat.repeat = inner_repeat;
+                    state->backtrack_item->repeat.pattern_ptr = tail_ptr;
+                } else {
+                    /*
+                      if the body didn't push any backtracking then we don't
+                      need to either
+                     */
+                    top_backtrack(state);
+                    if (state->backtrack_item->op != SRE_OP_END_MAX_REPEAT ||
+                      state->backtrack_item->repeat.repeat != inner_repeat) {
+                        /*
+                          push the state for backtracking in case the body fails
+
+                          if we have to backtrack then we need to restore the
+                          values of inner_repeat
+
+                          we indicate that we're just restoring values for
+                          backtracking into the body by setting the
+                          'pattern_ptr' member to NULL
+                         */
+                        result = push_backtrack(state, SRE_OP_END_MAX_REPEAT);
+                        if (result != 0)
+                            return cleanup(state, result);
+
+                        state->backtrack_item->repeat.start_ptr =
+                          inner_repeat->repeat.start_ptr;
+                        state->backtrack_item->repeat.count =
+                          inner_repeat->repeat.count - 1;
+                        state->backtrack_item->repeat.repeat = inner_repeat;
+                        state->backtrack_item->repeat.pattern_ptr = NULL;
+                    }
+                }
+
+                /* record where the body started */
+                inner_repeat->repeat.start_ptr = state->text_ptr;
+
+                /* try the body */
+                current_repeat = inner_repeat;
+                state->pattern_ptr = body_ptr;
+            } else {
+                /* only the tail could match */
+
+                /*
+                  push the state for backtracking in case the tail fails
+
+                  if we have to backtrack into the body then we need to restore
+                  current_repeat and the values of inner_repeat
+
+                  we indicate that we're just restoring values for backtracking
+                  into the body by setting the 'pattern_ptr' member to NULL
+                 */
+                result = push_backtrack(state, SRE_OP_END_MAX_REPEAT);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                state->backtrack_item->repeat.start_ptr =
+                  inner_repeat->repeat.start_ptr;
+                state->backtrack_item->repeat.count =
+                  inner_repeat->repeat.count - 1;
+                state->backtrack_item->repeat.repeat = inner_repeat;
+                state->backtrack_item->repeat.pattern_ptr = NULL;
+
+                /* try the tail */
+                current_repeat = outer_repeat;
+                state->pattern_ptr = tail_ptr;
+            }
+            break;
+        }
+        case SRE_OP_END_MIN_REPEAT:
+        {
+            /* lazy repeat */
+            /*
+              <MIN_REPEAT> <skip_to_end> <min> <max> pattern <END_MIN_REPEAT>
+              <skip_to_start>
+             */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            BOOL consumed;
+            BOOL try_body;
+            BOOL try_tail;
+            SRE_BacktrackItem* outer_repeat;
+            SRE_BacktrackItem* inner_repeat;
+            TRACE(("|%p|%p|END_MIN_REPEAT\n", state->pattern_ptr,
+              state->text_ptr));
+
+            /* point to various parts of the pattern */
+            end_repeat_ptr = state->pattern_ptr;
+            repeat_ptr = end_repeat_ptr - end_repeat_ptr[1];
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = end_repeat_ptr + 2;
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            inner_repeat = current_repeat;
+            outer_repeat = inner_repeat->repeat.repeat;
+
+            /* just matched the body */
+            ++inner_repeat->repeat.count;
+
+            /*
+              normally we expect the body of the repeat to match at least one
+              character in each iteration, so that gives us an upper limit to
+              the number of repeats
+             */
+            min_rep = inner_repeat->repeat.min;
+            max_rep = unsigned_min(inner_repeat->repeat.max,
+              inner_repeat->repeat.count + available);
+
+            /*
+              trying the tail takes precedence over trying the body
+
+              if the body didn't consume any characters then the tail would
+              just match at the same position it has already matched and
+              failed, and then the body would be matched again, and this would
+              repeat until the maximum was reached, at which point we would
+              backtrack
+
+              in such a case we can save work by just backtracking
+             */
+            consumed = state->text_ptr != inner_repeat->repeat.start_ptr;
+            try_body = inner_repeat->repeat.count < max_rep && consumed &&
+              SRE_TRY_MATCH(state, body_ptr);
+            try_tail = (inner_repeat->repeat.count >= min_rep || !consumed) &&
+              SRE_TRY_MATCH(state, tail_ptr);
+
+            if (!try_body && !try_tail) {
+                /* neither the body nor the tail could match */
+                --inner_repeat->repeat.count;
+                goto backtrack;
+            }
+
+            if (try_tail) {
+                if (try_body) {
+                    /*
+                      both the body and the tail could match, but the tail
+                      takes precedence
+                     */
+
+                    /*
+                      push the state for matching the body in case the tail
+                      fails
+                     */
+                    result = push_backtrack(state, SRE_OP_END_MIN_REPEAT);
+                    if (result != 0)
+                        return cleanup(state, result);
+
+                    state->backtrack_item->repeat.start_ptr =
+                      inner_repeat->repeat.start_ptr;
+                    state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                    state->backtrack_item->repeat.count =
+                      inner_repeat->repeat.count;
+                    state->backtrack_item->repeat.repeat = inner_repeat;
+                    state->backtrack_item->repeat.pattern_ptr = body_ptr;
+
+                    /* record where the body started */
+                    inner_repeat->repeat.start_ptr = state->text_ptr;
+                }
+
+                /* try the tail */
+                current_repeat = outer_repeat;
+                state->pattern_ptr = tail_ptr;
+            } else {
+                /* only the body could match */
+
+                /* record where the body started */
+                inner_repeat->repeat.start_ptr = state->text_ptr;
+
+                /* try the body */
+                current_repeat = inner_repeat;
+                state->pattern_ptr = body_ptr;
+            }
+            break;
+        }
+        case SRE_OP_END_STRING:
+            /* end of string */
+            /* <END_STRING> */
+            TRACE(("|%p|%p|END_STRING\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr < state->text_end)
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_END_STRING_LINE:
+            /* end of string or final linebreak at end of string */
+            /* <END_STRING_LINE> */
+            TRACE(("|%p|%p|END_STRING_LINE\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr < state->text_end && state->text_ptr !=
+              state->final_linebreak)
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_GROUPREF:
+        {
+            /* group reference */
+            /* <GROUPREF> <group_id> */
+            SRE_CODE id;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+
+            /* is the group defined? */
+            id = state->pattern_ptr[1];
+            group_start = state->mark[id * 2];
+            group_end = state->mark[id * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+
+            /* could the group be duplicated here? */
+            length = group_end - group_start;
+            if ((SRE_CHAR*)state->slice_end - (SRE_CHAR*)state->text_ptr <
+              length)
+                goto backtrack;
+
+            /* is the group duplicated here? */
+            for (i = 0; i < length; i ++) {
+                if (SRE_PCHAR(state->text_ptr, i) != group_start[i])
+                    goto backtrack;
+            }
+
+            /* step over the matched group */
+            (SRE_CHAR*)state->text_ptr += length;
+            state->pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_GROUPREF_EXISTS:
+        {
+            /* conditional on group existence */
+            /*
+              <GROUPREF_EXISTS> <group_id> <skip_to_codeno> codeyes [<JUMP>
+              <skip_to_end> codeno] ...
+             */
+            SRE_CODE id;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+
+            /* is the group defined? */
+            id = state->pattern_ptr[1];
+            group_start = state->mark[id * 2];
+            group_end = state->mark[id * 2 + 1];
+            if (group_start != NULL && group_start <= group_end)
+                /* the group exists, so skip to codeyes */
+                state->pattern_ptr += 3;
+            else
+                /* the group doesn't exist, so skip to codeno */
+                state->pattern_ptr += state->pattern_ptr[2];
+            break;
+        }
+        case SRE_OP_GROUPREF_IGNORE:
+        {
+            /* group reference, ignoring case */
+            /* <GROUPREF_IGNORE> <group_id> */
+            SRE_CODE id;
+            SRE_CHAR* group_start;
+            SRE_CHAR* group_end;
+            Py_ssize_t length;
+            Py_ssize_t i;
+            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+
+            /* is the group defined? */
+            id = state->pattern_ptr[1];
+            group_start = state->mark[id * 2];
+            group_end = state->mark[id * 2 + 1];
+            if (group_start == NULL || group_start > group_end)
+                goto backtrack;
+
+            /* could the group be duplicated here? */
+            length = group_end - group_start;
+            if ((SRE_CHAR*)state->slice_end - (SRE_CHAR*)state->text_ptr <
+              length)
+                goto backtrack;
+
+            /* is the group duplicated here? */
+            for (i = 0; i < length; i ++) {
+                if (!same_char_ignore(state, SRE_PCHAR(state->text_ptr, i),
+                  group_start[i]))
+                    goto backtrack;
+            }
+
+            /* step over the matched group */
+            (SRE_CHAR*)state->text_ptr += length;
+            state->pattern_ptr += 2;
+            break;
+        }
         case SRE_OP_JUMP:
-        case SRE_OP_INFO:
             /* jump forward */
             /* <JUMP> <offset> */
-            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_BRANCH:
-            /* alternation */
-            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
-            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
-            LASTMARK_SAVE();
-            ctx->u.rep = state->repeat;
-            if (ctx->u.rep)
-                MARK_PUSH(ctx->lastmark);
-            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
-                if (ctx->pattern[1] == SRE_OP_LITERAL &&
-                    (ctx->ptr >= end ||
-                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
-                    continue;
-                if (ctx->pattern[1] == SRE_OP_IN &&
-                    (ctx->ptr >= end ||
-                     !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
-                    continue;
-                state->ptr = ctx->ptr;
-                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
-                if (ret) {
-                    if (ctx->u.rep)
-                        MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
+            TRACE(("|%p|%p|JUMP %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_LITERAL:
+            /* this character */
+            /* <LITERAL> <code> */
+            TRACE(("|%p|%p|LITERAL %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end ||
+              (SRE_CODE)SRE_PCHAR(state->text_ptr, 0) != state->pattern_ptr[1])
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_LITERAL_IGNORE:
+            /* this character, ignoring case */
+            /* <LITERAL_IGNORE> <code> */
+            TRACE(("|%p|%p|LITERAL_IGNORE %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end || !same_char_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_MARK:
+        {
+            /* set mark */
+            /* <MARK> <mark_id> */
+            SRE_CODE id;
+            TRACE(("|%p|%p|MARK %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1]));
+
+            /* push the state */
+            result = push_backtrack(state, op);
+            if (result != 0)
+                return cleanup(state, result);
+
+            id = state->pattern_ptr[1];
+            state->backtrack_item->mark.id = id;
+            state->backtrack_item->mark.text_ptr = state->mark[id];
+            state->backtrack_item->mark.last_index = state->lastindex;
+
+            /* set the mark */
+            state->mark[id] = state->text_ptr;
+            state->lastindex = id;
+
+            state->pattern_ptr += 2;
+            break;
+        }
+        case SRE_OP_MAX_REPEAT:
+        {
+            /* greedy repeat */
+            /*
+              <MAX_REPEAT> <skip_to_end> <min> <max> pattern <END_MAX_REPEAT>
+              <skip_to_start>
+             */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            BOOL try_body;
+            BOOL try_tail;
+            SRE_BacktrackItem* outer_repeat;
+            SRE_BacktrackItem* inner_repeat;
+            TRACE(("|%p|%p|MAX_REPEAT %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[2], state->pattern_ptr[3]));
+
+            /* point to various parts of the pattern */
+            repeat_ptr = state->pattern_ptr;
+            end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = end_repeat_ptr + 2;
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            /* number of repeats */
+            min_rep = repeat_ptr[2];
+            if (repeat_ptr[3] == SRE_MAXREPEAT)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+            if (min_rep > max_rep)
+                goto backtrack;
+
+            try_body = max_rep > 0 && SRE_TRY_MATCH(state, body_ptr);
+            try_tail = min_rep == 0 && SRE_TRY_MATCH(state, tail_ptr);
+
+            if (!try_body && !try_tail)
+                /* neither the body nor the tail could match */
+                goto backtrack;
+
+            outer_repeat = current_repeat;
+
+            /*
+              push the state in case the repeat fails
+
+              we only need to restore enough to backtrack after a failed
+              repeat, but we'll also store the repeat info here
+             */
+            result = push_backtrack(state, SRE_OP_MAX_REPEAT);
+            if (result != 0)
+                return cleanup(state, result);
+
+            state->backtrack_item->repeat.start_ptr = state->text_ptr;
+            state->backtrack_item->repeat.min = min_rep;
+            state->backtrack_item->repeat.max = max_rep;
+            state->backtrack_item->repeat.count = 0;
+            state->backtrack_item->repeat.repeat = outer_repeat;
+
+            inner_repeat = state->backtrack_item;
+
+            if (try_body) {
+                if (try_tail) {
+                    /*
+                      both the body and the tail could match, but the body
+                      takes precedence
+                     */
+
+                    /*
+                      push the state for matching the tail in case the body
+                      fails
+                     */
+                    result = push_backtrack(state, SRE_OP_END_MAX_REPEAT);
+                    if (result != 0)
+                        return cleanup(state, result);
+
+                    state->backtrack_item->repeat.start_ptr =
+                      inner_repeat->repeat.start_ptr;
+                    state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                    state->backtrack_item->repeat.count =
+                      inner_repeat->repeat.count;
+                    state->backtrack_item->repeat.repeat = inner_repeat;
+                    state->backtrack_item->repeat.pattern_ptr = tail_ptr;
                 }
-                if (ctx->u.rep)
-                    MARK_POP_KEEP(ctx->lastmark);
-                LASTMARK_RESTORE();
+
+                /* try the body */
+                current_repeat = inner_repeat;
+                state->pattern_ptr = body_ptr;
+            } else {
+                /* only the tail could match, so try it */
+                current_repeat = outer_repeat;
+                state->pattern_ptr = tail_ptr;
             }
-            if (ctx->u.rep)
-                MARK_POP_DISCARD(ctx->lastmark);
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT_ONE:
-            /* match repeated sequence (maximizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MAX_REPEAT operator */
-
-            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
-            RETURN_ON_ERROR(ret);
-            DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-            ctx->count = ret;
-            ctx->ptr += ctx->count;
-
-            /* when we arrive here, count contains the number of
-               matches, and ctx->ptr points to the tail of the target
-               string.  check if the rest of the pattern matches,
-               and backtrack if not. */
-
-            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                RETURN_FAILURE;
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
+            break;
+        }
+        case SRE_OP_MAX_REPEAT_ONE:
+        {
+            /* greedy repeat */
+            /* <MAX_REPEAT_ONE> <skip_to_tail> <min> <max> character_pattern */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* max_ptr;
+            SRE_CHAR* min_ptr;
+            TRACE(("|%p|%p|MAX_REPEAT_ONE %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[2], state->pattern_ptr[3]));
+
+            /* point to various parts of the pattern */
+            repeat_ptr = state->pattern_ptr;
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = repeat_ptr + repeat_ptr[1];
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            /* number of repeats */
+            min_rep = repeat_ptr[2];
+            if (repeat_ptr[3] == SRE_MAXREPEAT)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+            if (min_rep > max_rep)
+                goto backtrack;
+
+            /* match up to the maximum */
+            start_ptr = state->text_ptr;
+            max_ptr = start_ptr + max_rep;
+            while ((SRE_CHAR*)state->text_ptr < max_ptr && SRE_TRY_MATCH(state,
+              body_ptr))
+                ++(SRE_CHAR*)state->text_ptr;
+
+            /* match down to the minimum until the tail could match */
+            min_ptr = start_ptr + min_rep;
+            while ((SRE_CHAR*)state->text_ptr >= min_ptr &&
+              !SRE_TRY_MATCH(state, tail_ptr))
+                --(SRE_CHAR*)state->text_ptr;
+
+            /* did it match at least the minimum? */
+            if ((SRE_CHAR*)state->text_ptr < min_ptr)
+                goto backtrack;
+
+             /*
+               push the state in case the tail fails
+
+               if it matched only the minimum then we can avoid pushing the
+               state because there's nothing we'd need to restore for
+               backtracking
+              */
+            if ((SRE_CHAR*)state->text_ptr > min_ptr) {
+                result = push_backtrack(state, op);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                state->backtrack_item->repeat.start_ptr = start_ptr;
+                state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                state->backtrack_item->repeat.min = min_rep;
+                state->backtrack_item->repeat.max = max_rep;
+                state->backtrack_item->repeat.repeat = current_repeat;
+                state->backtrack_item->repeat.pattern_ptr = repeat_ptr;
             }
 
-            LASTMARK_SAVE();
-
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
-                /* tail starts with a literal. skip positions where
-                   the rest of the pattern cannot possibly match */
-                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
-                for (;;) {
-                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
-                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
-                        ctx->ptr--;
-                        ctx->count--;
-                    }
-                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
-                        break;
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-
-                    LASTMARK_RESTORE();
-
-                    ctx->ptr--;
-                    ctx->count--;
+            /* try the tail */
+            state->pattern_ptr = tail_ptr;
+            break;
+        }
+        case SRE_OP_MIN_REPEAT:
+        {
+            /* lazy repeat */
+            /*
+              <MIN_REPEAT> <skip_to_end> <min> <max> pattern <END_MIN_REPEAT>
+              <skip_to_start>
+             */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* end_repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            BOOL try_body;
+            BOOL try_tail;
+            SRE_BacktrackItem* outer_repeat;
+            SRE_BacktrackItem* inner_repeat;
+            TRACE(("|%p|%p|MIN_REPEAT %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[2], state->pattern_ptr[3]));
+
+            /* point to various parts of the pattern */
+            repeat_ptr = state->pattern_ptr;
+            end_repeat_ptr = repeat_ptr + repeat_ptr[1];
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = end_repeat_ptr + 2;
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            /* number of repeats */
+            min_rep = repeat_ptr[2];
+            if (repeat_ptr[3] == SRE_MAXREPEAT)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+            if (min_rep > max_rep)
+                goto backtrack;
+
+            try_body = max_rep > 0 && SRE_TRY_MATCH(state, body_ptr);
+            try_tail = min_rep == 0 && SRE_TRY_MATCH(state, tail_ptr);
+
+            if (!try_body && !try_tail)
+                /* neither the body nor the tail could match */
+                goto backtrack;
+
+            outer_repeat = current_repeat;
+
+            /* push the state in case the repeat fails */
+            result = push_backtrack(state, SRE_OP_MIN_REPEAT);
+            if (result != 0)
+                return cleanup(state, result);
+
+            state->backtrack_item->repeat.start_ptr = state->text_ptr;
+            state->backtrack_item->repeat.min = min_rep;
+            state->backtrack_item->repeat.max = max_rep;
+            state->backtrack_item->repeat.count = 0;
+            state->backtrack_item->repeat.repeat = outer_repeat;
+
+            inner_repeat = state->backtrack_item;
+
+            if (try_tail) {
+                if (try_body) {
+                    /*
+                      both the body and the tail could match, but the tail
+                      takes precedence
+                     */
+
+                    /*
+                      push the state for matching the body in case the tail
+                      fails
+                     */
+                    result = push_backtrack(state, SRE_OP_END_MIN_REPEAT);
+                    if (result != 0)
+                        return cleanup(state, result);
+
+                    state->backtrack_item->repeat.start_ptr =
+                      inner_repeat->repeat.start_ptr;
+                    state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                    state->backtrack_item->repeat.count =
+                      inner_repeat->repeat.count;
+                    state->backtrack_item->repeat.repeat = inner_repeat;
+                    state->backtrack_item->repeat.pattern_ptr = body_ptr;
                 }
 
+                /* try the tail */
+                current_repeat = outer_repeat;
+                state->pattern_ptr = tail_ptr;
             } else {
-                /* general case */
-                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    ctx->ptr--;
-                    ctx->count--;
-                    LASTMARK_RESTORE();
-                }
+                /* only the body could match, so try it */
+                current_repeat = inner_repeat;
+                state->pattern_ptr = body_ptr;
             }
-            RETURN_FAILURE;
-
+            break;
+        }
         case SRE_OP_MIN_REPEAT_ONE:
-            /* match repeated sequence (minimizing regexp) */
-
-            /* this operator only works if the repeated item is
-               exactly one character wide, and we're not already
-               collecting backtracking points.  for other cases,
-               use the MIN_REPEAT operator */
-
-            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
-
-            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            if (ctx->ptr + ctx->pattern[1] > end)
-                RETURN_FAILURE; /* cannot match */
-
-            state->ptr = ctx->ptr;
-
-            if (ctx->pattern[1] == 0)
-                ctx->count = 0;
-            else {
-                /* count using pattern min as the maximum */
-                ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
-                RETURN_ON_ERROR(ret);
-                DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                if (ret < (Py_ssize_t) ctx->pattern[1])
-                    /* didn't match minimum number of times */
-                    RETURN_FAILURE;
-                /* advance past minimum matches of repeat */
-                ctx->count = ret;
-                ctx->ptr += ctx->count;
+        {
+            /* lazy repeat */
+            /* <MIN_REPEAT_ONE> <skip_to_tail> <min> <max> character_pattern */
+            SRE_CODE* repeat_ptr;
+            SRE_CODE* body_ptr;
+            SRE_CODE* tail_ptr;
+            Py_ssize_t available;
+            unsigned int min_rep;
+            unsigned int max_rep;
+            SRE_CHAR* start_ptr;
+            SRE_CHAR* min_ptr;
+            SRE_CHAR* max_ptr;
+            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[2], state->pattern_ptr[3]));
+
+            /* point to various parts of the pattern */
+            repeat_ptr = state->pattern_ptr;
+            body_ptr = repeat_ptr + 4;
+            tail_ptr = repeat_ptr + repeat_ptr[1];
+
+            /* how many characters are still available? */
+            available = (SRE_CHAR*)state->slice_end -
+              (SRE_CHAR*)state->text_ptr;
+
+            /* number of repeats */
+            min_rep = repeat_ptr[2];
+            if (repeat_ptr[3] == SRE_MAXREPEAT)
+                max_rep = available;
+            else
+                max_rep = unsigned_min(repeat_ptr[3], available);
+            if (min_rep > max_rep)
+                goto backtrack;
+
+            /* match up to the minimum */
+            start_ptr = state->text_ptr;
+            min_ptr = start_ptr + min_rep;
+            while ((SRE_CHAR*)state->text_ptr < min_ptr && SRE_TRY_MATCH(state,
+              body_ptr))
+                ++(SRE_CHAR*)state->text_ptr;
+
+            /* did it match at least the minimum? */
+            if ((SRE_CHAR*)state->text_ptr < min_ptr)
+                goto backtrack;
+
+            /* match up to the maximum until the tail could match */
+            max_ptr = start_ptr + max_rep;
+            while ((SRE_CHAR*)state->text_ptr < max_ptr &&
+              !SRE_TRY_MATCH(state, tail_ptr)) {
+                /* the tail hasn't matched yet */
+                if (!SRE_TRY_MATCH(state, body_ptr))
+                    /* the body failed to match, so backtrack */
+                    goto backtrack;
+                ++(SRE_CHAR*)state->text_ptr;
             }
 
-            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
-                /* tail is empty.  we're finished */
-                state->ptr = ctx->ptr;
-                RETURN_SUCCESS;
-
-            } else {
-                /* general case */
-                LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
-                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
-                    state->ptr = ctx->ptr;
-                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
-                            ctx->pattern+ctx->pattern[0]);
-                    if (ret) {
-                        RETURN_ON_ERROR(ret);
-                        RETURN_SUCCESS;
-                    }
-                    state->ptr = ctx->ptr;
-                    ret = SRE_COUNT(state, ctx->pattern+3, 1);
-                    RETURN_ON_ERROR(ret);
-                    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-                    if (ret == 0)
-                        break;
-                    assert(ret == 1);
-                    ctx->ptr++;
-                    ctx->count++;
-                    LASTMARK_RESTORE();
-                }
+            /*
+              push the state in case the tail fails
+
+              if it actually matched the maximum then we can avoid pushing the
+              state because there's nothing we'd need to restore for
+              backtracking
+             */
+            if ((SRE_CHAR*)state->text_ptr < max_ptr) {
+                result = push_backtrack(state, op);
+                if (result != 0)
+                    return cleanup(state, result);
+
+                state->backtrack_item->repeat.start_ptr = start_ptr;
+                state->backtrack_item->repeat.text_ptr = state->text_ptr;
+                state->backtrack_item->repeat.min = min_rep;
+                state->backtrack_item->repeat.max = max_rep;
+                state->backtrack_item->repeat.repeat = current_repeat;
+                state->backtrack_item->repeat.pattern_ptr = repeat_ptr;
             }
-            RETURN_FAILURE;
-
-        case SRE_OP_REPEAT:
-            /* create repeat context.  all the hard work is done
-               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
-            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
-            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[1], ctx->pattern[2]));
-
-            /* install new repeat context */
-            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
-            if (!ctx->u.rep) {
-                PyErr_NoMemory();
-                RETURN_FAILURE;
+
+            /* try the tail */
+            state->pattern_ptr = tail_ptr;
+            break;
+        }
+        case SRE_OP_NOT_BOUNDARY:
+            /* not at a boundary between word and non-word */
+            /* <NOT_BOUNDARY> */
+            TRACE(("|%p|%p|NOT_BOUNDARY\n", state->pattern_ptr,
+              state->text_ptr));
+            if (SRE_AT_BOUNDARY(state))
+                goto backtrack;
+
+            ++state->pattern_ptr;
+            break;
+        case SRE_OP_NOT_CATEGORY:
+            /* character not in category */
+            /* <NOT_CATEGORY> <code> */
+            TRACE(("|%p|%p|NOT_CATEGORY %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end ||
+              encoding->in_category(state->pattern_ptr[1],
+              SRE_PCHAR(state->text_ptr, 0)))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_CHARSET:
+            /* character not in charset */
+            /* <NOT_CHARSET> <skip_to_tail> <charset> */
+            TRACE(("|%p|%p|NOT_CHARSET\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr >= state->slice_end ||
+              in_charset(SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_CHARSET_IGNORE:
+            /* character not in charset, ignoring case */
+            /* <NOT_CHARSET_IGNORE> <skip_to_tail> <charset> */
+            TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr >= state->slice_end || in_charset_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_LITERAL:
+            /* any character except this */
+            /* <NOT_LITERAL> <code> */
+            TRACE(("|%p|%p|NOT_LITERAL %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end ||
+              (SRE_CODE)SRE_PCHAR(state->text_ptr, 0) == state->pattern_ptr[1])
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_LITERAL_IGNORE:
+            /* any character except this, ignoring case */
+            /* <NOT_LITERAL_IGNORE> <code> */
+            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1]));
+            if (state->text_ptr >= state->slice_end || same_char_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 2;
+            break;
+        case SRE_OP_NOT_RANGE:
+            /* character not in range */
+            /* <NOT_RANGE> <min> <max> */
+            TRACE(("|%p|%p|NOT_RANGE %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1], state->pattern_ptr[2]));
+            if (state->text_ptr >= state->slice_end ||
+              in_range(SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1],
+              state->pattern_ptr[2]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_RANGE_IGNORE:
+            /* character not in range, ignoring case */
+            /* <NOT_RANGE_IGNORE> <min> <max> */
+            TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1], state->pattern_ptr[2]));
+            if (state->text_ptr >= state->slice_end || in_range_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1],
+              state->pattern_ptr[2]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 3;
+            break;
+        case SRE_OP_NOT_SET:
+            /* character not in set */
+            /* <NOT_SET> <skip_to_tail> <set> */
+            TRACE(("|%p|%p|NOT_SET\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end || in_set(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_NOT_SET_IGNORE:
+            /* character not in set, ignoring case */
+            /* <NOT_SET_IGNORE> <skip_to_tail> <set> */
+            TRACE(("|%p|%p|NOT_SET_IGNORE\n", state->pattern_ptr,
+              state->text_ptr));
+            if (state->text_ptr >= state->slice_end || in_set_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_RANGE:
+            /* character in range */
+            /* <RANGE> <min> <max> */
+            TRACE(("|%p|%p|RANGE %d %d\n", state->pattern_ptr, state->text_ptr,
+              state->pattern_ptr[1], state->pattern_ptr[2]));
+            if (state->text_ptr >= state->slice_end ||
+              !in_range(SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1],
+              state->pattern_ptr[2]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 3;
+            break;
+        case SRE_OP_RANGE_IGNORE:
+            /* character in range, ignoring case */
+            /* <RANGE_IGNORE> <min> <max> */
+            TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", state->pattern_ptr,
+              state->text_ptr, state->pattern_ptr[1], state->pattern_ptr[2]));
+            if (state->text_ptr >= state->slice_end || !in_range_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr[1],
+              state->pattern_ptr[2]))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += 3;
+            break;
+        case SRE_OP_SET:
+            /* character in set */
+            /* <SET> <skip_to_tail> <set> */
+            TRACE(("|%p|%p|SET\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end || !in_set(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        case SRE_OP_SET_IGNORE:
+            /* character in set, ignoring case */
+            /* <SET_IGNORE> <skip_to_tail> <set> */
+            TRACE(("|%p|%p|SET_IGNORE\n", state->pattern_ptr, state->text_ptr));
+            if (state->text_ptr >= state->slice_end || !in_set_ignore(state,
+              SRE_PCHAR(state->text_ptr, 0), state->pattern_ptr))
+                goto backtrack;
+
+            ++(SRE_CHAR*)state->text_ptr;
+            state->pattern_ptr += state->pattern_ptr[1];
+            break;
+        default:
+            TRACE(("|%p|%p|UNKNOWN %d\n", state->pattern_ptr, state->text_ptr,
+              op));
+            return cleanup(state, SRE_ERROR_ILLEGAL);
+        }
+    }
+
+backtrack:
+    /* need to backtrack */
+    TRACE(("BACKTRACK "));
+    top_backtrack(state);
+
+    op = state->backtrack_item->op;
+    switch (op) {
+    case SRE_OP_FAILURE:
+        /* pattern failed to match */
+        TRACE(("FAILURE\n"));
+        return cleanup(state, 0);
+    case SRE_OP_ASSERT:
+    {
+        /* assert subpattern */
+        /* <ASSERT> <skip_to_end> <back> pattern <END_ASSERT> */
+        TRACE(("ASSERT\n"));
+        /* the marks have already been popped by backtracking */
+
+        /* pop the state */
+        state->slice_start = state->backtrack_item->assert.slice_start;
+        state->slice_end = state->backtrack_item->assert.slice_end;
+
+        discard_backtrack(state);
+
+        /*
+          the subpattern of the positive lookaround failed, so the lookaround
+          failed
+         */
+        goto backtrack;
+    }
+    case SRE_OP_ASSERT_NOT:
+    {
+        /* assert not subpattern */
+        /* <ASSERT_NOT> <skip_to_end> <back> pattern <END_ASSERT_NOT> */
+        TRACE(("|%p|%p|ASSERT_NOT %d\n", state->pattern_ptr, state->text_ptr,
+          state->pattern_ptr[1]));
+        /* the marks have already been popped by backtracking */
+
+        /* pop the state */
+        state->slice_start = state->backtrack_item->assert.slice_start;
+        state->slice_end = state->backtrack_item->assert.slice_end;
+        state->text_ptr = state->backtrack_item->assert.text_ptr;
+        state->pattern_ptr = state->backtrack_item->assert.pattern_ptr;
+        state->pattern_ptr += state->pattern_ptr[1] + 1;
+
+        discard_backtrack(state);
+
+        /*
+          the subpattern of the negative lookaround failed, so the lookaround
+          succeeded
+         */
+        goto advance;
+    }
+    case SRE_OP_BRANCH:
+    {
+        /* alternation */
+        /* <BRANCH> [<skip> code <JUMP> <skip_to_tail>]+ <0> */
+        SRE_CODE* skip_ptr;
+        TRACE(("BRANCH\n"));
+
+        /* pop the state and point to the next branch */
+        state->text_ptr = state->backtrack_item->branch.text_ptr;
+        skip_ptr = state->backtrack_item->branch.skip_ptr;
+
+        /* look for a branch which could match */
+        while (!SRE_TRY_MATCH(state, skip_ptr + 1)) {
+            /* try the next branch */
+            skip_ptr += skip_ptr[0];
+            /* if there are no more branches then backtrack */
+            if (skip_ptr[0] == 0) {
+                discard_backtrack(state);
+                goto backtrack;
             }
-            ctx->u.rep->count = -1;
-            ctx->u.rep->pattern = ctx->pattern;
-            ctx->u.rep->prev = state->repeat;
-            ctx->u.rep->last_ptr = NULL;
-            state->repeat = ctx->u.rep;
-
-            state->ptr = ctx->ptr;
-            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
-            state->repeat = ctx->u.rep->prev;
-            PyObject_FREE(ctx->u.rep);
-
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
+        }
+
+        /* we'll try this branch */
+        state->pattern_ptr = skip_ptr + 1;
+
+        /* point to the _next_ skip */
+        skip_ptr += skip_ptr[0];
+
+        /* if the skip isn't 0 then there's another branch */
+        if (skip_ptr[0] != 0)
+            /* there's another branch, so save it */
+            state->backtrack_item->branch.skip_ptr = skip_ptr;
+        else
+            /* there isn't another branch to save */
+            discard_backtrack(state);
+
+        goto advance;
+    }
+    case SRE_OP_END_MAX_REPEAT:
+    {
+        SRE_BacktrackItem* inner_repeat;
+        TRACE(("END_MAX_REPEAT\n"));
+        /* greedy repeat */
+        /*
+          <MAX_REPEAT> <skip_to_end> <min> <max> pattern <END_MAX_REPEAT>
+          <skip_to_start>
+         */
+        inner_repeat = state->backtrack_item->repeat.repeat;
+        inner_repeat->repeat.start_ptr =
+          state->backtrack_item->repeat.start_ptr;
+        inner_repeat->repeat.count = state->backtrack_item->repeat.count;
+
+        if (state->backtrack_item->repeat.pattern_ptr != NULL) {
+            /* trying the tail */
+            SRE_BacktrackItem* outer_repeat;
+
+            outer_repeat = inner_repeat->repeat.repeat;
+
+            /* pop the state */
+            state->text_ptr = state->backtrack_item->repeat.text_ptr;
+            state->pattern_ptr = state->backtrack_item->repeat.pattern_ptr;
+
+            /*
+              if the tail fails then we'll need to backtrack into the body, so
+              fix the backtrack record to do that
+             */
+            state->backtrack_item->repeat.pattern_ptr = NULL;
+            --inner_repeat->repeat.count;
+
+            /* try the tail */
+            current_repeat = outer_repeat;
+            goto advance;
+        }
+        else
+        {
+            /* backtrack into the body */
+            discard_backtrack(state);
+
+            current_repeat = inner_repeat;
+            goto backtrack;
+        }
+    }
+    case SRE_OP_END_MIN_REPEAT:
+    {
+        SRE_BacktrackItem* inner_repeat;
+        TRACE(("END_MIN_REPEAT\n"));
+        /* lazy repeat */
+        /*
+          <MIN_REPEAT> <skip_to_end> <min> <max> pattern <END_MIN_REPEAT>
+          <skip_to_start>
+         */
+        inner_repeat = state->backtrack_item->repeat.repeat;
+
+        /* pop the state */
+        inner_repeat->repeat.start_ptr =
+          state->backtrack_item->repeat.start_ptr;
+        state->text_ptr = state->backtrack_item->repeat.text_ptr;
+        inner_repeat->repeat.count = state->backtrack_item->repeat.count;
+        state->pattern_ptr = state->backtrack_item->repeat.pattern_ptr;
+
+        discard_backtrack(state);
+
+        /* try the body */
+        current_repeat = inner_repeat;
+        goto advance;
+    }
+    case SRE_OP_MARK:
+        /* set mark */
+        /* <MARK> <mark_id> */
+        TRACE(("MARK\n"));
+
+        /* pop the mark */
+        state->mark[state->backtrack_item->mark.id] =
+          state->backtrack_item->mark.text_ptr;
+        state->lastindex = state->backtrack_item->mark.last_index;
+
+        discard_backtrack(state);
+
+        goto backtrack;
+    case SRE_OP_MAX_REPEAT:
+    {
+        SRE_BacktrackItem* outer_repeat;
+        TRACE(("MAX_REPEAT\n"));
+        /* greedy repeat */
+        /*
+          <MAX_REPEAT> <skip_to_end> <min> <max> pattern <END_MAX_REPEAT>
+          <skip_to_start>
+         */
+        outer_repeat = state->backtrack_item->repeat.repeat;
+
+        discard_backtrack(state);
+
+        current_repeat = outer_repeat;
+        goto backtrack;
+    }
+    case SRE_OP_MAX_REPEAT_ONE:
+    {
+        /* greedy repeat */
+        /* <MAX_REPEAT_ONE> <skip_to_tail> <min> <max> character_pattern */
+        SRE_BacktrackItem* inner_repeat;
+        SRE_CODE* repeat_ptr;
+        SRE_CODE* tail_ptr;
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* min_ptr;
+        TRACE(("MAX_REPEAT_ONE\n"));
+        /* the tail has failed */
+
+        inner_repeat = state->backtrack_item;
+        repeat_ptr = state->backtrack_item->repeat.pattern_ptr;
+        tail_ptr = repeat_ptr + repeat_ptr[1];
+
+        /* restore the state */
+        state->text_ptr = state->backtrack_item->repeat.text_ptr;
+
+        /* release one character */
+        --(SRE_CHAR*)state->text_ptr;
+
+        /* match down to the minimum until the tail could match */
+        start_ptr = inner_repeat->repeat.start_ptr;
+        min_ptr = start_ptr + inner_repeat->repeat.min;
+        while ((SRE_CHAR*)state->text_ptr >= min_ptr && !SRE_TRY_MATCH(state,
+          tail_ptr))
+            --(SRE_CHAR*)state->text_ptr;
+
+        /* did it match at least the minimum? */
+        if ((SRE_CHAR*)state->text_ptr < min_ptr) {
+            discard_backtrack(state);
+            goto backtrack;
+        }
+
+        /*
+          if we're now at the minimum then we can discard the backtracking
+          because if the tail fails we'll need to backtrack anyway and there's
+          nothing else to restore
+         */
+        if ((SRE_CHAR*)state->text_ptr > min_ptr)
+            inner_repeat->repeat.text_ptr = state->text_ptr;
+        else
+            discard_backtrack(state);
+
+        /* try the tail */
+        state->pattern_ptr = tail_ptr;
+        goto advance;
+    }
+    case SRE_OP_MIN_REPEAT:
+    {
+        SRE_BacktrackItem* outer_repeat;
+        TRACE(("MIN_REPEAT\n"));
+        /* lazy repeat */
+        /*
+          <MIN_REPEAT> <skip_to_end> <min> <max> pattern <END_MIN_REPEAT>
+          <skip_to_start>
+         */
+        outer_repeat = state->backtrack_item->repeat.repeat;
+
+        discard_backtrack(state);
+
+        current_repeat = outer_repeat;
+        goto backtrack;
+    }
+    case SRE_OP_MIN_REPEAT_ONE:
+    {
+        /* lazy repeat */
+        /* <MIN_REPEAT_ONE> <skip_to_tail> <min> <max> character_pattern */
+        SRE_BacktrackItem* inner_repeat;
+        SRE_CODE* repeat_ptr;
+        SRE_CODE* body_ptr;
+        SRE_CODE* tail_ptr;
+        SRE_CHAR* start_ptr;
+        SRE_CHAR* max_ptr;
+        TRACE(("MIN_REPEAT_ONE\n"));
+        /* the tail has failed */
+
+        inner_repeat = state->backtrack_item;
+        repeat_ptr = state->backtrack_item->repeat.pattern_ptr;
+        body_ptr = repeat_ptr + 4;
+        tail_ptr = repeat_ptr + repeat_ptr[1];
+
+        /* restore the state */
+        state->text_ptr = inner_repeat->repeat.text_ptr;
+
+        /* consume one character, if possible */
+        if (!SRE_TRY_MATCH(state, body_ptr)) {
+            discard_backtrack(state);
+            goto backtrack;
+        }
+
+        ++(SRE_CHAR*)state->text_ptr;
+
+        /* match up to the maximum until the tail could match */
+        start_ptr = inner_repeat->repeat.start_ptr;
+        max_ptr = start_ptr + inner_repeat->repeat.max;
+        while ((SRE_CHAR*)state->text_ptr < max_ptr && !SRE_TRY_MATCH(state,
+          tail_ptr)) {
+            /* the tail hasn't matched yet */
+            if (!SRE_TRY_MATCH(state, body_ptr)) {
+                /* the body failed to match, so backtrack */
+                discard_backtrack(state);
+                goto backtrack;
             }
-            RETURN_FAILURE;
-
-        case SRE_OP_MAX_UNTIL:
-            /* maximizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
-
-            /* FIXME: we probably need to deal with zero-width
-               matches in here... */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
-                   ctx->ptr, ctx->count));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
-            }
-
-            if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
-                state->ptr != ctx->u.rep->last_ptr) {
-                /* we may have enough matches, but if we can
-                   match another item, do so */
-                ctx->u.rep->count = ctx->count;
-                LASTMARK_SAVE();
-                MARK_PUSH(ctx->lastmark);
-                /* zero-width match protection */
-                DATA_PUSH(&ctx->u.rep->last_ptr);
-                ctx->u.rep->last_ptr = state->ptr;
-                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
-                        ctx->u.rep->pattern+3);
-                DATA_POP(&ctx->u.rep->last_ptr);
-                if (ret) {
-                    MARK_POP_DISCARD(ctx->lastmark);
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                MARK_POP(ctx->lastmark);
-                LASTMARK_RESTORE();
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-            }
-
-            /* cannot match more repeated items here.  make sure the
-               tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
-            RETURN_ON_SUCCESS(ret);
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_MIN_UNTIL:
-            /* minimizing repeat */
-            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
-
-            ctx->u.rep = state->repeat;
-            if (!ctx->u.rep)
-                RETURN_ERROR(SRE_ERROR_STATE);
-
-            state->ptr = ctx->ptr;
-
-            ctx->count = ctx->u.rep->count+1;
-
-            TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
-                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
-
-            if (ctx->count < ctx->u.rep->pattern[1]) {
-                /* not enough matches */
-                ctx->u.rep->count = ctx->count;
-                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
-                        ctx->u.rep->pattern+3);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_SUCCESS;
-                }
-                ctx->u.rep->count = ctx->count-1;
-                state->ptr = ctx->ptr;
-                RETURN_FAILURE;
-            }
-
-            LASTMARK_SAVE();
-
-            /* see if the tail matches */
-            state->repeat = ctx->u.rep->prev;
-            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-
-            state->repeat = ctx->u.rep;
-            state->ptr = ctx->ptr;
-
-            LASTMARK_RESTORE();
-
-            if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
-                RETURN_FAILURE;
-
-            ctx->u.rep->count = ctx->count;
-            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
-                    ctx->u.rep->pattern+3);
-            if (ret) {
-                RETURN_ON_ERROR(ret);
-                RETURN_SUCCESS;
-            }
-            ctx->u.rep->count = ctx->count-1;
-            state->ptr = ctx->ptr;
-            RETURN_FAILURE;
-
-        case SRE_OP_GROUPREF:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end || *ctx->ptr != *p)
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
-                    }
-                }
-            }
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_GROUPREF_IGNORE:
-            /* match backreference */
-            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    RETURN_FAILURE;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p)
-                        RETURN_FAILURE;
-                    while (p < e) {
-                        if (ctx->ptr >= end ||
-                            state->lower(*ctx->ptr) != state->lower(*p))
-                            RETURN_FAILURE;
-                        p++; ctx->ptr++;
-                    }
-                }
-            }
-            ctx->pattern++;
-            break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[0]));
-            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
-            i = ctx->pattern[0];
-            {
-                Py_ssize_t groupref = i+i;
-                if (groupref >= state->lastmark) {
-                    ctx->pattern += ctx->pattern[1];
-                    break;
-                } else {
-                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
-                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
-                    if (!p || !e || e < p) {
-                        ctx->pattern += ctx->pattern[1];
-                        break;
-                    }
-                }
-            }
-            ctx->pattern += 2;
-            break;
-
-        case SRE_OP_ASSERT:
-            /* assert subpattern */
-            /* <ASSERT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr < state->beginning)
-                RETURN_FAILURE;
-            DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
-            RETURN_ON_FAILURE(ret);
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_ASSERT_NOT:
-            /* assert not subpattern */
-            /* <ASSERT_NOT> <skip> <back> <pattern> */
-            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
-                   ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr >= state->beginning) {
-                DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
-                if (ret) {
-                    RETURN_ON_ERROR(ret);
-                    RETURN_FAILURE;
-                }
-            }
-            ctx->pattern += ctx->pattern[0];
-            break;
-
-        case SRE_OP_FAILURE:
-            /* immediate failure */
-            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
-            RETURN_FAILURE;
-
-        default:
-            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
-                   ctx->pattern[-1]));
-            RETURN_ERROR(SRE_ERROR_ILLEGAL);
+            ++(SRE_CHAR*)state->text_ptr;
         }
-    }
-
-exit:
-    ctx_pos = ctx->last_ctx_pos;
-    jump = ctx->jump;
-    DATA_POP_DISCARD(ctx);
-    if (ctx_pos == -1)
-        return ret;
-    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
-
-    switch (jump) {
-        case JUMP_MAX_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_2;
-        case JUMP_MAX_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_3;
-        case JUMP_MIN_UNTIL_2:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_2;
-        case JUMP_MIN_UNTIL_3:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_3;
-        case JUMP_BRANCH:
-            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
-            goto jump_branch;
-        case JUMP_MAX_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_max_until_1;
-        case JUMP_MIN_UNTIL_1:
-            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
-            goto jump_min_until_1;
-        case JUMP_REPEAT:
-            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat;
-        case JUMP_REPEAT_ONE_1:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_1;
-        case JUMP_REPEAT_ONE_2:
-            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
-            goto jump_repeat_one_2;
-        case JUMP_MIN_REPEAT_ONE:
-            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
-            goto jump_min_repeat_one;
-        case JUMP_ASSERT:
-            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert;
-        case JUMP_ASSERT_NOT:
-            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
-            goto jump_assert_not;
-        case JUMP_NONE:
-            TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
-            break;
-    }
-
-    return ret; /* should never get here */
-}
-
+
+        /*
+          if we're now at the maximum then we can discard the backtracking
+          because if the tail fails we'll need to backtrack anyway and there's
+          nothing else to restore
+         */
+        if ((SRE_CHAR*)state->text_ptr < max_ptr)
+            inner_repeat->repeat.text_ptr = state->text_ptr;
+        else
+            discard_backtrack(state);
+
+        /* try the tail */
+        state->pattern_ptr = tail_ptr;
+        goto advance;
+    }
+    default:
+        TRACE(("UNKNOWN %d\n", op));
+        return cleanup(state, SRE_ERROR_ILLEGAL);
+    }
+}
+
+/* searches the slice of text */
 LOCAL(Py_ssize_t)
 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
 {
-    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
-    SRE_CHAR* end = (SRE_CHAR *)state->end;
+    SRE_CODE* repeat_ptr;
+    SRE_CODE* tail_ptr;
+    SRE_CHAR* text_ptr = state->slice_start;
     Py_ssize_t status = 0;
-    Py_ssize_t prefix_len = 0;
-    Py_ssize_t prefix_skip = 0;
-    SRE_CODE* prefix = NULL;
-    SRE_CODE* charset = NULL;
-    SRE_CODE* overlap = NULL;
-    int flags = 0;
-
-    if (pattern[0] == SRE_OP_INFO) {
-        /* optimization info block */
-        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
-
-        flags = pattern[2];
-
-        if (pattern[3] > 1) {
-            /* adjust end point (but make sure we leave at least one
-               character in there, so literal search will work) */
-            end -= pattern[3]-1;
-            if (end <= ptr)
-                end = ptr+1;
-        }
-
-        if (flags & SRE_INFO_PREFIX) {
-            /* pattern starts with a known prefix */
-            /* <length> <skip> <prefix data> <overlap data> */
-            prefix_len = pattern[5];
-            prefix_skip = pattern[6];
-            prefix = pattern + 7;
-            overlap = prefix + prefix_len - 1;
-        } else if (flags & SRE_INFO_CHARSET)
-            /* pattern starts with a character from a known set */
-            /* <charset> */
-            charset = pattern + 5;
-
-        pattern += 1 + pattern[1];
-    }
-
-    TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
-    TRACE(("charset = %p\n", charset));
-
-#if defined(USE_FAST_SEARCH)
-    if (prefix_len > 1) {
-        /* pattern starts with a known prefix.  use the overlap
-           table to skip forward as fast as we possibly can */
-        Py_ssize_t i = 0;
-        end = (SRE_CHAR *)state->end;
-        while (ptr < end) {
-            for (;;) {
-                if ((SRE_CODE) ptr[0] != prefix[i]) {
-                    if (!i)
-                        break;
-                    else
-                        i = overlap[i];
-                } else {
-                    if (++i == prefix_len) {
-                        /* found a potential match */
-                        TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
-                        state->start = ptr + 1 - prefix_len;
-                        state->ptr = ptr + 1 - prefix_len + prefix_skip;
-                        if (flags & SRE_INFO_LITERAL)
-                            return 1; /* we got all of it */
-                        status = SRE_MATCH(state, pattern + 2*prefix_skip);
-                        if (status != 0)
-                            return status;
-                        /* close but no cigar -- try again */
-                        i = overlap[i];
-                    }
-                    break;
-                }
-            }
-            ptr++;
-        }
-        return 0;
-    }
-#endif
-
-    if (pattern[0] == SRE_OP_LITERAL) {
-        /* pattern starts with a literal character.  this is used
-           for short prefixes, and if fast search is disabled */
-        SRE_CODE chr = pattern[1];
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && (SRE_CODE) ptr[0] != chr)
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ++ptr;
-            if (flags & SRE_INFO_LITERAL)
-                return 1; /* we got all of it */
-            status = SRE_MATCH(state, pattern + 2);
-            if (status != 0)
-                break;
-        }
-    } else if (charset) {
-        /* pattern starts with a character from a known set */
-        end = (SRE_CHAR *)state->end;
-        for (;;) {
-            while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
-                ptr++;
-            if (ptr >= end)
-                return 0;
-            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
-            state->start = ptr;
-            state->ptr = ptr;
+
+    /*
+      point to the final newline if it's the final character
+
+      it's more efficient to do it here instead of every time we try to match
+     */
+    if (state->text_end > state->text_start &&
+      state->encoding->in_category(SRE_CAT_LINEBREAK,
+      SRE_PCHAR(state->text_end, -1)))
+        state->final_linebreak = (SRE_CHAR*)state->text_end - 1;
+    else
+        state->final_linebreak = NULL;
+
+    /* skip over any marks */
+    tail_ptr = pattern;
+    while (tail_ptr[0] == SRE_OP_MARK)
+        tail_ptr += SRE_MARK_OP_SIZE;
+
+    /*
+      if the pattern is anchored at the start of the string then do a match
+      instead of a search
+     */
+    if (tail_ptr[0] == SRE_OP_BEGINNING_STRING) {
+        /* where should we start the match? */
+        state->text_ptr = state->slice_start;
+        return SRE_MATCH(state, pattern);
+    }
+
+    /*
+      if a pattern starts with "c{m,n}", where "c" matches a single character,
+      and the pattern fails, then advancing by only one character before
+      retrying would be inefficient because if it failed to match k times then
+      it would certainly fail to match k-1 times!
+
+      does the pattern start with a repeated character?
+     */
+    repeat_ptr = pattern;
+    if (repeat_ptr[0] == SRE_OP_MAX_REPEAT_ONE || repeat_ptr[0] ==
+      SRE_OP_MIN_REPEAT_ONE)
+        /* point to what's repeated */
+        repeat_ptr += 4;
+    else
+        /* not repeated */
+        repeat_ptr = NULL;
+
+    while (text_ptr <= (SRE_CHAR*)state->slice_end) {
+        TRACE(("|%p|%p|SEARCH\n", pattern, text_ptr));
+
+        /* could the pattern match here? */
+        state->text_ptr = state->slice_start = text_ptr;
+        if (SRE_TRY_MATCH(state, tail_ptr)) {
+            /* try matching at the current position */
             status = SRE_MATCH(state, pattern);
             if (status != 0)
                 break;
-            ptr++;
-        }
-    } else
-        /* general case */
-        while (ptr <= end) {
-            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
-            state->start = state->ptr = ptr++;
-            status = SRE_MATCH(state, pattern);
-            if (status != 0)
-                break;
-        }
+
+            /* is there an initial repeat? */
+            if (repeat_ptr != NULL) {
+                unsigned int available;
+                unsigned int max_rep;
+                SRE_CHAR* max_ptr;
+                /*
+                  how many characters could the initial repeat match if
+                  unlimited?
+                 */
+                state->text_ptr = text_ptr;
+
+                /* how many characters are still available? */
+                available = (SRE_CHAR*)state->slice_end - text_ptr;
+
+                if (repeat_ptr[3] == SRE_MAXREPEAT)
+                    max_rep = available;
+                else
+                    max_rep = unsigned_min(repeat_ptr[3], available);
+
+                max_ptr = text_ptr + available;
+                while ((SRE_CHAR*)state->text_ptr < max_ptr &&
+                  SRE_TRY_MATCH(state, repeat_ptr))
+                    ++(SRE_CHAR*)state->text_ptr;
+
+                max_ptr = text_ptr + max_rep;
+                if ((SRE_CHAR*)state->text_ptr <= max_ptr)
+                    /*
+                      the initial repeat could have consumed all those
+                      available, but it still failed to match, so discard all
+                      of those, advance by one, and try again
+                     */
+                    text_ptr = (SRE_CHAR*)state->text_ptr + 1;
+                else
+                    /*
+                      the initial repeat couldn't have consumed all those
+                      available, so discard until it _could_ consume all those
+                      available and then try again
+                     */
+                    text_ptr = (SRE_CHAR*)state->text_ptr - max_rep;
+            } else
+                /* advance and try again. */
+                ++text_ptr;
+        } else
+            /* advance and try again. */
+            ++text_ptr;
+    }
 
     return status;
 }
@@ -1646,36 +2660,24 @@
     if (!PyArg_ParseTuple(args, "ii", &character, &flags))
         return NULL;
     if (flags & SRE_FLAG_LOCALE)
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", locale_lower(character));
     if (flags & SRE_FLAG_UNICODE)
 #if defined(HAVE_UNICODE)
-        return Py_BuildValue("i", sre_lower_unicode(character));
+        return Py_BuildValue("i", unicode_lower(character));
 #else
-        return Py_BuildValue("i", sre_lower_locale(character));
+        return Py_BuildValue("i", locale_lower(character));
 #endif
-    return Py_BuildValue("i", sre_lower(character));
-}
-
-LOCAL(void)
-state_reset(SRE_STATE* state)
-{
-    /* FIXME: dynamic! */
-    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
-
-    state->lastmark = -1;
-    state->lastindex = -1;
-
-    state->repeat = NULL;
-
-    data_stack_dealloc(state);
+    return Py_BuildValue("i", ascii_lower(character));
 }
 
 static void*
 getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
 {
-    /* given a python object, return a data pointer, a length (in
-       characters), and a character size.  return NULL if the object
-       is not a string (or not compatible) */
+    /*
+      given a python object, return a data pointer, a length (in characters),
+      and a character size.  return NULL if the object is not a string (or not
+      compatible)
+     */
 
     PyBufferProcs *buffer;
     Py_ssize_t size, bytes;
@@ -1748,12 +2750,28 @@
 
     memset(state, 0, sizeof(SRE_STATE));
 
-    state->lastmark = -1;
-    state->lastindex = -1;
-
     ptr = getstring(string, &length, &charsize);
     if (!ptr)
         return NULL;
+
+    state->backtrack_chunk =
+      (SRE_BacktrackChunk*)PyMem_MALLOC(sizeof(SRE_BacktrackChunk));
+    state->mark_chunk = (SRE_MarkChunk*)PyMem_MALLOC(sizeof(SRE_MarkChunk));
+    state->mark = (void**)PyMem_MALLOC(pattern->groups * 2 * sizeof(void*));
+    if (state->backtrack_chunk == NULL || state->mark_chunk == NULL ||
+      state->mark == NULL) {
+        PyMem_FREE(state->mark);
+        PyMem_FREE(state->backtrack_chunk); /* first_* are not set yet */
+        PyMem_FREE(state->mark_chunk);
+        return PyErr_NoMemory(); /* report the failure; returns NULL */
+    }
+
+    memset(state->backtrack_chunk, 0, sizeof(SRE_BacktrackChunk));
+    memset(state->mark_chunk, 0, sizeof(SRE_MarkChunk));
+    state->mark_count = pattern->groups * 2;
+
+    state->first_backtrack_chunk = state->backtrack_chunk;
+    state->first_mark_chunk = state->mark_chunk;
 
     /* adjust boundaries */
     if (start < 0)
@@ -1768,40 +2786,60 @@
 
     state->charsize = charsize;
 
-    state->beginning = ptr;
-
-    state->start = (void*) ((char*) ptr + start * state->charsize);
-    state->end = (void*) ((char*) ptr + end * state->charsize);
+    /* initialise the character encoding */
+    if (pattern->flags & SRE_FLAG_UNICODE)
+        state->encoding = &unicode_encoding;
+    else if (pattern->flags & SRE_FLAG_LOCALE)
+        state->encoding = &locale_encoding;
+    else
+        state->encoding = &ascii_encoding;
+
+    state->text_start = ptr;
+    state->text_end = (void*)((char*) ptr + length * state->charsize);
+
+    state->slice_start = (void*)((char*) ptr + start * state->charsize);
+    state->slice_end = (void*)((char*) ptr + end * state->charsize);
 
     Py_INCREF(string);
     state->string = string;
     state->pos = start;
     state->endpos = end;
 
-    if (pattern->flags & SRE_FLAG_LOCALE)
-        state->lower = sre_lower_locale;
-    else if (pattern->flags & SRE_FLAG_UNICODE)
-#if defined(HAVE_UNICODE)
-        state->lower = sre_lower_unicode;
-#else
-        state->lower = sre_lower_locale;
-#endif
-    else
-        state->lower = sre_lower;
-
     return string;
 }
 
 LOCAL(void)
 state_fini(SRE_STATE* state)
 {
+    SRE_BacktrackChunk* backtrack_chunk;
+    SRE_MarkChunk* mark_chunk;
+    /* drop the string reference, then free the chunk lists and mark array */
     Py_XDECREF(state->string);
-    data_stack_dealloc(state);
+
+    backtrack_chunk = state->first_backtrack_chunk; /* head of the list */
+    while (backtrack_chunk != NULL) {
+        SRE_BacktrackChunk* next;
+        /* fetch the link before the chunk holding it is freed */
+        next = backtrack_chunk->next;
+        PyMem_FREE(backtrack_chunk);
+        backtrack_chunk = next;
+    }
+
+    mark_chunk = state->first_mark_chunk; /* head of the list */
+    while (mark_chunk != NULL) {
+        SRE_MarkChunk* next;
+        /* fetch the link before the chunk holding it is freed */
+        next = mark_chunk->next;
+        PyMem_FREE(mark_chunk);
+        mark_chunk = next;
+    }
+
+    PyMem_FREE(state->mark);
 }
 
 /* calculate offset from start of string */
 #define STATE_OFFSET(state, member)\
-    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
+    (((char*)(member) - (char*)(state)->text_start) / (state)->charsize)
 
 LOCAL(PyObject*)
 state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
@@ -1810,7 +2848,8 @@
 
     index = (index - 1) * 2;
 
-    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
+    if (string == Py_None || index >= state->lastmark || !state->mark[index] ||
+      !state->mark[index+1]) {
         if (empty)
             /* want empty string */
             i = j = 0;
@@ -1830,12 +2869,6 @@
 pattern_error(int status)
 {
     switch (status) {
-    case SRE_ERROR_RECURSION_LIMIT:
-        PyErr_SetString(
-            PyExc_RuntimeError,
-            "maximum recursion limit exceeded"
-            );
-        break;
     case SRE_ERROR_MEMORY:
         PyErr_NoMemory();
         break;
@@ -1867,6 +2900,7 @@
 {
     SRE_STATE state;
     int status;
+    PyObject* match;
 
     PyObject* string;
     Py_ssize_t start = 0;
@@ -1880,9 +2914,9 @@
     if (!string)
         return NULL;
 
-    state.ptr = state.start;
-
-    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
+    state.text_ptr = state.slice_start;
+
+    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.text_ptr));
 
     if (state.charsize == 1) {
         status = sre_match(&state, PatternObject_GetCode(self));
@@ -1892,13 +2926,15 @@
 #endif
     }
 
-    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.text_ptr));
     if (PyErr_Occurred())
         return NULL;
 
+    match = pattern_new_match(self, &state, status);
+
     state_fini(&state);
 
-    return pattern_new_match(self, &state, status);
+    return match;
 }
 
 static PyObject*
@@ -1906,6 +2942,7 @@
 {
     SRE_STATE state;
     int status;
+    PyObject* match;
 
     PyObject* string;
     Py_ssize_t start = 0;
@@ -1919,7 +2956,7 @@
     if (!string)
         return NULL;
 
-    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.text_ptr));
 
     if (state.charsize == 1) {
         status = sre_search(&state, PatternObject_GetCode(self));
@@ -1929,14 +2966,18 @@
 #endif
     }
 
-    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.text_ptr));
+
+    if (PyErr_Occurred()) {
+        state_fini(&state);
+        return NULL;
+    }
+
+    match = pattern_new_match(self, &state, status);
 
     state_fini(&state);
 
-    if (PyErr_Occurred())
-        return NULL;
-
-    return pattern_new_match(self, &state, status);
+    return match;
 }
 
 static PyObject*
@@ -2060,13 +3101,11 @@
         return NULL;
     }
 
-    while (state.start <= state.end) {
+    while (state.slice_start <= state.slice_end) {
 
         PyObject* item;
 
-        state_reset(&state);
-
-        state.ptr = state.start;
+        state.text_ptr = state.slice_start;
 
         if (state.charsize == 1) {
             status = sre_search(&state, PatternObject_GetCode(self));
@@ -2076,8 +3115,8 @@
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+        if (PyErr_Occurred())
+            goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2089,8 +3128,8 @@
         /* don't bother to build a match object */
         switch (self->groups) {
         case 0:
-            b = STATE_OFFSET(&state, state.start);
-            e = STATE_OFFSET(&state, state.ptr);
+            b = STATE_OFFSET(&state, state.slice_start);
+            e = STATE_OFFSET(&state, state.text_ptr);
             item = PySequence_GetSlice(string, b, e);
             if (!item)
                 goto error;
@@ -2120,10 +3159,11 @@
         if (status < 0)
             goto error;
 
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+        if (state.text_ptr == state.slice_start)
+            state.slice_start = (void*) ((char*) state.text_ptr +
+              state.charsize);
         else
-            state.start = state.ptr;
+            state.slice_start = state.text_ptr;
 
     }
 
@@ -2190,13 +3230,10 @@
     }
 
     n = 0;
-    last = state.start;
+    last = state.slice_start;
 
     while (!maxsplit || n < maxsplit) {
-
-        state_reset(&state);
-
-        state.ptr = state.start;
+        state.text_ptr = state.slice_start;
 
         if (state.charsize == 1) {
             status = sre_search(&state, PatternObject_GetCode(self));
@@ -2206,8 +3243,8 @@
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+    if (PyErr_Occurred())
+        goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2216,18 +3253,19 @@
             goto error;
         }
 
-        if (state.start == state.ptr) {
-            if (last == state.end)
+        if (state.slice_start == state.text_ptr) {
+            if (last == state.slice_end)
                 break;
             /* skip one character */
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+            state.slice_start = (void*) ((char*) state.text_ptr +
+              state.charsize);
             continue;
         }
 
         /* get segment before this match */
         item = PySequence_GetSlice(
             string, STATE_OFFSET(&state, last),
-            STATE_OFFSET(&state, state.start)
+            STATE_OFFSET(&state, state.slice_start)
             );
         if (!item)
             goto error;
@@ -2249,7 +3287,7 @@
 
         n = n + 1;
 
-        last = state.start = state.ptr;
+        last = state.slice_start = state.text_ptr;
 
     }
 
@@ -2303,10 +3341,10 @@
         b = bint;
         if (ptr) {
             if (b == 1) {
-		    literal = sre_literal_template((unsigned char *)ptr, n);
+            literal = sre_literal_template((unsigned char *)ptr, n);
             } else {
 #if defined(HAVE_UNICODE)
-		    literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
+            literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
 #endif
             }
         } else {
@@ -2345,10 +3383,7 @@
     n = i = 0;
 
     while (!count || n < count) {
-
-        state_reset(&state);
-
-        state.ptr = state.start;
+        state.text_ptr = state.slice_start;
 
         if (state.charsize == 1) {
             status = sre_search(&state, PatternObject_GetCode(self));
@@ -2358,8 +3393,8 @@
 #endif
         }
 
-	if (PyErr_Occurred())
-	    goto error;
+    if (PyErr_Occurred())
+        goto error;
 
         if (status <= 0) {
             if (status == 0)
@@ -2368,8 +3403,8 @@
             goto error;
         }
 
-        b = STATE_OFFSET(&state, state.start);
-        e = STATE_OFFSET(&state, state.ptr);
+        b = STATE_OFFSET(&state, state.slice_start);
+        e = STATE_OFFSET(&state, state.text_ptr);
 
         if (i < b) {
             /* get segment before this match */
@@ -2419,10 +3454,11 @@
 
 next:
         /* move on */
-        if (state.ptr == state.start)
-            state.start = (void*) ((char*) state.ptr + state.charsize);
+        if (state.text_ptr == state.slice_start)
+            state.slice_start = (void*) ((char*) state.text_ptr +
+              state.charsize);
         else
-            state.start = state.ptr;
+            state.slice_start = state.text_ptr;
 
     }
 
@@ -2577,20 +3613,20 @@
 
 static PyMethodDef pattern_methods[] = {
     {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
-	pattern_match_doc},
+    pattern_match_doc},
     {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
-	pattern_search_doc},
+    pattern_search_doc},
     {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
-	pattern_sub_doc},
+    pattern_sub_doc},
     {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
-	pattern_subn_doc},
+    pattern_subn_doc},
     {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
-	pattern_split_doc},
+    pattern_split_doc},
     {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
-	pattern_findall_doc},
+    pattern_findall_doc},
 #if PY_VERSION_HEX >= 0x02020000
     {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
-	pattern_finditer_doc},
+    pattern_finditer_doc},
 #endif
     {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
     {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
@@ -2638,24 +3674,24 @@
     (destructor)pattern_dealloc, /*tp_dealloc*/
     0, /*tp_print*/
     (getattrfunc)pattern_getattr, /*tp_getattr*/
-    0,					/* tp_setattr */
-    0,					/* tp_compare */
-    0,					/* tp_repr */
-    0,					/* tp_as_number */
-    0,					/* tp_as_sequence */
-    0,					/* tp_as_mapping */
-    0,					/* tp_hash */
-    0,					/* tp_call */
-    0,					/* tp_str */
-    0,					/* tp_getattro */
-    0,					/* tp_setattro */
-    0,					/* tp_as_buffer */
-    Py_TPFLAGS_HAVE_WEAKREFS,		/* tp_flags */
-    pattern_doc,			/* tp_doc */
-    0,					/* tp_traverse */
-    0,					/* tp_clear */
-    0,					/* tp_richcompare */
-    offsetof(PatternObject, weakreflist),	/* tp_weaklistoffset */
+    0,                  /* tp_setattr */
+    0,                  /* tp_compare */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    0,                  /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_HAVE_WEAKREFS,       /* tp_flags */
+    pattern_doc,            /* tp_doc */
+    0,                  /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    offsetof(PatternObject, weakreflist),   /* tp_weaklistoffset */
 };
 
 static int _validate(PatternObject *self); /* Forward */
@@ -2674,9 +3710,8 @@
     Py_ssize_t groups = 0;
     PyObject* groupindex = NULL;
     PyObject* indexgroup = NULL;
-    if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
-                          &PyList_Type, &code, &groups,
-                          &groupindex, &indexgroup))
+    if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags, &PyList_Type,
+      &code, &groups, &groupindex, &indexgroup))
         return NULL;
 
     n = PyList_GET_SIZE(code);
@@ -2689,8 +3724,8 @@
 
     for (i = 0; i < n; i++) {
         PyObject *o = PyList_GET_ITEM(code, i);
-        unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
-                                              : PyLong_AsUnsignedLong(o);
+        unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o) :
+          PyLong_AsUnsignedLong(o);
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -2763,436 +3798,509 @@
 #define VTRACE(v)
 #endif
 
-/* Report failure */
-#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
-
-/* Extract opcode, argument, or skip count from code array */
-#define GET_OP                                          \
-    do {                                                \
-        VTRACE(("%p: ", code));                         \
-        if (code >= end) FAIL;                          \
-        op = *code++;                                   \
-        VTRACE(("%lu (op)\n", (unsigned long)op));      \
-    } while (0)
-#define GET_ARG                                         \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        arg = *code++;                                  \
-        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
-    } while (0)
-#define GET_SKIP_ADJ(adj)                               \
-    do {                                                \
-        VTRACE(("%p= ", code));                         \
-        if (code >= end) FAIL;                          \
-        skip = *code;                                   \
-        VTRACE(("%lu (skip to %p)\n",                   \
-               (unsigned long)skip, code+skip));        \
-        if (code+skip-adj < code || code+skip-adj > end)\
-            FAIL;                                       \
-        code++;                                         \
-    } while (0)
-#define GET_SKIP GET_SKIP_ADJ(0)
-
-static int
-_validate_charset(SRE_CODE *code, SRE_CODE *end)
-{
-    /* Some variables are manipulated by the macros above */
+typedef struct {
+    int mark_count; /* number of MARK codewords validated */
+    int min_mark; /* smallest MARK id seen */
+    int max_mark; /* largest MARK id seen */
+    int max_group_ref; /* largest group id in a GROUPREF/GROUPREF_EXISTS */
+} SRE_ValidateInfo;
+
+SRE_CODE* validate_pattern(SRE_CODE* pattern_start, SRE_CODE* pattern_end,
+  SRE_ValidateInfo* validate_info);
+
+/*
+  validates an assert: <ASSERT> <skip_to_end> <back> pattern <END_ASSERT>
+
+  returns a pointer just past <END_ASSERT>, or NULL if the assert is invalid
+ */
+SRE_CODE* validate_assert(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end,
+  SRE_ValidateInfo* validate_info) {
+    SRE_CODE* end_ptr = pattern_ptr + pattern_ptr[1];
+
+    /* the skip must be >= 3 and land, in bounds, on this op's terminator */
+    if (pattern_ptr[1] < 3 || end_ptr >= pattern_end || end_ptr[0] !=
+      sre_op_info[pattern_ptr[0]].terminator)
+        return NULL;
+
+    /* the body starts after the 3 header codewords and must end at end_ptr */
+    if (validate_pattern(pattern_ptr + 3, end_ptr, validate_info) != end_ptr)
+        return NULL;
+
+    return end_ptr + 1;
+}
+
+/*
+  validates a branch: <BRANCH> [<skip> code <JUMP> <skip_to_tail>]+ <0>
+
+  returns the tail that every JUMP targets, or NULL if the branch is invalid
+ */
+SRE_CODE* validate_branch(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end,
+  SRE_ValidateInfo* validate_info) {
+    SRE_CODE* skip_ptr;
+    SRE_CODE* tail_ptr;
+
+    /* all the jumps should end in the same place, the tail */
+    tail_ptr = NULL;
+
+    /* point to the first skip */
+    skip_ptr = pattern_ptr + 1;
+    VTRACE(("skip_ptr is 0x%p\n", skip_ptr));
+
+    /* validate each of the branches */
+    while (skip_ptr[0] > 2) {
+        SRE_CODE* next_ptr;
+        SRE_CODE* ptr;
+
+        /* point to the next skip */
+        next_ptr = skip_ptr + skip_ptr[0];
+        VTRACE(("next_ptr is 0x%p\n", next_ptr));
+        if (next_ptr >= pattern_end)
+            return NULL;
+
+        /*
+          validate the branch
+
+          it'll return a pointer to the first codeword it doesn't understand,
+          which must be the JUMP 2 codewords before the next skip, or NULL
+         */
+        VTRACE(("validate from 0x%p to 0x%p\n", skip_ptr + 1, next_ptr));
+        ptr = validate_pattern(skip_ptr + 1, next_ptr, validate_info);
+        VTRACE(("ptr is 0x%p\n", ptr));
+        if (ptr != next_ptr - 2 || ptr[0] != SRE_OP_JUMP)
+            return NULL;
+        VTRACE(("ptr[0] is %d\n", ptr[0]));
+
+        /* all the JUMPs should jump to the same place, the tail */
+        VTRACE(("JUMP to 0x%p\n", ptr + ptr[1]));
+        if (tail_ptr != NULL && ptr + ptr[1] != tail_ptr)
+            return NULL;
+
+        tail_ptr = ptr + ptr[1];
+
+        /* next branch */
+        skip_ptr = next_ptr;
+    }
+
+    /* the final skip should be 0 */
+    if (skip_ptr[0] != 0)
+        return NULL;
+
+    VTRACE(("tail_ptr is 0x%p\n", tail_ptr));
+    return tail_ptr;
+}
+
+/*
+  validates a category: <SRE_OP_CATEGORY> <code>
+
+  returns a pointer past <code>, or NULL if <code> exceeds SRE_MAXCAT
+ */
+SRE_CODE* validate_category(SRE_CODE* pattern_ptr) {
+    if (pattern_ptr[1] > SRE_MAXCAT)
+        return NULL;
+
+    return pattern_ptr + 2;
+}
+
+/*
+  validates a charset
+
+  the charset format is:
+  <CHARSET> skip_to_end max_char index... subset...
+
+  returns a pointer past the charset, or NULL if the charset is invalid
+ */
+SRE_CODE* validate_charset(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end) {
+    SRE_CODE hi_bytes;
+    SRE_CODE index;
+    SRE_CODE next_index;
+    SRE_CODE i;
+    SRE_CODE* end_ptr;
+
+    /* get the high bytes of the maximum character code */
+    hi_bytes = pattern_ptr[2] >> 8;
+    /* the last index codeword read below must lie before pattern_end */
+    if (pattern_ptr + 3 + hi_bytes / 2 >= pattern_end)
+        return NULL;
+
+    /*
+      the charset is split into subsets, each of 256 characters, and every
+      high byte has a subset index (there are 2 indexes in each codeword)
+
+      an index may repeat one already seen or be the next unused one
+      NOTE(review): for even i the shift is 0 and nothing masks off the
+      other half of the codeword -- confirm that is intended
+     */
+    next_index = 0;
+    for (i = 0; i <= hi_bytes; i ++) {
+        index = pattern_ptr[3 + i / 2] >> (i % 2 * SRE_BITS_PER_CODE / 2);
+        if (index > next_index)
+            return NULL;
+
+        if (index == next_index)
+            next_index = index + 1;
+    }
+
+    /* point past the subsets, which should be the end of the charset */
+    end_ptr = pattern_ptr + 4 + (next_index - 1) / 2 * (256 /
+      SRE_BITS_PER_CODE);
+    /* the computed end must be in bounds and agree with skip_to_end */
+    if (end_ptr > pattern_end || end_ptr != pattern_ptr + pattern_ptr[1])
+        return NULL;
+
+    return end_ptr;
+}
+
+/*
+  validates a groupref: <GROUPREF> <group_id>
+
+  records the largest group id referenced; returns a pointer past <group_id>
+ */
+SRE_CODE* validate_groupref(SRE_CODE* pattern_ptr, SRE_ValidateInfo*
+  validate_info) {
+    validate_info->max_group_ref = signed_max(validate_info->max_group_ref,
+      pattern_ptr[1]);
+
+    return pattern_ptr + 2;
+}
+
+/*
+  validates a groupref_exists
+
+  <GROUPREF_EXISTS> <group_id> <skip_to_codeno> codeyes [<JUMP> <skip_to_end>
+  codeno]; returns a pointer past the construct, or NULL if it is invalid
+ */
+SRE_CODE* validate_groupref_exists(SRE_CODE* pattern_ptr, SRE_CODE*
+  pattern_end, SRE_ValidateInfo* validate_info) {
+    SRE_CODE* codeno_ptr;
+    SRE_CODE* ptr;
+    /* record the largest group id referenced so far */
+    validate_info->max_group_ref = signed_max(validate_info->max_group_ref,
+      pattern_ptr[1]);
+
+    /* where does codeno start? (the skip must cover the 3 header codewords) */
+    codeno_ptr = pattern_ptr + pattern_ptr[2];
+    if (pattern_ptr[2] < 3 || codeno_ptr > pattern_end)
+        return NULL;
+
+    /* validate codeyes, which starts after the 3 header codewords */
+    ptr = validate_pattern(pattern_ptr + 3, codeno_ptr, validate_info);
+    if (ptr == codeno_ptr - 2) {
+        SRE_CODE* tail_ptr;
+        /* validation stopped 2 codewords early, so codeno should exist */
+
+        /* codeyes should be followed by JUMP with a skip of at least 2 */
+        if (ptr[0] != SRE_OP_JUMP || ptr[1] < 2)
+            return NULL;
+
+        /* where does codeno finish? */
+        tail_ptr = ptr + ptr[1];
+        if (tail_ptr > pattern_end)
+            return NULL;
+
+        /* validate codeno, which must end exactly at the JUMP target */
+        ptr = validate_pattern(ptr + 2, tail_ptr, validate_info);
+        if (ptr != tail_ptr)
+            return NULL;
+    } else if (ptr != codeno_ptr)
+        /* codeyes is invalid or did not end exactly at codeno_ptr */
+        return NULL;
+
+    return ptr;
+}
+
+/*
+  validates a mark: <MARK> <id>
+
+  only records statistics about the id; returns a pointer past <id>
+ */
+SRE_CODE* validate_mark(SRE_CODE* pattern_ptr, SRE_ValidateInfo* validate_info)
+  {
+    ++validate_info->mark_count; /* one more MARK seen */
+    validate_info->min_mark = signed_min(validate_info->min_mark,
+      pattern_ptr[1]);
+    validate_info->max_mark = signed_max(validate_info->max_mark,
+      pattern_ptr[1]);
+
+    return pattern_ptr + 2;
+}
+
+/* validates a range <RANGE> min max; returns NULL if min exceeds max */
+SRE_CODE* validate_range(SRE_CODE* pattern_ptr) {
+    if (pattern_ptr[1] > pattern_ptr[2])
+        return NULL;
+
+    return pattern_ptr + 3; /* past the opcode, min and max */
+}
+
+/* validates a set: opcode, skip to tail, then members up to the tail */
+SRE_CODE* validate_set(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end) {
+    SRE_CODE* tail_ptr = pattern_ptr + pattern_ptr[1];
+    /* the skip must cover the 2 header codewords and stay in bounds */
+    if (pattern_ptr[1] < 2 || tail_ptr > pattern_end)
+        return NULL;
+
+    pattern_ptr += 2; /* step over the opcode and the skip */
+
+    while (pattern_ptr < tail_ptr) {
+        SRE_CODE op = pattern_ptr[0];
+        /* op indexes sre_op_info below, so reject values above SRE_MAXOP */
+        if (op > SRE_MAXOP)
+            return NULL;
+
+        switch(sre_op_info[op].type) {
+        case SRE_TYPE_CATEGORY:
+            pattern_ptr = validate_category(pattern_ptr);
+            break;
+        case SRE_TYPE_CHARSET:
+            pattern_ptr = validate_charset(pattern_ptr, tail_ptr);
+            break;
+        case SRE_TYPE_LITERAL:
+            /* <LITERAL> <code> */
+            pattern_ptr += 2;
+            break;
+        case SRE_TYPE_RANGE:
+            pattern_ptr = validate_range(pattern_ptr);
+            break;
+        default:
+            return NULL;
+        }
+        /* the member validators return NULL for an invalid member */
+        if (pattern_ptr == NULL)
+            return NULL;
+    }
+    /* the members must end exactly at the tail */
+    if (pattern_ptr != tail_ptr)
+        return NULL;
+
+    return pattern_ptr;
+}
+
+/*
+  validates a repeat
+
+  <REPEAT> <skip_to_end> <min> <max> pattern <END_REPEAT> <skip_to_start>
+ */
+SRE_CODE* validate_repeat(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end,
+  SRE_ValidateInfo* validate_info) {
+    SRE_CODE* end_ptr = pattern_ptr + pattern_ptr[1];
+
+    /* the terminator and its skip (end_ptr[0..1]) must both be in bounds */
+    if (pattern_ptr[1] < 4 || end_ptr + 1 >= pattern_end || end_ptr[0] !=
+      sre_op_info[pattern_ptr[0]].terminator || end_ptr[1] != pattern_ptr[1])
+        return NULL;
+
+    /* are the repeat bounds valid? (min <= max and max != 0) */
+    if (pattern_ptr[2] > pattern_ptr[3] || pattern_ptr[3] == 0)
+        return NULL;
+
+    /* check the body, which must end exactly at the terminator */
+    if (validate_pattern(pattern_ptr + 4, end_ptr, validate_info) != end_ptr)
+        return NULL;
+
+    return end_ptr + 2;
+}
+
+/*
+  validates a repeat_one: <REPEAT_ONE> <skip_to_end> <min> <max>
+  character_pattern
+
+  returns the tail pointer, or NULL if the repeat_one is invalid
+ */
+SRE_CODE* validate_repeat_one(SRE_CODE* pattern_ptr, SRE_CODE* pattern_end) {
+    SRE_CODE* tail_ptr = pattern_ptr + pattern_ptr[1];
     SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE offset;
-    int i;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
-
-        case SRE_OP_NEGATE:
-            break;
-
-        case SRE_OP_LITERAL:
-            GET_ARG;
-            break;
-
-        case SRE_OP_RANGE:
-            GET_ARG;
-            GET_ARG;
-            break;
-
-        case SRE_OP_CHARSET:
-            offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            GET_ARG; /* Number of blocks */
-            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            /* Make sure that each byte points to a valid block */
-            for (i = 0; i < 256; i++) {
-                if (((unsigned char *)code)[i] >= arg)
-                    FAIL;
-            }
-            code += offset;
-            offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
-            if (code+offset < code || code+offset > end)
-                FAIL;
-            code += offset;
-            break;
-
-        case SRE_OP_CATEGORY:
-            GET_ARG;
-            switch (arg) {
-            case SRE_CATEGORY_DIGIT:
-            case SRE_CATEGORY_NOT_DIGIT:
-            case SRE_CATEGORY_SPACE:
-            case SRE_CATEGORY_NOT_SPACE:
-            case SRE_CATEGORY_WORD:
-            case SRE_CATEGORY_NOT_WORD:
-            case SRE_CATEGORY_LINEBREAK:
-            case SRE_CATEGORY_NOT_LINEBREAK:
-            case SRE_CATEGORY_LOC_WORD:
-            case SRE_CATEGORY_LOC_NOT_WORD:
-            case SRE_CATEGORY_UNI_DIGIT:
-            case SRE_CATEGORY_UNI_NOT_DIGIT:
-            case SRE_CATEGORY_UNI_SPACE:
-            case SRE_CATEGORY_UNI_NOT_SPACE:
-            case SRE_CATEGORY_UNI_WORD:
-            case SRE_CATEGORY_UNI_NOT_WORD:
-            case SRE_CATEGORY_UNI_LINEBREAK:
-            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-                break;
-            default:
-                FAIL;
-            }
-            break;
-
+    /* the skip must cover the 4 header codewords plus at least one more */
+    if (pattern_ptr[1] < 5 || tail_ptr > pattern_end)
+        return NULL;
+
+    /* are the repeat bounds valid? (min <= max and max != 0) */
+    if (pattern_ptr[2] > pattern_ptr[3] || pattern_ptr[3] == 0)
+        return NULL;
+
+    /* check the body, which starts after the 4 header codewords */
+    pattern_ptr += 4;
+
+    op = pattern_ptr[0];
+    /* op indexes sre_op_info below, so reject values above SRE_MAXOP */
+    if (op > SRE_MAXOP)
+        return NULL;
+
+    switch(sre_op_info[op].type) {
+    case SRE_TYPE_CATEGORY:
+        pattern_ptr = validate_category(pattern_ptr);
+        break;
+    case SRE_TYPE_CHARSET:
+        pattern_ptr = validate_charset(pattern_ptr, tail_ptr);
+        break;
+    case SRE_TYPE_LITERAL:
+        /* <LITERAL> <code> */
+        pattern_ptr += 2;
+        break;
+    case SRE_TYPE_RANGE:
+        pattern_ptr = validate_range(pattern_ptr);
+        break;
+    case SRE_TYPE_SET:
+        pattern_ptr = validate_set(pattern_ptr, tail_ptr);
+        break;
+    case SRE_TYPE_SIMPLE:
+        /* <SIMPLE> */
+        ++pattern_ptr;
+        break;
+    default:
+        return NULL;
+    }
+    /* also rejects a NULL result from the validators called above */
+    if (pattern_ptr != tail_ptr)
+        return NULL;
+
+    return tail_ptr;
+}
+
+/*
+  validates a pattern
+
+  returns a pointer to the first codeword it doesn't understand or NULL if the
+  pattern is invalid
+ */
+SRE_CODE* validate_pattern(SRE_CODE* pattern_start, SRE_CODE* pattern_end,
+  SRE_ValidateInfo* validate_info) {
+    SRE_CODE* pattern_ptr = pattern_start;
+
+    while (pattern_ptr < pattern_end) {
+        SRE_CODE op = pattern_ptr[0];
+        VTRACE(("pattern_ptr is 0x%p, op is %d\n", pattern_ptr, op));
+
+        if (op > SRE_MAXOP)
+            return NULL;
+
+        switch(sre_op_info[op].type) {
+        case SRE_TYPE_ASSERT:
+            VTRACE(("TYPE_ASSERT\n"));
+            pattern_ptr = validate_assert(pattern_ptr, pattern_end,
+              validate_info);
+            break;
+        case SRE_TYPE_BRANCH:
+            VTRACE(("TYPE_BRANCH\n"));
+            pattern_ptr = validate_branch(pattern_ptr, pattern_end,
+              validate_info);
+            break;
+        case SRE_TYPE_CATEGORY:
+            VTRACE(("TYPE_CATEGORY\n"));
+            pattern_ptr = validate_category(pattern_ptr);
+            break;
+        case SRE_TYPE_CHARSET:
+            VTRACE(("TYPE_CHARSET\n"));
+            pattern_ptr = validate_charset(pattern_ptr, pattern_end);
+            break;
+        case SRE_TYPE_GROUPREF:
+            VTRACE(("TYPE_GROUPREF\n"));
+            pattern_ptr = validate_groupref(pattern_ptr, validate_info);
+            break;
+        case SRE_TYPE_GROUPREF_EXISTS:
+            VTRACE(("TYPE_GROUPREF_EXISTS\n"));
+            pattern_ptr = validate_groupref_exists(pattern_ptr, pattern_end,
+              validate_info);
+            break;
+        case SRE_TYPE_LITERAL:
+            /* <LITERAL> <code> */
+            VTRACE(("TYPE_LITERAL\n"));
+            pattern_ptr += 2;
+            break;
+        case SRE_TYPE_MARK:
+            VTRACE(("TYPE_MARK\n"));
+            pattern_ptr = validate_mark(pattern_ptr, validate_info);
+            break;
+        case SRE_TYPE_RANGE:
+            VTRACE(("TYPE_RANGE\n"));
+            pattern_ptr = validate_range(pattern_ptr);
+            break;
+        case SRE_TYPE_SET:
+            VTRACE(("TYPE_SET\n"));
+            pattern_ptr = validate_set(pattern_ptr, pattern_end);
+            break;
+        case SRE_TYPE_SIMPLE:
+            /* <SIMPLE> */
+            VTRACE(("TYPE_SIMPLE\n"));
+            ++pattern_ptr;
+            break;
+        case SRE_TYPE_REPEAT:
+            VTRACE(("TYPE_REPEAT\n"));
+            pattern_ptr = validate_repeat(pattern_ptr, pattern_end,
+              validate_info);
+            break;
+        case SRE_TYPE_REPEAT_ONE:
+            VTRACE(("TYPE_REPEAT_ONE\n"));
+            pattern_ptr = validate_repeat_one(pattern_ptr, pattern_end);
+            break;
         default:
-            FAIL;
-
+            VTRACE(("TYPE_UNKNOWN\n"));
+            return pattern_ptr;
         }
-    }
-
-    return 1;
-}
-
-static int
-_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    /* Some variables are manipulated by the macros above */
-    SRE_CODE op;
-    SRE_CODE arg;
-    SRE_CODE skip;
-
-    VTRACE(("code=%p, end=%p\n", code, end));
-
-    if (code > end)
-        FAIL;
-
-    while (code < end) {
-        GET_OP;
-        switch (op) {
-
-        case SRE_OP_MARK:
-            /* We don't check whether marks are properly nested; the
-               sre_match() code is robust even if they don't, and the worst
-               you can get is nonsensical match results. */
-            GET_ARG;
-            if (arg > 2*groups+1) {
-                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
-                FAIL;
-            }
-            break;
-
-        case SRE_OP_LITERAL:
-        case SRE_OP_NOT_LITERAL:
-        case SRE_OP_LITERAL_IGNORE:
-        case SRE_OP_NOT_LITERAL_IGNORE:
-            GET_ARG;
-            /* The arg is just a character, nothing to check */
-            break;
-
-        case SRE_OP_SUCCESS:
-        case SRE_OP_FAILURE:
-            /* Nothing to check; these normally end the matching process */
-            break;
-
-        case SRE_OP_AT:
-            GET_ARG;
-            switch (arg) {
-            case SRE_AT_BEGINNING:
-            case SRE_AT_BEGINNING_STRING:
-            case SRE_AT_BEGINNING_LINE:
-            case SRE_AT_END:
-            case SRE_AT_END_LINE:
-            case SRE_AT_END_STRING:
-            case SRE_AT_BOUNDARY:
-            case SRE_AT_NON_BOUNDARY:
-            case SRE_AT_LOC_BOUNDARY:
-            case SRE_AT_LOC_NON_BOUNDARY:
-            case SRE_AT_UNI_BOUNDARY:
-            case SRE_AT_UNI_NON_BOUNDARY:
-                break;
-            default:
-                FAIL;
-            }
-            break;
-
-        case SRE_OP_ANY:
-        case SRE_OP_ANY_ALL:
-            /* These have no operands */
-            break;
-
-        case SRE_OP_IN:
-        case SRE_OP_IN_IGNORE:
-            GET_SKIP;
-            /* Stop 1 before the end; we check the FAILURE below */
-            if (!_validate_charset(code, code+skip-2))
-                FAIL;
-            if (code[skip-2] != SRE_OP_FAILURE)
-                FAIL;
-            code += skip-1;
-            break;
-
-        case SRE_OP_INFO:
-            {
-                /* A minimal info field is
-                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
-                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
-                   more follows. */
-                SRE_CODE flags, min, max, i;
-                SRE_CODE *newcode;
-                GET_SKIP;
-                newcode = code+skip-1;
-                GET_ARG; flags = arg;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                /* Check that only valid flags are present */
-                if ((flags & ~(SRE_INFO_PREFIX |
-                               SRE_INFO_LITERAL |
-                               SRE_INFO_CHARSET)) != 0)
-                    FAIL;
-                /* PREFIX and CHARSET are mutually exclusive */
-                if ((flags & SRE_INFO_PREFIX) &&
-                    (flags & SRE_INFO_CHARSET))
-                    FAIL;
-                /* LITERAL implies PREFIX */
-                if ((flags & SRE_INFO_LITERAL) &&
-                    !(flags & SRE_INFO_PREFIX))
-                    FAIL;
-                /* Validate the prefix */
-                if (flags & SRE_INFO_PREFIX) {
-                    SRE_CODE prefix_len, prefix_skip;
-                    GET_ARG; prefix_len = arg;
-                    GET_ARG; prefix_skip = arg;
-                    /* Here comes the prefix string */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    code += prefix_len;
-                    /* And here comes the overlap table */
-                    if (code+prefix_len < code || code+prefix_len > newcode)
-                        FAIL;
-                    /* Each overlap value should be < prefix_len */
-                    for (i = 0; i < prefix_len; i++) {
-                        if (code[i] >= prefix_len)
-                            FAIL;
-                    }
-                    code += prefix_len;
-                }
-                /* Validate the charset */
-                if (flags & SRE_INFO_CHARSET) {
-                    if (!_validate_charset(code, newcode-1))
-                        FAIL;
-                    if (newcode[-1] != SRE_OP_FAILURE)
-                        FAIL;
-                    code = newcode;
-                }
-                else if (code != newcode) {
-                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
-                    FAIL;
-                }
-            }
-            break;
-
-        case SRE_OP_BRANCH:
-            {
-                SRE_CODE *target = NULL;
-                for (;;) {
-                    GET_SKIP;
-                    if (skip == 0)
-                        break;
-                    /* Stop 2 before the end; we check the JUMP below */
-                    if (!_validate_inner(code, code+skip-3, groups))
-                        FAIL;
-                    code += skip-3;
-                    /* Check that it ends with a JUMP, and that each JUMP
-                       has the same target */
-                    GET_OP;
-                    if (op != SRE_OP_JUMP)
-                        FAIL;
-                    GET_SKIP;
-                    if (target == NULL)
-                        target = code+skip-1;
-                    else if (code+skip-1 != target)
-                        FAIL;
-                }
-            }
-            break;
-
-        case SRE_OP_REPEAT_ONE:
-        case SRE_OP_MIN_REPEAT_ONE:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-4, groups))
-                    FAIL;
-                code += skip-4;
-                GET_OP;
-                if (op != SRE_OP_SUCCESS)
-                    FAIL;
-            }
-            break;
-
-        case SRE_OP_REPEAT:
-            {
-                SRE_CODE min, max;
-                GET_SKIP;
-                GET_ARG; min = arg;
-                GET_ARG; max = arg;
-                if (min > max)
-                    FAIL;
-#ifdef Py_UNICODE_WIDE
-                if (max > 65535)
-                    FAIL;
-#endif
-                if (!_validate_inner(code, code+skip-3, groups))
-                    FAIL;
-                code += skip-3;
-                GET_OP;
-                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
-                    FAIL;
-            }
-            break;
-
-        case SRE_OP_GROUPREF:
-        case SRE_OP_GROUPREF_IGNORE:
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
-            break;
-
-        case SRE_OP_GROUPREF_EXISTS:
-            /* The regex syntax for this is: '(?(group)then|else)', where
-               'group' is either an integer group number or a group name,
-               'then' and 'else' are sub-regexes, and 'else' is optional. */
-            GET_ARG;
-            if (arg >= groups)
-                FAIL;
-            GET_SKIP_ADJ(1);
-            code--; /* The skip is relative to the first arg! */
-            /* There are two possibilities here: if there is both a 'then'
-               part and an 'else' part, the generated code looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skipyes>
-               ...then part...
-               JUMP
-               <skipno>
-               (<skipyes> jumps here)
-               ...else part...
-               (<skipno> jumps here)
-
-               If there is only a 'then' part, it looks like:
-
-               GROUPREF_EXISTS
-               <group>
-               <skip>
-               ...then part...
-               (<skip> jumps here)
-
-               There is no direct way to decide which it is, and we don't want
-               to allow arbitrary jumps anywhere in the code; so we just look
-               for a JUMP opcode preceding our skip target.
-            */
-            if (skip >= 3 && code+skip-3 >= code &&
-                code[skip-3] == SRE_OP_JUMP)
-            {
-                VTRACE(("both then and else parts present\n"));
-                if (!_validate_inner(code+1, code+skip-3, groups))
-                    FAIL;
-                code += skip-2; /* Position after JUMP, at <skipno> */
-                GET_SKIP;
-                if (!_validate_inner(code, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            else {
-                VTRACE(("only a then part present\n"));
-                if (!_validate_inner(code+1, code+skip-1, groups))
-                    FAIL;
-                code += skip-1;
-            }
-            break;
-
-        case SRE_OP_ASSERT:
-        case SRE_OP_ASSERT_NOT:
-            GET_SKIP;
-            GET_ARG; /* 0 for lookahead, width for lookbehind */
-            code--; /* Back up over arg to simplify math below */
-            if (arg & 0x80000000)
-                FAIL; /* Width too large */
-            /* Stop 1 before the end; we check the SUCCESS below */
-            if (!_validate_inner(code+1, code+skip-2, groups))
-                FAIL;
-            code += skip-2;
-            GET_OP;
-            if (op != SRE_OP_SUCCESS)
-                FAIL;
-            break;
-
-        default:
-            FAIL;
-
-        }
-    }
-
-    VTRACE(("okay\n"));
-    return 1;
-}
-
-static int
-_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
-{
-    if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
-        FAIL;
-    if (groups == 0)  /* fix for simplejson */
-        groups = 100; /* 100 groups should always be safe */
-    return _validate_inner(code, end-1, groups);
+
+        if (pattern_ptr == NULL)
+            return NULL;
+    }
+
+    if (pattern_ptr > pattern_end)
+        return NULL;
+
+    return pattern_ptr;
 }
 
 static int
 _validate(PatternObject *self)
 {
-    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
-    {
-        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
-        return 0;
-    }
-    else
-        VTRACE(("Success!\n"));
-    return 1;
+    SRE_CODE* pattern_start;
+    SRE_CODE* pattern_end;
+    SRE_CODE* pattern_ptr;
+    SRE_ValidateInfo validate_info;
+
+    /* Validate self->code and set self->groups; FALSE + RuntimeError if bad. */
+    if (self->codesize < 1) /* is there any code at all? */
+        goto error;
+
+    pattern_start = self->code;
+    pattern_end = pattern_start + self->codesize;
+    VTRACE(("self->codesize is %d\n", (int)self->codesize));
+
+    validate_info.mark_count = 0;
+    validate_info.min_mark = 0x7FFF; /* NOTE(review): start-high sentinel? confirm */
+    validate_info.max_mark = 0;
+    validate_info.max_group_ref = -1;
+
+    /*
+      Check the pattern.  validate_pattern() returns NULL if the pattern is
+      invalid, otherwise a pointer to the first codeword it doesn't
+      understand; for a well-formed pattern that must be the final SUCCESS
+      codeword, i.e. the one at pattern_end - 1 (NULL fails that comparison).
+     */
+    VTRACE(("pattern_start is %p, pattern_end is %p\n", (void*)pattern_start,
+      (void*)pattern_end));
+    pattern_ptr = validate_pattern(pattern_start, pattern_end, &validate_info);
+    VTRACE(("pattern_ptr is %p\n", (void*)pattern_ptr));
+    if (pattern_ptr != pattern_end - 1 || pattern_ptr[0] != SRE_OP_SUCCESS)
+        goto error;
+
+    /* marks come in pairs: even count, lowest id 0, highest id odd */
+    if (validate_info.mark_count % 2 != 0)
+        goto error;
+    VTRACE(("mark_count is %d\n", validate_info.mark_count));
+    if (validate_info.mark_count > 0) {
+        VTRACE(("min_mark is %d, max_mark is %d\n", validate_info.min_mark,
+          validate_info.max_mark));
+        if (validate_info.min_mark != 0 || validate_info.max_mark % 2 != 1)
+            goto error;
+    }
+
+    self->groups = validate_info.mark_count / 2; /* two marks per group */
+    if (validate_info.max_group_ref >= self->groups)
+        goto error;
+
+    VTRACE(("self->groups is %d\n", (int)self->groups));
+
+    VTRACE(("Success!\n"));
+    return TRUE;
+
+error:
+    PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
+    return FALSE;
 }
 
 /* -------------------------------------------------------------------- */
@@ -3449,8 +4557,7 @@
     PyTuple_SET_ITEM(pair, 1, item);
 
     return pair;
-
-  error:
+error:
     Py_DECREF(pair);
     return NULL;
 }
@@ -3677,11 +4784,11 @@
 
         /* fill in group slices */
 
-        base = (char*) state->beginning;
+        base = (char*) state->text_start;
         n = state->charsize;
 
-        match->mark[0] = ((char*) state->start - base) / n;
-        match->mark[1] = ((char*) state->ptr - base) / n;
+        match->mark[0] = ((char*) state->slice_start - base) / n;
+        match->mark[1] = ((char*) state->text_ptr - base) / n;
 
         for (i = j = 0; i < pattern->groups; i++, j+=2)
             if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
@@ -3729,9 +4836,7 @@
     PyObject* match;
     int status;
 
-    state_reset(state);
-
-    state->ptr = state->start;
+    state->text_ptr = state->slice_start;
 
     if (state->charsize == 1) {
         status = sre_match(state, PatternObject_GetCode(self->pattern));
@@ -3746,10 +4851,11 @@
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
+    if (status == 0 || state->text_ptr == state->slice_start)
+        state->slice_start = (void*) ((char*) state->text_ptr +
+          state->charsize);
     else
-        state->start = state->ptr;
+        state->slice_start = state->text_ptr;
 
     return match;
 }
@@ -3762,9 +4868,7 @@
     PyObject* match;
     int status;
 
-    state_reset(state);
-
-    state->ptr = state->start;
+    state->text_ptr = state->slice_start;
 
     if (state->charsize == 1) {
         status = sre_search(state, PatternObject_GetCode(self->pattern));
@@ -3779,10 +4883,11 @@
     match = pattern_new_match((PatternObject*) self->pattern,
                                state, status);
 
-    if (status == 0 || state->ptr == state->start)
-        state->start = (void*) ((char*) state->ptr + state->charsize);
+    if (status == 0 || state->text_ptr == state->slice_start)
+        state->slice_start = (void*) ((char*) state->text_ptr +
+          state->charsize);
     else
-        state->start = state->ptr;
+        state->slice_start = state->text_ptr;
 
     return match;
 }
@@ -3876,7 +4981,7 @@
 
     m = Py_InitModule("_" SRE_MODULE, _functions);
     if (m == NULL)
-    	return;
+        return;
     d = PyModule_GetDict(m);
 
     x = PyInt_FromLong(SRE_MAGIC);
=== modified file Modules/sre.h
--- Modules/sre.h 2006-06-12 03:05:40 +0000
+++ Modules/sre.h 2009-04-10 23:57:07 +0000
@@ -12,14 +12,6 @@
 #define SRE_INCLUDED
 
 #include "sre_constants.h"
-
-/* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
-#define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD
@@ -48,24 +40,76 @@
     Py_ssize_t mark[1];
 } MatchObject;
 
-typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
+typedef enum {FALSE, TRUE} BOOL;
 
-/* FIXME: <fl> shouldn't be a constant, really... */
-#define SRE_MARK_SIZE 200
+/* per-encoding handler table (ascii, locale and unicode); see SRE_STATE.encoding */
+typedef struct {
+    BOOL (*in_category)(SRE_CODE category, SRE_CODE ch); /* category: presumably an SRE_CAT_* value - confirm */
+    BOOL (*lower)(SRE_CODE ch); /* NOTE(review): lower/upper/title return BOOL; confirm they test case rather than convert it */
+    BOOL (*upper)(SRE_CODE ch);
+    BOOL (*title)(SRE_CODE ch);
+} SRE_EncodingTable;
 
-typedef struct SRE_REPEAT_T {
-    Py_ssize_t count;
-    SRE_CODE* pattern; /* points to REPEAT operator arguments */
-    void* last_ptr; /* helper to check for infinite loops */
-    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
-} SRE_REPEAT;
+#define SRE_MARK_CHUNK_SIZE 1024
+
+/* storage for marks */
+typedef struct SRE_MarkChunk {
+    struct SRE_MarkChunk* previous;
+    struct SRE_MarkChunk* next;
+    int count;
+    void* items[SRE_MARK_CHUNK_SIZE];
+} SRE_MarkChunk;
+
+/* storage for one backtracking point: common header plus per-kind saved state */
+typedef struct SRE_BacktrackItem {
+    SRE_CODE op; /* NOTE(review): presumably selects which union arm is live - confirm */
+    int mark_count;
+    union {
+        struct {
+            void* slice_start;
+            void* slice_end;
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+        } assert;
+        struct {
+            void* text_ptr;
+            SRE_CODE* skip_ptr;
+        } branch;
+        struct {
+            SRE_CODE id;
+            void* text_ptr;
+            Py_ssize_t last_index;
+        } mark;
+        struct {
+            unsigned int min;
+            unsigned int max;
+            unsigned int count;
+            void* start_ptr;
+            void* text_ptr;
+            SRE_CODE* pattern_ptr;
+            struct SRE_BacktrackItem* repeat; /* link to another backtrack item */
+        } repeat;
+    }; /* NOTE(review): anonymous union is C11 or a compiler extension, not C89 */
+} SRE_BacktrackItem;
+
+#define SRE_BACKTRACK_CHUNK_SIZE 1024
+
+/* chunks of backtracking points */
+typedef struct SRE_BacktrackChunk {
+    struct SRE_BacktrackChunk* previous;
+    struct SRE_BacktrackChunk* next;
+    int count;
+    SRE_BacktrackItem items[SRE_BACKTRACK_CHUNK_SIZE];
+} SRE_BacktrackChunk;
 
 typedef struct {
     /* string pointers */
-    void* ptr; /* current position (also end of current slice) */
-    void* beginning; /* start of original string */
-    void* start; /* start of current slice */
-    void* end; /* end of original string */
+    void* text_start; /* start of original string */
+    void* text_end; /* end of original string */
+    void* slice_start; /* start of current slice */
+    void* slice_end; /* end of current slice */
+    void* text_ptr; /* current position in the text */
+    void* final_linebreak;
     /* attributes for the match object */
     PyObject* string;
     Py_ssize_t pos, endpos;
@@ -74,15 +118,16 @@
     /* registers */
     Py_ssize_t lastindex;
     Py_ssize_t lastmark;
-    void* mark[SRE_MARK_SIZE];
+    SRE_CODE* pattern_ptr;
     /* dynamically allocated stuff */
-    char* data_stack;
-    size_t data_stack_size;
-    size_t data_stack_base;
-    /* current repeat context */
-    SRE_REPEAT *repeat;
-    /* hooks */
-    SRE_TOLOWER_HOOK lower;
+    SRE_EncodingTable* encoding;
+    SRE_BacktrackChunk* first_backtrack_chunk;
+    SRE_BacktrackChunk* backtrack_chunk;
+    SRE_BacktrackItem* backtrack_item;
+    SRE_MarkChunk* first_mark_chunk;
+    SRE_MarkChunk* mark_chunk;
+    int mark_count;
+    void** mark;
 } SRE_STATE;
 
 typedef struct {
=== modified file Modules/sre_constants.h
--- Modules/sre_constants.h 2003-10-17 22:13:16 +0000
+++ Modules/sre_constants.h 2009-04-16 14:52:44 +0000
@@ -11,69 +11,69 @@
  * See the _sre.c file for information on usage and redistribution.
  */
 
-#define SRE_MAGIC 20031017
+#define SRE_MAGIC 20090329
+
+/* size of a code word (must be unsigned short or larger, and
+   large enough to hold a Py_UNICODE character) */
+
+#define SRE_CODE Py_UCS4
+#define SRE_MAXREPEAT 0xFFFFFFFF
+
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
 #define SRE_OP_ANY_ALL 3
 #define SRE_OP_ASSERT 4
 #define SRE_OP_ASSERT_NOT 5
-#define SRE_OP_AT 6
-#define SRE_OP_BRANCH 7
-#define SRE_OP_CALL 8
-#define SRE_OP_CATEGORY 9
-#define SRE_OP_CHARSET 10
-#define SRE_OP_BIGCHARSET 11
-#define SRE_OP_GROUPREF 12
-#define SRE_OP_GROUPREF_EXISTS 13
-#define SRE_OP_GROUPREF_IGNORE 14
-#define SRE_OP_IN 15
-#define SRE_OP_IN_IGNORE 16
-#define SRE_OP_INFO 17
-#define SRE_OP_JUMP 18
-#define SRE_OP_LITERAL 19
-#define SRE_OP_LITERAL_IGNORE 20
-#define SRE_OP_MARK 21
-#define SRE_OP_MAX_UNTIL 22
-#define SRE_OP_MIN_UNTIL 23
-#define SRE_OP_NOT_LITERAL 24
-#define SRE_OP_NOT_LITERAL_IGNORE 25
-#define SRE_OP_NEGATE 26
-#define SRE_OP_RANGE 27
-#define SRE_OP_REPEAT 28
-#define SRE_OP_REPEAT_ONE 29
-#define SRE_OP_SUBPATTERN 30
-#define SRE_OP_MIN_REPEAT_ONE 31
-#define SRE_AT_BEGINNING 0
-#define SRE_AT_BEGINNING_LINE 1
-#define SRE_AT_BEGINNING_STRING 2
-#define SRE_AT_BOUNDARY 3
-#define SRE_AT_NON_BOUNDARY 4
-#define SRE_AT_END 5
-#define SRE_AT_END_LINE 6
-#define SRE_AT_END_STRING 7
-#define SRE_AT_LOC_BOUNDARY 8
-#define SRE_AT_LOC_NON_BOUNDARY 9
-#define SRE_AT_UNI_BOUNDARY 10
-#define SRE_AT_UNI_NON_BOUNDARY 11
-#define SRE_CATEGORY_DIGIT 0
-#define SRE_CATEGORY_NOT_DIGIT 1
-#define SRE_CATEGORY_SPACE 2
-#define SRE_CATEGORY_NOT_SPACE 3
-#define SRE_CATEGORY_WORD 4
-#define SRE_CATEGORY_NOT_WORD 5
-#define SRE_CATEGORY_LINEBREAK 6
-#define SRE_CATEGORY_NOT_LINEBREAK 7
-#define SRE_CATEGORY_LOC_WORD 8
-#define SRE_CATEGORY_LOC_NOT_WORD 9
-#define SRE_CATEGORY_UNI_DIGIT 10
-#define SRE_CATEGORY_UNI_NOT_DIGIT 11
-#define SRE_CATEGORY_UNI_SPACE 12
-#define SRE_CATEGORY_UNI_NOT_SPACE 13
-#define SRE_CATEGORY_UNI_WORD 14
-#define SRE_CATEGORY_UNI_NOT_WORD 15
-#define SRE_CATEGORY_UNI_LINEBREAK 16
-#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17
+#define SRE_OP_BEGINNING_LINE 6
+#define SRE_OP_BEGINNING_STRING 7
+#define SRE_OP_BOUNDARY 8
+#define SRE_OP_BRANCH 9
+#define SRE_OP_CATEGORY 10
+#define SRE_OP_CHARSET 11
+#define SRE_OP_CHARSET_IGNORE 12
+#define SRE_OP_END_ASSERT 13
+#define SRE_OP_END_ASSERT_NOT 14
+#define SRE_OP_END_LINE 15
+#define SRE_OP_END_MAX_REPEAT 16
+#define SRE_OP_END_MIN_REPEAT 17
+#define SRE_OP_END_STRING 18
+#define SRE_OP_END_STRING_LINE 19
+#define SRE_OP_GROUPREF 20
+#define SRE_OP_GROUPREF_EXISTS 21
+#define SRE_OP_GROUPREF_IGNORE 22
+#define SRE_OP_JUMP 23
+#define SRE_OP_LITERAL 24
+#define SRE_OP_LITERAL_IGNORE 25
+#define SRE_OP_MARK 26
+#define SRE_OP_MAX_REPEAT 27
+#define SRE_OP_MAX_REPEAT_ONE 28
+#define SRE_OP_MIN_REPEAT 29
+#define SRE_OP_MIN_REPEAT_ONE 30
+#define SRE_OP_NOT_BOUNDARY 31
+#define SRE_OP_NOT_CATEGORY 32
+#define SRE_OP_NOT_CHARSET 33
+#define SRE_OP_NOT_CHARSET_IGNORE 34
+#define SRE_OP_NOT_LITERAL 35
+#define SRE_OP_NOT_LITERAL_IGNORE 36
+#define SRE_OP_NOT_RANGE 37
+#define SRE_OP_NOT_RANGE_IGNORE 38
+#define SRE_OP_NOT_SET 39
+#define SRE_OP_NOT_SET_IGNORE 40
+#define SRE_OP_RANGE 41
+#define SRE_OP_RANGE_IGNORE 42
+#define SRE_OP_SET 43
+#define SRE_OP_SET_IGNORE 44
+#define SRE_MAXOP 44
+
+#define SRE_CAT_ALPHA 0
+#define SRE_CAT_ALNUM 1
+#define SRE_CAT_DIGIT 2
+#define SRE_CAT_LINEBREAK 3
+#define SRE_CAT_SPACE 4
+#define SRE_CAT_WORD 5
+#define SRE_MAXCAT 5
+
 #define SRE_FLAG_TEMPLATE 1
 #define SRE_FLAG_IGNORECASE 2
 #define SRE_FLAG_LOCALE 4
@@ -81,6 +81,70 @@
 #define SRE_FLAG_DOTALL 16
 #define SRE_FLAG_UNICODE 32
 #define SRE_FLAG_VERBOSE 64
-#define SRE_INFO_PREFIX 1
-#define SRE_INFO_LITERAL 2
-#define SRE_INFO_CHARSET 4
+
+#define SRE_TYPE_ASSERT 1
+#define SRE_TYPE_BRANCH 2
+#define SRE_TYPE_CATEGORY 3
+#define SRE_TYPE_CHARSET 4
+#define SRE_TYPE_GROUPREF 5
+#define SRE_TYPE_GROUPREF_EXISTS 6
+#define SRE_TYPE_LITERAL 7
+#define SRE_TYPE_MARK 8
+#define SRE_TYPE_RANGE 9
+#define SRE_TYPE_REPEAT 10
+#define SRE_TYPE_REPEAT_ONE 11
+#define SRE_TYPE_SET 12
+#define SRE_TYPE_SIMPLE 13
+
+typedef struct {
+    int type;
+    SRE_CODE terminator;
+} SRE_OP_INFO;
+
+SRE_OP_INFO sre_op_info[] = { /* {type, terminator}, one entry per SRE_OP_* code, in opcode order */
+    {0, 0}, /* FAILURE */
+    {0, 0}, /* SUCCESS */
+    {13, 0}, /* ANY */
+    {13, 0}, /* ANY_ALL */
+    {1, 13}, /* ASSERT: terminator is END_ASSERT (13) */
+    {1, 14}, /* ASSERT_NOT: terminator is END_ASSERT_NOT (14) */
+    {13, 0}, /* BEGINNING_LINE */
+    {13, 0}, /* BEGINNING_STRING */
+    {13, 0}, /* BOUNDARY */
+    {2, 0}, /* BRANCH */
+    {3, 0}, /* CATEGORY */
+    {4, 0}, /* CHARSET */
+    {4, 0}, /* CHARSET_IGNORE */
+    {0, 0}, /* END_ASSERT */
+    {0, 0}, /* END_ASSERT_NOT */
+    {13, 0}, /* END_LINE */
+    {0, 0}, /* END_MAX_REPEAT */
+    {0, 0}, /* END_MIN_REPEAT */
+    {13, 0}, /* END_STRING */
+    {13, 0}, /* END_STRING_LINE */
+    {5, 0}, /* GROUPREF */
+    {6, 0}, /* GROUPREF_EXISTS */
+    {5, 0}, /* GROUPREF_IGNORE */
+    {0, 0}, /* JUMP */
+    {7, 0}, /* LITERAL */
+    {7, 0}, /* LITERAL_IGNORE */
+    {8, 0}, /* MARK */
+    {10, 16}, /* MAX_REPEAT: terminator is END_MAX_REPEAT (16) */
+    {11, 0}, /* MAX_REPEAT_ONE */
+    {10, 17}, /* MIN_REPEAT: terminator is END_MIN_REPEAT (17) */
+    {11, 0}, /* MIN_REPEAT_ONE */
+    {13, 0}, /* NOT_BOUNDARY */
+    {3, 0}, /* NOT_CATEGORY */
+    {4, 0}, /* NOT_CHARSET */
+    {4, 0}, /* NOT_CHARSET_IGNORE */
+    {7, 0}, /* NOT_LITERAL */
+    {7, 0}, /* NOT_LITERAL_IGNORE */
+    {9, 0}, /* NOT_RANGE */
+    {9, 0}, /* NOT_RANGE_IGNORE */
+    {12, 0}, /* NOT_SET */
+    {12, 0}, /* NOT_SET_IGNORE */
+    {9, 0}, /* RANGE */
+    {9, 0}, /* RANGE_IGNORE */
+    {12, 0}, /* SET */
+    {12, 0}, /* SET_IGNORE */
+}; /* NOTE(review): defined (not just declared) in a header; link clash if included from more than one .c file */