# HG changeset patch # User Andrew Kuchling # Date 1262051752 21600 # Branch py3k # Node ID 86ec1d04c8b157e345dc0123478a14fbbd513974 # Parent 61b94494604e4873a00fb187f3b2337b94904a40 [mq]: re-computed-goto diff -r 61b94494604e -r 86ec1d04c8b1 Modules/_sre.c --- a/Modules/_sre.c Thu Dec 24 15:21:55 2009 +0100 +++ b/Modules/_sre.c Mon Dec 28 19:55:52 2009 -0600 @@ -819,6 +819,20 @@ ctx->pattern = pattern; ctx_pos = alloc_pos; +#define SIGCHECK() ++sigcount; \ + if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) \ + RETURN_ERROR(SRE_ERROR_INTERRUPTED); + +#ifdef USE_COMPUTED_GOTOS +# include "re_opcodes.h" +# define TARGET(op) RE_TARGET_##op: case op: + +# define DISPATCH() SIGCHECK(); goto *re_opcode_targets[*ctx->pattern++]; +#else +# define TARGET(op) case op: +# define DISPATCH() break +#endif + entrance: ctx->ptr = (SRE_CHAR *)state->ptr; @@ -835,13 +849,11 @@ } for (;;) { - ++sigcount; - if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) - RETURN_ERROR(SRE_ERROR_INTERRUPTED); + SIGCHECK(); switch (*ctx->pattern++) { - case SRE_OP_MARK: + TARGET(SRE_OP_MARK) /* set mark */ /* */ TRACE(("|%p|%p|MARK %d\n", ctx->pattern, @@ -861,9 +873,9 @@ } state->mark[i] = ctx->ptr; ctx->pattern++; - break; - - case SRE_OP_LITERAL: + DISPATCH(); + + TARGET(SRE_OP_LITERAL) /* match literal string */ /* */ TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, @@ -872,9 +884,9 @@ RETURN_FAILURE; ctx->pattern++; ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL: + DISPATCH(); + + TARGET(SRE_OP_NOT_LITERAL) /* match anything that is not literal character */ /* */ TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, @@ -883,24 +895,24 @@ RETURN_FAILURE; ctx->pattern++; ctx->ptr++; - break; - - case SRE_OP_SUCCESS: + DISPATCH(); + + TARGET(SRE_OP_SUCCESS) /* end of pattern */ TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); state->ptr = ctx->ptr; RETURN_SUCCESS; - case SRE_OP_AT: + TARGET(SRE_OP_AT) /* match at given position */ /* */ TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); if (!SRE_AT(state, ctx->ptr, *ctx->pattern)) RETURN_FAILURE; ctx->pattern++; - break; - - case SRE_OP_CATEGORY: + DISPATCH(); + + TARGET(SRE_OP_CATEGORY) /* match at given category */ /* */ TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, @@ -909,27 +921,27 @@ RETURN_FAILURE; ctx->pattern++; ctx->ptr++; - break; - - case SRE_OP_ANY: + DISPATCH(); + + TARGET(SRE_OP_ANY) /* match anything (except a newline) */ /* */ TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) RETURN_FAILURE; ctx->ptr++; - break; - - case SRE_OP_ANY_ALL: + DISPATCH(); + + TARGET(SRE_OP_ANY_ALL) /* match anything */ /* */ TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); if (ctx->ptr >= end) RETURN_FAILURE; ctx->ptr++; - break; - - case SRE_OP_IN: + DISPATCH(); + + TARGET(SRE_OP_IN) /* match set member (or non_member) */ /* */ TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); @@ -937,9 +949,9 @@ RETURN_FAILURE; ctx->pattern += ctx->pattern[0]; ctx->ptr++; - break; - - case SRE_OP_LITERAL_IGNORE: + DISPATCH(); + + TARGET(SRE_OP_LITERAL_IGNORE) TRACE(("|%p|%p|LITERAL_IGNORE %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0])); if (ctx->ptr >= end || @@ -947,9 +959,9 @@ RETURN_FAILURE; ctx->pattern++; ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL_IGNORE: + DISPATCH(); + + TARGET(SRE_OP_NOT_LITERAL_IGNORE) TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); if (ctx->ptr >= end || @@ -957,9 +969,9 @@ RETURN_FAILURE; ctx->pattern++; ctx->ptr++; - break; - - case SRE_OP_IN_IGNORE: + DISPATCH(); + + TARGET(SRE_OP_IN_IGNORE) TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern+1, @@ -967,18 +979,18 @@ RETURN_FAILURE; ctx->pattern += ctx->pattern[0]; ctx->ptr++; - break; - - case SRE_OP_JUMP: - case SRE_OP_INFO: + DISPATCH(); + + TARGET(SRE_OP_JUMP) + TARGET(SRE_OP_INFO) /* jump forward */ /* */ TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0])); ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_BRANCH: + DISPATCH(); + + TARGET(SRE_OP_BRANCH) /* alternation */ /* <0=skip> code ... */ TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); @@ -1011,7 +1023,7 @@ MARK_POP_DISCARD(ctx->lastmark); RETURN_FAILURE; - case SRE_OP_REPEAT_ONE: + TARGET(SRE_OP_REPEAT_ONE) /* match repeated sequence (maximizing regexp) */ /* this operator only works if the repeated item is @@ -1094,7 +1106,7 @@ } RETURN_FAILURE; - case SRE_OP_MIN_REPEAT_ONE: + TARGET(SRE_OP_MIN_REPEAT_ONE) /* match repeated sequence (minimizing regexp) */ /* this operator only works if the repeated item is @@ -1158,7 +1170,7 @@ } RETURN_FAILURE; - case SRE_OP_REPEAT: + TARGET(SRE_OP_REPEAT) /* create repeat context. all the hard work is done by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ /* <1=min> <2=max> item tail */ @@ -1188,7 +1200,7 @@ } RETURN_FAILURE; - case SRE_OP_MAX_UNTIL: + TARGET(SRE_OP_MAX_UNTIL) /* maximizing repeat */ /* <1=min> <2=max> item tail */ @@ -1254,7 +1266,7 @@ state->ptr = ctx->ptr; RETURN_FAILURE; - case SRE_OP_MIN_UNTIL: + TARGET(SRE_OP_MIN_UNTIL) /* minimizing repeat */ /* <1=min> <2=max> item tail */ @@ -1313,7 +1325,7 @@ state->ptr = ctx->ptr; RETURN_FAILURE; - case SRE_OP_GROUPREF: + TARGET(SRE_OP_GROUPREF) /* match backreference */ TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0])); @@ -1335,9 +1347,9 @@ } } ctx->pattern++; - break; - - case SRE_OP_GROUPREF_IGNORE: + DISPATCH(); + + TARGET(SRE_OP_GROUPREF_IGNORE) /* match backreference */ TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0])); @@ -1360,9 +1372,9 @@ } } ctx->pattern++; - break; - - case SRE_OP_GROUPREF_EXISTS: + DISPATCH(); + + TARGET(SRE_OP_GROUPREF_EXISTS) TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0])); /* codeyes codeno ... */ @@ -1382,9 +1394,9 @@ } } ctx->pattern += 2; - break; - - case SRE_OP_ASSERT: + DISPATCH(); + + TARGET(SRE_OP_ASSERT) /* assert subpattern */ /* */ TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, @@ -1395,9 +1407,9 @@ DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2); RETURN_ON_FAILURE(ret); ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_ASSERT_NOT: + DISPATCH(); + + TARGET(SRE_OP_ASSERT_NOT) /* assert not subpattern */ /* */ TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, @@ -1411,13 +1423,16 @@ } } ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_FAILURE: + DISPATCH(); + + TARGET(SRE_OP_FAILURE) /* immediate failure */ TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); RETURN_FAILURE; +#ifdef USE_COMPUTED_GOTOS + _unknown_opcode: +#endif default: TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, ctx->pattern[-1])); diff -r 61b94494604e -r 86ec1d04c8b1 Modules/makereopcodes.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Modules/makereopcodes.py Mon Dec 28 19:55:52 2009 -0600 @@ -0,0 +1,32 @@ + +import re + +opcode_pat = re.compile('#define SRE_OP_(.*)\s+(\d+)') + +def main(): + mnem2int = {} + for line in open('sre_constants.h'): + m = opcode_pat.match(line) + if m: + mnem2int[m.group(1)] = int(m.group(2)) + + # Delete some opcodes that are not used in the main RE engine loop. + for k in ('SUBPATTERN', 'RANGE', 'NEGATE', 'CHARSET', 'BIGCHARSET', + 'CALL'): + del mnem2int[k] + + # Output table + max_opcode = max(mnem2int.values()) + int2mnem = dict((v,k) for (k,v) in mnem2int.items()) + assert len(int2mnem) == len(mnem2int), "Opcode codes are not unique" + + print 'static void *re_opcode_targets[%i] = {' % (max_opcode+2) + for i in range(0, max_opcode+1): + if i in int2mnem: + print ' &&RE_TARGET_SRE_OP_%s,' % (int2mnem[i]) + else: + print ' &&_unknown_opcode,' + print '};' + +if __name__ == '__main__': + main() diff -r 61b94494604e -r 86ec1d04c8b1 Modules/re_opcodes.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Modules/re_opcodes.h Mon Dec 28 19:55:52 2009 -0600 @@ -0,0 +1,34 @@ +static void *re_opcode_targets[33] = { + &&RE_TARGET_SRE_OP_FAILURE, + &&RE_TARGET_SRE_OP_SUCCESS, + &&RE_TARGET_SRE_OP_ANY, + &&RE_TARGET_SRE_OP_ANY_ALL, + &&RE_TARGET_SRE_OP_ASSERT, + &&RE_TARGET_SRE_OP_ASSERT_NOT, + &&RE_TARGET_SRE_OP_AT, + &&RE_TARGET_SRE_OP_BRANCH, + &&_unknown_opcode, + &&RE_TARGET_SRE_OP_CATEGORY, + &&_unknown_opcode, + &&_unknown_opcode, + &&RE_TARGET_SRE_OP_GROUPREF, + &&RE_TARGET_SRE_OP_GROUPREF_EXISTS, + &&RE_TARGET_SRE_OP_GROUPREF_IGNORE, + &&RE_TARGET_SRE_OP_IN, + &&RE_TARGET_SRE_OP_IN_IGNORE, + &&RE_TARGET_SRE_OP_INFO, + &&RE_TARGET_SRE_OP_JUMP, + &&RE_TARGET_SRE_OP_LITERAL, + &&RE_TARGET_SRE_OP_LITERAL_IGNORE, + &&RE_TARGET_SRE_OP_MARK, + &&RE_TARGET_SRE_OP_MAX_UNTIL, + &&RE_TARGET_SRE_OP_MIN_UNTIL, + &&RE_TARGET_SRE_OP_NOT_LITERAL, + &&RE_TARGET_SRE_OP_NOT_LITERAL_IGNORE, + &&_unknown_opcode, + &&_unknown_opcode, + &&RE_TARGET_SRE_OP_REPEAT, + &&RE_TARGET_SRE_OP_REPEAT_ONE, + &&_unknown_opcode, + &&RE_TARGET_SRE_OP_MIN_REPEAT_ONE, +};