Index: Modules/sre_constants.h =================================================================== --- Modules/sre_constants.h (revision 62337) +++ Modules/sre_constants.h (working copy) @@ -12,6 +12,7 @@ */ #define SRE_MAGIC 20031017 +#define SRE_MAX_REPEAT 65535 #define SRE_OP_FAILURE 0 #define SRE_OP_SUCCESS 1 #define SRE_OP_ANY 2 @@ -44,6 +45,10 @@ #define SRE_OP_REPEAT_ONE 29 #define SRE_OP_SUBPATTERN 30 #define SRE_OP_MIN_REPEAT_ONE 31 +#define SRE_OP_ATOMIC_START 32 +#define SRE_OP_ATOMIC_END 33 +#define SRE_OP_POSSESSIVE 34 +#define SRE_OP__COUNT 35 #define SRE_AT_BEGINNING 0 #define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BEGINNING_STRING 2 @@ -56,6 +61,7 @@ #define SRE_AT_LOC_NON_BOUNDARY 9 #define SRE_AT_UNI_BOUNDARY 10 #define SRE_AT_UNI_NON_BOUNDARY 11 +#define SRE_AT__COUNT 12 #define SRE_CATEGORY_DIGIT 0 #define SRE_CATEGORY_NOT_DIGIT 1 #define SRE_CATEGORY_SPACE 2 @@ -74,6 +80,7 @@ #define SRE_CATEGORY_UNI_NOT_WORD 15 #define SRE_CATEGORY_UNI_LINEBREAK 16 #define SRE_CATEGORY_UNI_NOT_LINEBREAK 17 +#define SRE_CATEGORY__COUNT 18 #define SRE_FLAG_TEMPLATE 1 #define SRE_FLAG_IGNORECASE 2 #define SRE_FLAG_LOCALE 4 Index: Modules/_sre.c =================================================================== --- Modules/_sre.c (revision 62337) +++ Modules/_sre.c (working copy) @@ -95,12 +95,19 @@ #endif /* error codes */ + #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ #define SRE_ERROR_STATE -2 /* illegal state */ #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ #define SRE_ERROR_MEMORY -9 /* out of memory */ #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ +static const Py_ssize_t SRE_MATCH_PASS = 0; /* Success */ +static const Py_ssize_t SRE_MATCH_FAIL = (Py_ssize_t)-1; /* Failure */ + +static const Py_ssize_t SRE_MATCH_GOOD = (Py_ssize_t)-1; /* Continue */ +static const Py_ssize_t SRE_MATCH_EXIT = 0; /* Abort / Exit */ + #if defined(VERBOSE) #define TRACE(v) printf v #else @@ -112,18 +119,18 @@ /* default character 
predicates (run sre_chars.py to regenerate tables) */ -#define SRE_DIGIT_MASK 1 -#define SRE_SPACE_MASK 2 -#define SRE_LINEBREAK_MASK 4 -#define SRE_ALNUM_MASK 8 -#define SRE_WORD_MASK 16 +static const unsigned char SRE_DIGIT_MASK = 1; +static const unsigned char SRE_SPACE_MASK = 2; +static const unsigned char SRE_LINEBREAK_MASK = 4; +static const unsigned char SRE_ALNUM_MASK = 8; +static const unsigned char SRE_WORD_MASK = 16; /* FIXME: this assumes ASCII. create tables in init_sre() instead */ -static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, -2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, -25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +static unsigned char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, +2, 6, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 }; @@ -138,117 +145,207 @@ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127 }; -#define SRE_IS_DIGIT(ch)\ - ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0) -#define SRE_IS_SPACE(ch)\ - ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0) -#define SRE_IS_LINEBREAK(ch)\ - ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0) -#define SRE_IS_ALNUM(ch)\ - ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0) -#define SRE_IS_WORD(ch)\ - ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0) - static unsigned int sre_lower(unsigned int ch) { return ((ch) < 128 ? 
(unsigned int)sre_char_lower[ch] : ch); } +static unsigned int sre_lower_locale(unsigned int ch) +{ + return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); +} + +#if defined(HAVE_UNICODE) +/* unicode-specific character predicates */ +static unsigned int sre_lower_unicode(unsigned int ch) +{ + return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch)); +} + +#endif /* defined(HAVE_UNICODE) */ +Py_LOCAL_INLINE(int) +sre_category_is_digit(unsigned int ch) +{ + if (ch < 128) { + return sre_char_info[ch] & SRE_DIGIT_MASK; + } + else { + return 0; + } +} + +Py_LOCAL_INLINE(int) +sre_category_is_not_digit(unsigned int ch) +{ + return !sre_category_is_digit(ch); +} + +Py_LOCAL_INLINE(int) +sre_category_is_space(unsigned int ch) +{ + if (ch < 128) { + return sre_char_info[ch] & SRE_SPACE_MASK; + } + else { + return 0; + } +} + +Py_LOCAL_INLINE(int) +sre_category_is_not_space(unsigned int ch) +{ + return !sre_category_is_space(ch); +} + +Py_LOCAL_INLINE(int) +sre_category_is_word(unsigned int ch) +{ + if (ch < 128) { + return sre_char_info[ch] & SRE_WORD_MASK; + } + else { + return 0; + } +} + +Py_LOCAL_INLINE(int) +sre_category_is_not_word(unsigned int ch) +{ + return !sre_category_is_word(ch); +} + +Py_LOCAL_INLINE(int) +sre_category_is_linebreak(unsigned int ch) +{ + if (ch < 128) { + return sre_char_info[ch] & SRE_LINEBREAK_MASK; + } + else { + return 0; + } +} + +Py_LOCAL_INLINE(int) +sre_category_is_not_linebreak(unsigned int ch) +{ + return !sre_category_is_linebreak(ch); +} + /* locale-specific character predicates */ /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids * warnings when c's type supports only numbers < N+1 */ -#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0) -#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0) -#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n') -#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? 
isalnum((ch)) : 0) -#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') +Py_LOCAL_INLINE(int) +sre_category_is_loc_word(unsigned int ch) +{ + /* Extended Localized characters must be 8-bit */ + if (!(ch & ~255)) { + return isalnum(ch) || ch == '_'; + } + else { + return ch == '_'; + } +} -static unsigned int sre_lower_locale(unsigned int ch) +Py_LOCAL_INLINE(int) +sre_category_is_not_loc_word(unsigned int ch) { - return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); + return !sre_category_is_loc_word(ch); } -/* unicode-specific character predicates */ - #if defined(HAVE_UNICODE) +Py_LOCAL_INLINE(int) +sre_category_unicode_is_digit(unsigned int ch) +{ + return Py_UNICODE_ISDIGIT((Py_UNICODE)ch); +} -#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch)) -#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch)) -#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch)) -#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch)) -#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_') +Py_LOCAL_INLINE(int) +sre_category_unicode_is_not_digit(unsigned int ch) +{ + return !sre_category_unicode_is_digit(ch); +} -static unsigned int sre_lower_unicode(unsigned int ch) +Py_LOCAL_INLINE(int) +sre_category_unicode_is_space(unsigned int ch) { - return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch)); + return Py_UNICODE_ISSPACE((Py_UNICODE)ch); } -#endif +Py_LOCAL_INLINE(int) +sre_category_unicode_is_not_space(unsigned int ch) +{ + return !sre_category_unicode_is_space(ch); +} -LOCAL(int) -sre_category(SRE_CODE category, unsigned int ch) +Py_LOCAL_INLINE(int) +sre_category_unicode_is_word(unsigned int ch) { - switch (category) { + return Py_UNICODE_ISALNUM((Py_UNICODE)ch) || ch == '_'; +} - case SRE_CATEGORY_DIGIT: - return SRE_IS_DIGIT(ch); - case SRE_CATEGORY_NOT_DIGIT: - return !SRE_IS_DIGIT(ch); - case SRE_CATEGORY_SPACE: - return SRE_IS_SPACE(ch); - case SRE_CATEGORY_NOT_SPACE: - return 
!SRE_IS_SPACE(ch); - case SRE_CATEGORY_WORD: - return SRE_IS_WORD(ch); - case SRE_CATEGORY_NOT_WORD: - return !SRE_IS_WORD(ch); - case SRE_CATEGORY_LINEBREAK: - return SRE_IS_LINEBREAK(ch); - case SRE_CATEGORY_NOT_LINEBREAK: - return !SRE_IS_LINEBREAK(ch); +Py_LOCAL_INLINE(int) +sre_category_unicode_is_not_word(unsigned int ch) +{ + return !sre_category_unicode_is_word(ch); +} - case SRE_CATEGORY_LOC_WORD: - return SRE_LOC_IS_WORD(ch); - case SRE_CATEGORY_LOC_NOT_WORD: - return !SRE_LOC_IS_WORD(ch); +Py_LOCAL_INLINE(int) +sre_category_unicode_is_linebreak(unsigned int ch) +{ + return Py_UNICODE_ISLINEBREAK((Py_UNICODE)ch); +} +Py_LOCAL_INLINE(int) +sre_category_unicode_is_not_linebreak(unsigned int ch) +{ + return !sre_category_unicode_is_linebreak(ch); +} + +#endif /* defined(HAVE_UNICODE) */ +int (*sre_category_lookup_table[SRE_CATEGORY__COUNT]) (unsigned int) = { + sre_category_is_digit, /* SRE_CATEGORY_DIGIT */ + sre_category_is_not_digit, /* SRE_CATEGORY_NOT_DIGIT */ + sre_category_is_space, /* SRE_CATEGORY_SPACE */ + sre_category_is_not_space, /* SRE_CATEGORY_NOT_SPACE */ + sre_category_is_word, /* SRE_CATEGORY_WORD */ + sre_category_is_not_word, /* SRE_CATEGORY_NOT_WORD */ + sre_category_is_linebreak, /* SRE_CATEGORY_LINEBREAK */ + sre_category_is_not_linebreak, /* SRE_CATEGORY_NOT_LINEBREAK */ + sre_category_is_loc_word, /* SRE_CATEGORY_LOC_WORD */ + sre_category_is_not_loc_word, /* SRE_CATEGORY_LOC_NOT_WORD */ #if defined(HAVE_UNICODE) - case SRE_CATEGORY_UNI_DIGIT: - return SRE_UNI_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_NOT_DIGIT: - return !SRE_UNI_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_SPACE: - return SRE_UNI_IS_SPACE(ch); - case SRE_CATEGORY_UNI_NOT_SPACE: - return !SRE_UNI_IS_SPACE(ch); - case SRE_CATEGORY_UNI_WORD: - return SRE_UNI_IS_WORD(ch); - case SRE_CATEGORY_UNI_NOT_WORD: - return !SRE_UNI_IS_WORD(ch); - case SRE_CATEGORY_UNI_LINEBREAK: - return SRE_UNI_IS_LINEBREAK(ch); - case SRE_CATEGORY_UNI_NOT_LINEBREAK: - return !SRE_UNI_IS_LINEBREAK(ch); 
-#else - case SRE_CATEGORY_UNI_DIGIT: - return SRE_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_NOT_DIGIT: - return !SRE_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_SPACE: - return SRE_IS_SPACE(ch); - case SRE_CATEGORY_UNI_NOT_SPACE: - return !SRE_IS_SPACE(ch); - case SRE_CATEGORY_UNI_WORD: - return SRE_LOC_IS_WORD(ch); - case SRE_CATEGORY_UNI_NOT_WORD: - return !SRE_LOC_IS_WORD(ch); - case SRE_CATEGORY_UNI_LINEBREAK: - return SRE_IS_LINEBREAK(ch); - case SRE_CATEGORY_UNI_NOT_LINEBREAK: - return !SRE_IS_LINEBREAK(ch); -#endif + sre_category_unicode_is_digit, /* SRE_CATEGORY_UNI_DIGIT */ + sre_category_unicode_is_not_digit, /* SRE_CATEGORY_UNI_NOT_DIGIT */ + sre_category_unicode_is_space, /* SRE_CATEGORY_UNI_SPACE */ + sre_category_unicode_is_not_space, /* SRE_CATEGORY_UNI_NOT_SPACE */ + sre_category_unicode_is_word, /* SRE_CATEGORY_UNI_WORD */ + sre_category_unicode_is_not_word, /* SRE_CATEGORY_UNI_NOT_WORD */ + sre_category_unicode_is_linebreak, /* SRE_CATEGORY_UNI_LINEBREAK */ + /* SRE_CATEGORY_UNI_NOT_LINEBREAK */ + sre_category_unicode_is_not_linebreak +#else /* defined(HAVE_UNICODE) */ + sre_category_is_digit, /* SRE_CATEGORY_UNI_DIGIT */ + sre_category_is_not_digit, /* SRE_CATEGORY_UNI_NOT_DIGIT */ + sre_category_is_space, /* SRE_CATEGORY_UNI_SPACE */ + sre_category_is_not_space, /* SRE_CATEGORY_UNI_NOT_SPACE */ + sre_category_is_loc_word, /* SRE_CATEGORY_UNI_WORD */ + sre_category_is_not_loc_word, /* SRE_CATEGORY_UNI_NOT_WORD */ + sre_category_is_linebreak, /* SRE_CATEGORY_UNI_LINEBREAK */ + sre_category_is_not_linebreak /* SRE_CATEGORY_UNI_NOT_LINEBREAK */ +#endif /* defined(HAVE_UNICODE) */ +}; + +Py_LOCAL_INLINE(int) +sre_category(SRE_CODE category, unsigned int ch) +{ + unsigned int code = (unsigned int)category; + + if (code < SRE_CATEGORY__COUNT) { + return sre_category_lookup_table[code](ch); } - return 0; + else { + return 0; + } } /* helpers */ @@ -296,6 +393,98 @@ #define SRE_SEARCH sre_search #define SRE_LITERAL_TEMPLATE sre_literal_template +#define 
SRE_AT_LOOKUP_TABLE sre_at_lookup_table +#define SRE_IS_AT_BEGINNING_LINE sre_is_at_beginning_line +#define SRE_IS_AT_BEGINNING_STRING sre_is_at_beginning_string +#define SRE_IS_AT_BOUNDARY sre_is_at_boundary +#define SRE_IS_AT_NON_BOUNDARY sre_is_at_non_boundary +#define SRE_IS_AT_END sre_is_at_end +#define SRE_IS_AT_END_LINE sre_is_at_end_line +#define SRE_IS_AT_END_STRING sre_is_at_end_string +#define SRE_IS_AT_LOC_BOUNDARY sre_is_at_loc_boundary +#define SRE_IS_AT_LOC_NOT_BOUNDARY sre_is_at_loc_not_boundary +#define SRE_IS_AT_UNI_BOUNDARY sre_is_at_uni_boundary +#define SRE_IS_AT_UNI_NON_BOUNDARY sre_is_at_uni_non_boundary +#define SRE_IS_AT_UNI_FAIL sre_is_at_uni_fail +#define SRE_MATCH_GLOBAL_CONTEXT sre_match_global_context +#define SRE_MATCH_LASTMARK_SAVE sre_match_lastmark_save +#define SRE_MATCH_LASTMARK_RESTORE sre_match_lastmark_restore +#define SRE_MATCH_FORCE_EXIT sre_match_force_exit +#define SRE_MATCH_RETURN_ERROR sre_match_return_error +#define SRE_MATCH_RETURN_SUCCESS sre_match_return_success +#define SRE_MATCH_RETURN_FAILURE sre_match_return_failure +#define SRE_MATCH_RETURN_ON_ERROR sre_match_return_on_error +#define SRE_MATCH_RETURN_ON_SUCCESS sre_match_return_on_success +#define SRE_MATCH_RETURN_ON_FAILURE sre_match_return_on_failure +#define SRE_MATCH_DATA_STACK_LOOKUP_AT sre_match_data_stack_lookup_at +#define SRE_MATCH_DATA_STACK_ALLOC sre_match_data_stack_alloc +#define SRE_MATCH_DATA_STACK_PUSH sre_match_data_stack_push +#define SRE_MATCH_DATA_STACK_POP sre_match_data_stack_pop +#define SRE_MATCH_DATA_STACK_POP_DISCARD sre_match_data_stack_pop_discard +#define SRE_MATCH_DATA_PUSH sre_match_data_push +#define SRE_MATCH_DATA_POP sre_match_data_pop +#define SRE_MATCH_DATA_POP_DISCARD sre_match_data_pop_discard +#define SRE_MATCH_DATA_ALLOC sre_match_data_alloc +#define SRE_MATCH_DATA_LOOKUP_AT sre_match_data_lookup_at +#define SRE_MATCH_MARK_PUSH sre_match_mark_push +#define SRE_MATCH_MARK_POP sre_match_mark_pop +#define 
SRE_MATCH_MARK_POP_KEEP sre_match_mark_pop_keep +#define SRE_MATCH_MARK_POP_DISCARD sre_match_mark_pop_discard +#define SRE_MATCH_PARSE_HEADER sre_match_parse_header +#define SRE_MATCH_ON_FAILURE sre_match_on_failure +#define SRE_MATCH_ON_SUCCESS sre_match_on_success +#define SRE_MATCH_ON_ANY sre_match_on_any +#define SRE_MATCH_ON_ANY_ALL sre_match_on_any_all +#define SRE_MATCH_ON_ASSERT sre_match_on_assert +#define SRE_MATCH_ON_ASSERT_NOT sre_match_on_assert_not +#define SRE_MATCH_ON_AT sre_match_on_at +#define SRE_MATCH_ON_BRANCH sre_match_on_branch +#define SRE_MATCH_ON_CALL sre_match_on_call +#define SRE_MATCH_ON_CATEGORY sre_match_on_category +#define SRE_MATCH_ON_CHARSET sre_match_on_charset +#define SRE_MATCH_ON_BIGCHARSET sre_match_on_bigcharset +#define SRE_MATCH_ON_GROUPREF sre_match_on_groupref +#define SRE_MATCH_ON_GROUPREF_EXISTS sre_match_on_groupref_exists +#define SRE_MATCH_ON_GROUPREF_IGNORE sre_match_on_groupref_ignore +#define SRE_MATCH_ON_IN sre_match_on_in +#define SRE_MATCH_ON_IN_IGNORE sre_match_on_in_ignore +#define SRE_MATCH_ON_JUMP sre_match_on_jump +#define SRE_MATCH_ON_LITERAL sre_match_on_literal +#define SRE_MATCH_ON_LITERAL_IGNORE sre_match_on_literal_ignore +#define SRE_MATCH_ON_MARK sre_match_on_mark +#define SRE_MATCH_ON_MAX_UNTIL sre_match_on_max_until +#define SRE_MATCH_ON_MIN_UNTIL sre_match_on_min_until +#define SRE_MATCH_ON_NOT_LITERAL sre_match_on_not_literal +#define SRE_MATCH_ON_NOT_LITERAL_IGNORE sre_match_on_not_literal_ignore +#define SRE_MATCH_ON_NEGATE sre_match_on_negate +#define SRE_MATCH_ON_RANGE sre_match_on_range +#define SRE_MATCH_ON_REPEAT sre_match_on_repeat +#define SRE_MATCH_ON_REPEAT_ONE sre_match_on_repeat_one +#define SRE_MATCH_ON_SUBPATTERN sre_match_on_subpattern +#define SRE_MATCH_ON_MIN_REPEAT_ONE sre_match_on_min_repeat_one +#define SRE_MATCH_ON_ATOMIC_START sre_match_on_atomic_start +#define SRE_MATCH_ON_ATOMIC_END sre_match_on_atomic_end +#define SRE_MATCH_ON_POSSESSIVE sre_match_on_possessive 
+#define SRE_MATCH_ON_DEFAULT sre_match_on_default +#define SRE_MATCH_ON_JUMP_NONE sre_match_on_jump_none +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_1 sre_match_on_jump_max_until_1 +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_2 sre_match_on_jump_max_until_2 +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_3 sre_match_on_jump_max_until_3 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_1 sre_match_on_jump_min_until_1 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_2 sre_match_on_jump_min_until_2 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_3 sre_match_on_jump_min_until_3 +#define SRE_MATCH_ON_JUMP_REPEAT sre_match_on_jump_repeat +#define SRE_MATCH_ON_JUMP_REPEAT_ONE_1 sre_match_on_jump_repeat_one_1 +#define SRE_MATCH_ON_JUMP_REPEAT_ONE_2 sre_match_on_jump_repeat_one_2 +#define SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE sre_match_on_jump_min_repeat_one +#define SRE_MATCH_ON_JUMP_BRANCH sre_match_on_jump_branch +#define SRE_MATCH_ON_JUMP_ASSERT sre_match_on_jump_assert +#define SRE_MATCH_ON_JUMP_ASSERT_NOT sre_match_on_jump_assert_not +#define SRE_MATCH_LOOKUP_TABLE sre_match_lookup_table +#define SRE_MATCH_UNWIND_LOOKUP_TABLE sre_match_unwind_lookup_table +#define SRE_MATCH_ON_PUSH_CTX_AND_POSITION sre_match_on_push_ctx_and_position +#define SRE_MATCH_ON_POP_CTX_AND_POSITION sre_match_on_pop_ctx_and_position + #if defined(HAVE_UNICODE) #define SRE_RECURSIVE @@ -312,6 +501,98 @@ #undef SRE_AT #undef SRE_CHAR +#undef SRE_AT_LOOKUP_TABLE +#undef SRE_IS_AT_BEGINNING_LINE +#undef SRE_IS_AT_BEGINNING_STRING +#undef SRE_IS_AT_BOUNDARY +#undef SRE_IS_AT_NON_BOUNDARY +#undef SRE_IS_AT_END +#undef SRE_IS_AT_END_LINE +#undef SRE_IS_AT_END_STRING +#undef SRE_IS_AT_LOC_BOUNDARY +#undef SRE_IS_AT_LOC_NOT_BOUNDARY +#undef SRE_IS_AT_UNI_BOUNDARY +#undef SRE_IS_AT_UNI_NON_BOUNDARY +#undef SRE_IS_AT_UNI_FAIL +#undef SRE_MATCH_GLOBAL_CONTEXT +#undef SRE_MATCH_LASTMARK_SAVE +#undef SRE_MATCH_LASTMARK_RESTORE +#undef SRE_MATCH_FORCE_EXIT +#undef SRE_MATCH_RETURN_ERROR +#undef SRE_MATCH_RETURN_SUCCESS +#undef SRE_MATCH_RETURN_FAILURE +#undef 
SRE_MATCH_RETURN_ON_ERROR +#undef SRE_MATCH_RETURN_ON_SUCCESS +#undef SRE_MATCH_RETURN_ON_FAILURE +#undef SRE_MATCH_DATA_STACK_LOOKUP_AT +#undef SRE_MATCH_DATA_STACK_ALLOC +#undef SRE_MATCH_DATA_STACK_PUSH +#undef SRE_MATCH_DATA_STACK_POP +#undef SRE_MATCH_DATA_STACK_POP_DISCARD +#undef SRE_MATCH_DATA_PUSH +#undef SRE_MATCH_DATA_POP +#undef SRE_MATCH_DATA_POP_DISCARD +#undef SRE_MATCH_DATA_ALLOC +#undef SRE_MATCH_DATA_LOOKUP_AT +#undef SRE_MATCH_MARK_PUSH +#undef SRE_MATCH_MARK_POP +#undef SRE_MATCH_MARK_POP_KEEP +#undef SRE_MATCH_MARK_POP_DISCARD +#undef SRE_MATCH_PARSE_HEADER +#undef SRE_MATCH_ON_FAILURE +#undef SRE_MATCH_ON_SUCCESS +#undef SRE_MATCH_ON_ANY +#undef SRE_MATCH_ON_ANY_ALL +#undef SRE_MATCH_ON_ASSERT +#undef SRE_MATCH_ON_ASSERT_NOT +#undef SRE_MATCH_ON_AT +#undef SRE_MATCH_ON_BRANCH +#undef SRE_MATCH_ON_CALL +#undef SRE_MATCH_ON_CATEGORY +#undef SRE_MATCH_ON_CHARSET +#undef SRE_MATCH_ON_BIGCHARSET +#undef SRE_MATCH_ON_GROUPREF +#undef SRE_MATCH_ON_GROUPREF_EXISTS +#undef SRE_MATCH_ON_GROUPREF_IGNORE +#undef SRE_MATCH_ON_IN +#undef SRE_MATCH_ON_IN_IGNORE +#undef SRE_MATCH_ON_JUMP +#undef SRE_MATCH_ON_LITERAL +#undef SRE_MATCH_ON_LITERAL_IGNORE +#undef SRE_MATCH_ON_MARK +#undef SRE_MATCH_ON_MAX_UNTIL +#undef SRE_MATCH_ON_MIN_UNTIL +#undef SRE_MATCH_ON_NOT_LITERAL +#undef SRE_MATCH_ON_NOT_LITERAL_IGNORE +#undef SRE_MATCH_ON_NEGATE +#undef SRE_MATCH_ON_RANGE +#undef SRE_MATCH_ON_REPEAT +#undef SRE_MATCH_ON_REPEAT_ONE +#undef SRE_MATCH_ON_SUBPATTERN +#undef SRE_MATCH_ON_MIN_REPEAT_ONE +#undef SRE_MATCH_ON_ATOMIC_START +#undef SRE_MATCH_ON_ATOMIC_END +#undef SRE_MATCH_ON_POSSESSIVE +#undef SRE_MATCH_ON_DEFAULT +#undef SRE_MATCH_ON_JUMP_NONE +#undef SRE_MATCH_ON_JUMP_MAX_UNTIL_1 +#undef SRE_MATCH_ON_JUMP_MAX_UNTIL_2 +#undef SRE_MATCH_ON_JUMP_MAX_UNTIL_3 +#undef SRE_MATCH_ON_JUMP_MIN_UNTIL_1 +#undef SRE_MATCH_ON_JUMP_MIN_UNTIL_2 +#undef SRE_MATCH_ON_JUMP_MIN_UNTIL_3 +#undef SRE_MATCH_ON_JUMP_REPEAT +#undef SRE_MATCH_ON_JUMP_REPEAT_ONE_1 +#undef 
SRE_MATCH_ON_JUMP_REPEAT_ONE_2 +#undef SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE +#undef SRE_MATCH_ON_JUMP_BRANCH +#undef SRE_MATCH_ON_JUMP_ASSERT +#undef SRE_MATCH_ON_JUMP_ASSERT_NOT +#undef SRE_MATCH_LOOKUP_TABLE +#undef SRE_MATCH_UNWIND_LOOKUP_TABLE +#undef SRE_MATCH_ON_PUSH_CTX_AND_POSITION +#undef SRE_MATCH_ON_POP_CTX_AND_POSITION + /* generate 16-bit unicode version */ #define SRE_CHAR Py_UNICODE @@ -323,6 +604,98 @@ #define SRE_MATCH_CONTEXT sre_umatch_context #define SRE_SEARCH sre_usearch #define SRE_LITERAL_TEMPLATE sre_uliteral_template + +#define SRE_AT_LOOKUP_TABLE sre_uat_lookup_table +#define SRE_IS_AT_BEGINNING_LINE sre_uis_at_beginning_line +#define SRE_IS_AT_BEGINNING_STRING sre_uis_at_beginning_string +#define SRE_IS_AT_BOUNDARY sre_uis_at_boundary +#define SRE_IS_AT_NON_BOUNDARY sre_uis_at_non_boundary +#define SRE_IS_AT_END sre_uis_at_end +#define SRE_IS_AT_END_LINE sre_uis_at_end_line +#define SRE_IS_AT_END_STRING sre_uis_at_end_string +#define SRE_IS_AT_LOC_BOUNDARY sre_uis_at_loc_boundary +#define SRE_IS_AT_LOC_NOT_BOUNDARY sre_uis_at_loc_not_boundary +#define SRE_IS_AT_UNI_BOUNDARY sre_uis_at_uni_boundary +#define SRE_IS_AT_UNI_NON_BOUNDARY sre_uis_at_uni_non_boundary +#define SRE_IS_AT_UNI_FAIL sre_uis_at_uni_fail +#define SRE_MATCH_GLOBAL_CONTEXT sre_umatch_global_context +#define SRE_MATCH_LASTMARK_SAVE sre_umatch_lastmark_save +#define SRE_MATCH_LASTMARK_RESTORE sre_umatch_lastmark_restore +#define SRE_MATCH_FORCE_EXIT sre_umatch_force_exit +#define SRE_MATCH_RETURN_ERROR sre_umatch_return_error +#define SRE_MATCH_RETURN_SUCCESS sre_umatch_return_success +#define SRE_MATCH_RETURN_FAILURE sre_umatch_return_failure +#define SRE_MATCH_RETURN_ON_ERROR sre_umatch_return_on_error +#define SRE_MATCH_RETURN_ON_SUCCESS sre_umatch_return_on_success +#define SRE_MATCH_RETURN_ON_FAILURE sre_umatch_return_on_failure +#define SRE_MATCH_DATA_STACK_LOOKUP_AT sre_umatch_data_stack_lookup_at +#define SRE_MATCH_DATA_STACK_ALLOC sre_umatch_data_stack_alloc +#define 
SRE_MATCH_DATA_STACK_PUSH sre_umatch_data_stack_push +#define SRE_MATCH_DATA_STACK_POP sre_umatch_data_stack_pop +#define SRE_MATCH_DATA_STACK_POP_DISCARD sre_umatch_data_stack_pop_discard +#define SRE_MATCH_DATA_PUSH sre_umatch_data_push +#define SRE_MATCH_DATA_POP sre_umatch_data_pop +#define SRE_MATCH_DATA_POP_DISCARD sre_umatch_data_pop_discard +#define SRE_MATCH_DATA_ALLOC sre_umatch_data_alloc +#define SRE_MATCH_DATA_LOOKUP_AT sre_umatch_data_lookup_at +#define SRE_MATCH_MARK_PUSH sre_umatch_mark_push +#define SRE_MATCH_MARK_POP sre_umatch_mark_pop +#define SRE_MATCH_MARK_POP_KEEP sre_umatch_mark_pop_keep +#define SRE_MATCH_MARK_POP_DISCARD sre_umatch_mark_pop_discard +#define SRE_MATCH_PARSE_HEADER sre_umatch_parse_header +#define SRE_MATCH_ON_FAILURE sre_umatch_on_failure +#define SRE_MATCH_ON_SUCCESS sre_umatch_on_success +#define SRE_MATCH_ON_ANY sre_umatch_on_any +#define SRE_MATCH_ON_ANY_ALL sre_umatch_on_any_all +#define SRE_MATCH_ON_ASSERT sre_umatch_on_assert +#define SRE_MATCH_ON_ASSERT_NOT sre_umatch_on_assert_not +#define SRE_MATCH_ON_AT sre_umatch_on_at +#define SRE_MATCH_ON_BRANCH sre_umatch_on_branch +#define SRE_MATCH_ON_CALL sre_umatch_on_call +#define SRE_MATCH_ON_CATEGORY sre_umatch_on_category +#define SRE_MATCH_ON_CHARSET sre_umatch_on_charset +#define SRE_MATCH_ON_BIGCHARSET sre_umatch_on_bigcharset +#define SRE_MATCH_ON_GROUPREF sre_umatch_on_groupref +#define SRE_MATCH_ON_GROUPREF_EXISTS sre_umatch_on_groupref_exists +#define SRE_MATCH_ON_GROUPREF_IGNORE sre_umatch_on_groupref_ignore +#define SRE_MATCH_ON_IN sre_umatch_on_in +#define SRE_MATCH_ON_IN_IGNORE sre_umatch_on_in_ignore +#define SRE_MATCH_ON_JUMP sre_umatch_on_jump +#define SRE_MATCH_ON_LITERAL sre_umatch_on_literal +#define SRE_MATCH_ON_LITERAL_IGNORE sre_umatch_on_literal_ignore +#define SRE_MATCH_ON_MARK sre_umatch_on_mark +#define SRE_MATCH_ON_MAX_UNTIL sre_umatch_on_max_until +#define SRE_MATCH_ON_MIN_UNTIL sre_umatch_on_min_until +#define SRE_MATCH_ON_NOT_LITERAL 
sre_umatch_on_not_literal +#define SRE_MATCH_ON_NOT_LITERAL_IGNORE sre_umatch_on_not_literal_ignore +#define SRE_MATCH_ON_NEGATE sre_umatch_on_negate +#define SRE_MATCH_ON_RANGE sre_umatch_on_range +#define SRE_MATCH_ON_REPEAT sre_umatch_on_repeat +#define SRE_MATCH_ON_REPEAT_ONE sre_umatch_on_repeat_one +#define SRE_MATCH_ON_SUBPATTERN sre_umatch_on_subpattern +#define SRE_MATCH_ON_MIN_REPEAT_ONE sre_umatch_on_min_repeat_one +#define SRE_MATCH_ON_ATOMIC_START sre_umatch_on_atomic_start +#define SRE_MATCH_ON_ATOMIC_END sre_umatch_on_atomic_end +#define SRE_MATCH_ON_POSSESSIVE sre_umatch_on_possessive +#define SRE_MATCH_ON_DEFAULT sre_umatch_on_default +#define SRE_MATCH_ON_JUMP_NONE sre_umatch_on_jump_none +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_1 sre_umatch_on_jump_max_until_1 +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_2 sre_umatch_on_jump_max_until_2 +#define SRE_MATCH_ON_JUMP_MAX_UNTIL_3 sre_umatch_on_jump_max_until_3 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_1 sre_umatch_on_jump_min_until_1 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_2 sre_umatch_on_jump_min_until_2 +#define SRE_MATCH_ON_JUMP_MIN_UNTIL_3 sre_umatch_on_jump_min_until_3 +#define SRE_MATCH_ON_JUMP_REPEAT sre_umatch_on_jump_repeat +#define SRE_MATCH_ON_JUMP_REPEAT_ONE_1 sre_umatch_on_jump_repeat_one_1 +#define SRE_MATCH_ON_JUMP_REPEAT_ONE_2 sre_umatch_on_jump_repeat_one_2 +#define SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE sre_umatch_on_jump_min_repeat_one +#define SRE_MATCH_ON_JUMP_BRANCH sre_umatch_on_jump_branch +#define SRE_MATCH_ON_JUMP_ASSERT sre_umatch_on_jump_assert +#define SRE_MATCH_ON_JUMP_ASSERT_NOT sre_umatch_on_jump_assert_not +#define SRE_MATCH_LOOKUP_TABLE sre_umatch_lookup_table +#define SRE_MATCH_UNWIND_LOOKUP_TABLE sre_umatch_unwind_lookup_table +#define SRE_MATCH_ON_PUSH_CTX_AND_POSITION sre_umatch_on_push_ctx_and_position +#define SRE_MATCH_ON_POP_CTX_AND_POSITION sre_umatch_on_pop_ctx_and_position #endif #endif /* SRE_RECURSIVE */ @@ -333,96 +706,245 @@ /* the following section is compiled twice, with 
different character settings */ -LOCAL(int) -SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) +Py_LOCAL_INLINE(int) +SRE_IS_AT_BEGINNING_STRING(SRE_STATE *state, SRE_CHAR *ptr) { - /* check if pointer is at given position */ + return (void *)ptr == state->beginning; +} - Py_ssize_t thisp, thatp; +Py_LOCAL_INLINE(int) +SRE_IS_AT_BEGINNING_LINE(SRE_STATE *state, SRE_CHAR *ptr) +{ + return (void *)ptr == state->beginning || + sre_category_is_linebreak((unsigned int)ptr[-1]); +} - switch (at) { +Py_LOCAL_INLINE(int) +SRE_IS_AT_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr) +{ + if (state->beginning == state->end) { + return 0; + } + else { + Py_ssize_t thisp, thatp; - case SRE_AT_BEGINNING: - case SRE_AT_BEGINNING_STRING: - return ((void*) ptr == state->beginning); + if ((void *)ptr > state->beginning) { + thatp = sre_category_is_word((unsigned int)ptr[-1]); + } + else { + thatp = 0; + } - case SRE_AT_BEGINNING_LINE: - return ((void*) ptr == state->beginning || - SRE_IS_LINEBREAK((int) ptr[-1])); + if ((void *)ptr < state->end) { + thisp = sre_category_is_word((unsigned int)ptr[0]); + } + else { + thisp = 0; + } - case SRE_AT_END: - return (((void*) (ptr+1) == state->end && - SRE_IS_LINEBREAK((int) ptr[0])) || - ((void*) ptr == state->end)); + return thisp != thatp; + } +} - case SRE_AT_END_LINE: - return ((void*) ptr == state->end || - SRE_IS_LINEBREAK((int) ptr[0])); +Py_LOCAL_INLINE(int) +SRE_IS_AT_NON_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr) +{ + if (state->beginning == state->end) { + return 0; + } + else { + Py_ssize_t thisp, thatp; - case SRE_AT_END_STRING: - return ((void*) ptr == state->end); + if ((void *)ptr > state->beginning) { + thatp = sre_category_is_word((unsigned int)ptr[-1]); + } + else { + thatp = 0; + } - case SRE_AT_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? 
- SRE_IS_WORD((int) ptr[0]) : 0; - return thisp != thatp; + if ((void*) ptr < state->end) { + thisp = sre_category_is_word((unsigned int)ptr[0]); + } + else { + thisp = 0; + } - case SRE_AT_NON_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? - SRE_IS_WORD((int) ptr[0]) : 0; return thisp == thatp; + } +} - case SRE_AT_LOC_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_LOC_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? - SRE_LOC_IS_WORD((int) ptr[0]) : 0; +Py_LOCAL_INLINE(int) +SRE_IS_AT_END_STRING(SRE_STATE *state, SRE_CHAR *ptr) +{ + return (void *)ptr == state->end; +} + +Py_LOCAL_INLINE(int) +SRE_IS_AT_END(SRE_STATE *state, SRE_CHAR *ptr) +{ + return ((void *)(ptr + 1) == state->end && + sre_category_is_linebreak((unsigned int)ptr[0])) || + (void *)ptr == state->end; +} + +Py_LOCAL_INLINE(int) +SRE_IS_AT_END_LINE(SRE_STATE *state, SRE_CHAR *ptr) +{ + return (void *)ptr == state->end || + sre_category_is_linebreak((unsigned int)ptr[0]); +} + +Py_LOCAL_INLINE(int) +SRE_IS_AT_LOC_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr) +{ + if (state->beginning == state->end) { + return 0; + } + else { + Py_ssize_t thisp, thatp; + + if ((void *)ptr > state->beginning) { + thatp = sre_category_is_loc_word((unsigned int)ptr[-1]); + } + else { + thatp = 0; + } + + if ((void *)ptr < state->end) { + thisp = sre_category_is_loc_word((unsigned int)ptr[0]); + } + else { + thisp = 0; + } + return thisp != thatp; + } +} - case SRE_AT_LOC_NON_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_LOC_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? 
- SRE_LOC_IS_WORD((int) ptr[0]) : 0; +Py_LOCAL_INLINE(int) +SRE_IS_AT_LOC_NOT_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr) +{ + if (state->beginning == state->end) { + return 0; + } + else { + Py_ssize_t thisp, thatp; + + if ((void *)ptr > state->beginning) { + thatp = sre_category_is_loc_word((unsigned int)ptr[-1]); + } + else { + thatp = 0; + } + + if ((void *)ptr < state->end) { + thisp = sre_category_is_loc_word((unsigned int)ptr[0]); + } + else { + thisp = 0; + } + return thisp == thatp; + } +} #if defined(HAVE_UNICODE) - case SRE_AT_UNI_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_UNI_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? - SRE_UNI_IS_WORD((int) ptr[0]) : 0; +Py_LOCAL_INLINE(int) +SRE_IS_AT_UNI_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr) +{ + if (state->beginning == state->end) { + return 0; + } + else { + Py_ssize_t thisp, thatp; + + if ((void *)ptr > state->beginning) { + thatp = sre_category_unicode_is_word((unsigned int)ptr[-1]); + } + else { + thatp = 0; + } + + if ((void *)ptr < state->end) { + thisp = sre_category_unicode_is_word((unsigned int)ptr[0]); + } + else { + thisp = 0; + } + return thisp != thatp; + } +} - case SRE_AT_UNI_NON_BOUNDARY: - if (state->beginning == state->end) - return 0; - thatp = ((void*) ptr > state->beginning) ? - SRE_UNI_IS_WORD((int) ptr[-1]) : 0; - thisp = ((void*) ptr < state->end) ? 
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+Py_LOCAL_INLINE(int)
+SRE_IS_AT_UNI_NON_BOUNDARY(SRE_STATE *state, SRE_CHAR *ptr)
+{
+    if (state->beginning == state->end) {
+        return 0;
+    }
+    else {
+        Py_ssize_t thisp, thatp;
+
+        if ((void *)ptr > state->beginning) {
+            thatp = sre_category_unicode_is_word((unsigned int)ptr[-1]);
+        }
+        else {
+            thatp = 0;
+        }
+
+        if ((void *)ptr < state->end) {
+            thisp = sre_category_unicode_is_word((unsigned int)ptr[0]);
+        }
+        else {
+            thisp = 0;
+        }
         return thisp == thatp;
-#endif
-    }
+    }
+}
+#else /* defined(HAVE_UNICODE) */
+Py_LOCAL_INLINE(int)
+SRE_IS_AT_UNI_FAIL(SRE_STATE *state, SRE_CHAR *ptr)
+{ /* non-Unicode build: table entry for the SRE_AT_UNI_* codes; never matches */
     return 0;
 }
+#endif /* defined(HAVE_UNICODE) */
+
+int (*SRE_AT_LOOKUP_TABLE[SRE_AT__COUNT])(SRE_STATE *, SRE_CHAR *) = { /* indexed by SRE_AT_* code */
+    SRE_IS_AT_BEGINNING_STRING, /* SRE_AT_BEGINNING */
+    SRE_IS_AT_BEGINNING_LINE, /* SRE_AT_BEGINNING_LINE */
+    SRE_IS_AT_BEGINNING_STRING, /* SRE_AT_BEGINNING_STRING */
+    SRE_IS_AT_BOUNDARY, /* SRE_AT_BOUNDARY */
+    SRE_IS_AT_NON_BOUNDARY, /* SRE_AT_NON_BOUNDARY */
+    SRE_IS_AT_END, /* SRE_AT_END */
+    SRE_IS_AT_END_LINE, /* SRE_AT_END_LINE */
+    SRE_IS_AT_END_STRING, /* SRE_AT_END_STRING */
+    SRE_IS_AT_LOC_BOUNDARY, /* SRE_AT_LOC_BOUNDARY */
+    SRE_IS_AT_LOC_NOT_BOUNDARY, /* SRE_AT_LOC_NON_BOUNDARY */
+#if defined(HAVE_UNICODE)
+    SRE_IS_AT_UNI_BOUNDARY, /* SRE_AT_UNI_BOUNDARY */
+    SRE_IS_AT_UNI_NON_BOUNDARY /* SRE_AT_UNI_NON_BOUNDARY */
+#else /* defined(HAVE_UNICODE) */
+    SRE_IS_AT_UNI_FAIL, /* SRE_AT_UNI_BOUNDARY */
+    SRE_IS_AT_UNI_FAIL /* SRE_AT_UNI_NON_BOUNDARY */
+#endif /* defined(HAVE_UNICODE) */
+};
+
+Py_LOCAL_INLINE(int)
+SRE_AT(SRE_STATE *state, SRE_CHAR *ptr, SRE_CODE at)
+{
+    unsigned int code = (unsigned int)at;
+
+    if (code < SRE_AT__COUNT) {
+        return SRE_AT_LOOKUP_TABLE[code](state, ptr);
+    }
+    else {
+        return 0;
+    }
+}
+
 LOCAL(int)
 SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
 {
@@ -524,7 +1046,7 @@
     Py_ssize_t i;
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < end - ptr && maxcount !=
SRE_MAX_REPEAT) end = ptr + maxcount; switch (pattern[0]) { @@ -539,7 +1061,7 @@ case SRE_OP_ANY: /* repeated dot wildcard. */ TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); - while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) + while (ptr < end && !sre_category_is_linebreak(*ptr)) ptr++; break; @@ -657,6 +1179,7 @@ * * For more information, check the discussion at SF patch #712900. */ +#if !REMOVE_SRE_MATCH_MACROS #define LASTMARK_SAVE() \ do { \ ctx->lastmark = state->lastmark; \ @@ -761,6 +1284,7 @@ DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ } while (0) +#endif /* !REMOVE_SRE_MATCH_MACROS */ #define JUMP_NONE 0 #define JUMP_MAX_UNTIL_1 1 #define JUMP_MAX_UNTIL_2 2 @@ -775,7 +1299,9 @@ #define JUMP_BRANCH 11 #define JUMP_ASSERT 12 #define JUMP_ASSERT_NOT 13 +#define JUMP__COUNT 14 +#if !REMOVE_SRE_MATCH_MACROS #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \ nextctx->last_ctx_pos = ctx_pos; \ @@ -787,6 +1313,7 @@ jumplabel: \ while (0) /* gcc doesn't like labels at end of scopes */ \ +#endif /* !REMOVE_SRE_MATCH_MACROS */ typedef struct { Py_ssize_t last_ctx_pos; Py_ssize_t jump; @@ -803,6 +1330,7 @@ /* check if string matches the given pattern. 
returns <0 for error, 0 for failure, and 1 for success */ +#if !REMOVE_SRE_MATCH_MACROS LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) { @@ -810,7 +1338,7 @@ Py_ssize_t alloc_pos, ctx_pos = -1; Py_ssize_t i, ret = 0; Py_ssize_t jump; - unsigned int sigcount=0; + unsigned int sigcount=0; /* Iteration Counter; for signalling */ SRE_MATCH_CONTEXT* ctx; SRE_MATCH_CONTEXT* nextctx; @@ -919,7 +1447,7 @@ /* match anything (except a newline) */ /* */ TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) + if (ctx->ptr >= end || sre_category_is_linebreak(ctx->ptr[0])) RETURN_FAILURE; ctx->ptr++; break; @@ -1485,6 +2013,3003 @@ return ret; /* should never get here */ } +#else /* REMOVE_SRE_MATCH_MACROS */ +/* TODO: Look into making alloc_pos a local variable, as it should really be a return value of the ALLOC functions. */ +/* TODO: Look into removing op_code, as it is only used when a bad op code is passed and only for debugging */ +typedef struct { + SRE_CHAR *end; /* Pointer to the end of the input state->ptr */ + Py_ssize_t alloc_pos; /* Offset in state->data_stack of newly */ + /* allocated ctx */ + Py_ssize_t ctx_pos; /* Offset in state->data_stack of current ctx */ + Py_ssize_t ret; /* Return value */ + unsigned int jump; /* The current stack unwind position */ + SRE_MATCH_CONTEXT *ctx; /* Current Context */ +#if TRIPLE_LOOP + Py_ssize_t good; /* 0 means change stack level, 1 means continue */ + Py_ssize_t restart; /* 0 means exit stack level, 1 means enter */ +#endif /* TRIPLE_LOOP */ + unsigned int op_code; /* The current Op Code */ +} SRE_MATCH_GLOBAL_CONTEXT; + +#if !TRIPLE_LOOP +Py_LOCAL(Py_ssize_t) +SRE_MATCH_ON_PUSH_CTX_AND_POSITION(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state, Py_ssize_t jump, + SRE_CODE *pattern); +Py_LOCAL(Py_ssize_t) +SRE_MATCH_ON_POP_CTX_AND_POSITION(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state); + +#endif /* !TRIPLE_LOOP */ +Py_LOCAL_INLINE(void) 
+SRE_MATCH_LASTMARK_SAVE(SRE_MATCH_GLOBAL_CONTEXT *self, + const SRE_STATE *state) +{ + /* Save Mark State */ + self->ctx->lastmark = state->lastmark; + self->ctx->lastindex = state->lastindex; +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_LASTMARK_RESTORE(const SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* Restore Mark State */ + state->lastmark = self->ctx->lastmark; + state->lastindex = self->ctx->lastindex; +} + +/* Basic Return Functionality */ +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_ERROR(SRE_MATCH_GLOBAL_CONTEXT *self, + Py_ssize_t ret) +{ + /* Set the return code to error /ret/ and force exit */ + /* TODO: when this is called, should return right away! */ + self->ret = ret; + return SRE_MATCH_FAIL; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_FAILURE(SRE_MATCH_GLOBAL_CONTEXT *self) +{ + /* Set the return code to 0 (Failure) and pop stack */ + self->ret = 0; + +#if TRIPLE_LOOP + /* Set the loop variables to exit two levels */ + /* goto exit */ + self->good = SRE_MATCH_EXIT; + self->restart = SRE_MATCH_EXIT; + + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + /* Return immediately with error and */ + /* Call the code to Pop a Stack Frame */ + return SRE_MATCH_FAIL; +#endif /* TRIPLE_LOOP */ +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_SUCCESS(SRE_MATCH_GLOBAL_CONTEXT *self) +{ + /* Set the return code to 1 (Success) and pop stack */ + self->ret = 1; + +#if TRIPLE_LOOP + /* Set the loop variables to exit two levels */ + /* goto exit */ + self->good = SRE_MATCH_EXIT; + self->restart = SRE_MATCH_EXIT; + + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + /* Return immediately with error and */ + /* Call the code to Pop a Stack Frame */ + return SRE_MATCH_FAIL; +#endif /* TRIPLE_LOOP */ +} + +/* More Return Functionality */ +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_ON_ERROR(SRE_MATCH_GLOBAL_CONTEXT *self, + Py_ssize_t ret) +{ + /* IFF /ret/ is an error code, force exit */ + if (ret < 0) { + return SRE_MATCH_RETURN_ERROR(self, 
ret); + /* TODO: when this is called, should return right away! */ + } + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_ON_SUCCESS(SRE_MATCH_GLOBAL_CONTEXT *self, + Py_ssize_t ret) +{ + /* IFF /ret/ is an error code, force exit; otherwise, if it is */ + /* positive, set the return code to Success and force exit. */ + Py_ssize_t result = SRE_MATCH_RETURN_ON_ERROR(self, ret); + + if (result == SRE_MATCH_PASS && ret > 0) { + return SRE_MATCH_RETURN_SUCCESS(self); + } + + /* TODO: Return on error */ + return result; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_RETURN_ON_FAILURE(SRE_MATCH_GLOBAL_CONTEXT *self, + Py_ssize_t ret) +{ + /* IFF /ret/ is an error code, force exit; otherwise, if it is */ + /* 0, set the return code to Failure and force exit. */ + Py_ssize_t result = SRE_MATCH_RETURN_ON_ERROR(self, ret); + + /* TODO: Return on error */ + if (result == SRE_MATCH_PASS && ret == 0) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + return result; +} + +/* Stack Code */ +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_STACK_LOOKUP_AT(const SRE_STATE *state, + SRE_MATCH_CONTEXT **ptr, Py_ssize_t pos); + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_DATA_STACK_ALLOC(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state, SRE_MATCH_CONTEXT **ptr) +{ + self->alloc_pos = state->data_stack_base; + + TRACE(("allocating %s in %d (%d)\n", "SRE_MATCH_CONTEXT", + self->alloc_pos, sizeof(**ptr))); + + if (state->data_stack_size < self->alloc_pos + sizeof(**ptr)) { + int j = data_stack_grow(state, sizeof(**ptr)); + + if (j < 0) { + return SRE_MATCH_RETURN_ERROR(self, j); + /* TODO: when this is called, should return right away! 
*/ + } + + else if (self->ctx_pos != -1) { + SRE_MATCH_DATA_STACK_LOOKUP_AT(state, &self->ctx, + self->ctx_pos); + } + } + + *ptr = (SRE_MATCH_CONTEXT *)(state->data_stack + self->alloc_pos); + state->data_stack_base += sizeof(**ptr); + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_STACK_LOOKUP_AT(const SRE_STATE *state, + SRE_MATCH_CONTEXT **ptr, Py_ssize_t pos) +{ + TRACE(("looking up %s at %d\n", "SRE_MATCH_CONTEXT", pos)); + *ptr = (SRE_MATCH_CONTEXT *)(state->data_stack + pos); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_DATA_STACK_PUSH(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state, void **data, + Py_ssize_t size) +{ + TRACE(("copy data in %p to %d (%d)\n", data, state->data_stack_base, + size)); + + if (state->data_stack_size < state->data_stack_base + size) { + int j = data_stack_grow(state, size); + + if (j < 0) { + return SRE_MATCH_RETURN_ERROR(self, j); + /* TODO: when this is called, should return right away! */ + } + + else if (self->ctx_pos != -1) { + SRE_MATCH_DATA_STACK_LOOKUP_AT(state, &self->ctx, + self->ctx_pos); + } + } + + memcpy(state->data_stack + state->data_stack_base, data, size); + state->data_stack_base += size; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_STACK_POP(SRE_STATE *state, void **data, + Py_ssize_t size, Py_ssize_t discard) +{ + TRACE(("copy data to %p from %d (%d)\n", data, + state->data_stack_base - size, size)); + + memcpy(data, state->data_stack + state->data_stack_base - size, + size); + + if (discard) { + state->data_stack_base -= size; + } +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_STACK_POP_DISCARD(SRE_STATE *state, Py_ssize_t size) +{ + TRACE(("discard data from %d (%d)\n", state->data_stack_base - size, + size)); + state->data_stack_base -= size; +} + +/* Stack Function Wrappers */ +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_DATA_PUSH(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state, + void **x) +{ + return SRE_MATCH_DATA_STACK_PUSH(self, state, x, sizeof(*x)); +} + 
+Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_POP(SRE_STATE *state, void **x) +{ + SRE_MATCH_DATA_STACK_POP(state, x, sizeof(*x), 1); +} + +/* Discard is specific for Contexts */ +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_POP_DISCARD(SRE_STATE *state) +{ + SRE_MATCH_DATA_STACK_POP_DISCARD(state, sizeof(SRE_MATCH_CONTEXT)); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_DATA_ALLOC(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state, + SRE_MATCH_CONTEXT **p) +{ + return SRE_MATCH_DATA_STACK_ALLOC(self, state, p); +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_DATA_LOOKUP_AT(const SRE_STATE *state, + SRE_MATCH_CONTEXT **p, Py_ssize_t pos) +{ + SRE_MATCH_DATA_STACK_LOOKUP_AT(state, p, pos); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_MARK_PUSH(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state, + Py_ssize_t lastmark) +{ + if (lastmark > 0) { + int i = lastmark; /* ctx->lastmark may change if reallocated */ + + return SRE_MATCH_DATA_STACK_PUSH(self, state, state->mark, + (i + 1) * sizeof(void *)); + } + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_MARK_POP(SRE_STATE *state, Py_ssize_t lastmark) +{ + if (lastmark > 0) { + SRE_MATCH_DATA_STACK_POP(state, state->mark, + (lastmark + 1) * sizeof(void *), 1); + } +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_MARK_POP_KEEP(SRE_STATE *state, Py_ssize_t lastmark) +{ + if (lastmark > 0) { + SRE_MATCH_DATA_STACK_POP(state, state->mark, + (lastmark + 1) * sizeof(void *), 0); + } +} + +Py_LOCAL_INLINE(void) +SRE_MATCH_MARK_POP_DISCARD(SRE_STATE *state, Py_ssize_t lastmark) +{ + if (lastmark > 0) { + SRE_MATCH_DATA_STACK_POP_DISCARD(state, (lastmark + 1) * + sizeof(void *)); + } +} + +#if INCLUDE_SRE_MATCH_PARSE_HEADER +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_PARSE_HEADER(SRE_MATCH_GLOBAL_CONTEXT *self, + const SRE_STATE *state) +{ + /* TODO: Clean Up */ + self->good = 1; + self->ctx->ptr = (SRE_CHAR *)state->ptr; + + if (self->ctx->pattern[0] == SRE_OP_INFO) { + /* optimization info block */ + /* <1=skip> <2=flags> <3=min> ... 
*/ + /* Info gives the minimum characters needed to match in *OP + 3 */ + /* So if the input string is less than that, fail. */ + if (self->ctx->pattern[3] && (self->end - self->ctx->ptr) < self->ctx->pattern[3]) { + TRACE(("reject (got %d chars, need %d)\n", + (self->end - self->ctx->ptr), self->ctx->pattern[3])); + return SRE_MATCH_RETURN_FAILURE(self); + } + + self->ctx->pattern += self->ctx->pattern[1] + 1; + } + + return SRE_MATCH_PASS; +} + +#endif /* INCLUDE_SRE_MATCH_PARSE_HEADER */ +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_FAILURE(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* immediate failure */ + /* */ + + TRACE(("|%p|%p|FAILURE\n", self->ctx->pattern, self->ctx->ptr)); + + return SRE_MATCH_RETURN_FAILURE(self); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_SUCCESS(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* end of pattern */ + /* */ + TRACE(("|%p|%p|SUCCESS\n", self->ctx->pattern, self->ctx->ptr)); + + /* Set the state pointer to the current context's input position */ + state->ptr = self->ctx->ptr; + + return SRE_MATCH_RETURN_SUCCESS(self); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_ANY(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match anything (except a newline) */ + /* */ + TRACE(("|%p|%p|ANY\n", self->ctx->pattern, self->ctx->ptr)); + + if (self->ctx->ptr >= self->end || + sre_category_is_linebreak(self->ctx->ptr[0])) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Character */ + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_ANY_ALL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match anything */ + /* */ + TRACE(("|%p|%p|ANY_ALL\n", self->ctx->pattern, self->ctx->ptr)); + + if (self->ctx->ptr >= self->end) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Character */ + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_ASSERT(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ 
+ /* assert subpattern */ + /* tail */ + /* points to tail */ + /* is 0 for look-ahead */ + /* skip is not used */ + int back = (int)self->ctx->pattern[1]; + SRE_CODE *pattern = &self->ctx->pattern[2]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|ASSERT %d\n", self->ctx->pattern, self->ctx->ptr, + back)); + + /* Roll back the input stream by back */ + state->ptr = self->ctx->ptr - back; + + /* Did we roll past the beginning? */ + if (state->ptr < state->beginning) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Examine the input stream in the region represented by back */ + /* Move the match pattern to the pattern parameter of the Assert */ + /* and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_ASSERT when stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_ASSERT; + nextctx->pattern = pattern; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_assert: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, JUMP_ASSERT, + pattern); +#endif /* TRIPLE_LOOP */ +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_ASSERT_NOT(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* assert not subpattern */ + /* tail */ + /* points to tail */ + /* is 0 for look-ahead */ + int skip = (int)self->ctx->pattern[0]; + int back = (int)self->ctx->pattern[1]; + SRE_CODE *pattern = &self->ctx->pattern[2]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|ASSERT_NOT %d\n", self->ctx->pattern, self->ctx->ptr, + back)); + + /* Roll back the input stream by back */ + state->ptr = self->ctx->ptr - back; + + /* 
Did we roll past the beginning? */ + if (state->ptr >= state->beginning) { + /* Examine the input stream in the region represented by back */ + /* Move the match pattern to the pattern parameter of the Not */ + /* Assert and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_ASSERT_NOT when stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_ASSERT_NOT; + nextctx->pattern = pattern; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_assert_not: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_ASSERT_NOT, + pattern); +#endif /* TRIPLE_LOOP */ + } + else { + /* Advance Pattern to Next Op Code */ + self->ctx->pattern += skip; + + /* Successful Return */ + return SRE_MATCH_PASS; + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_AT(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match at given position */ + /* */ + SRE_CHAR code = self->ctx->pattern[0]; + + TRACE(("|%p|%p|AT %d\n", self->ctx->pattern, self->ctx->ptr, code)); + + /* Check the position of the input buffer position in */ + /* self->ctx->ptr and if it is not where code indicates, fail. */ + if (!SRE_AT(state, self->ctx->ptr, code)) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next op code */ + self->ctx->pattern++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_BRANCH(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* alternation */ + /* <0=skip> code */ + /* ( code )* tail */ + /* Each points to the one passed the next , */ + /* i.e. the next or if the last one */ + /* Each always points to the end of the Branching, */ + /* i.e. 
tail */ + int skip = (int)self->ctx->pattern[0]; + SRE_CODE *p_next_op = &self->ctx->pattern[1]; + SRE_CHAR literal = (SRE_CHAR)self->ctx->pattern[2]; + SRE_CODE *set = &self->ctx->pattern[3]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|BRANCH\n", self->ctx->pattern, self->ctx->ptr)); + + /* Save the state's current Mark */ + SRE_MATCH_LASTMARK_SAVE(self, state); + + /* Save the current Repeat Context in the current Match Context */ + self->ctx->u.rep = state->repeat; + + /* Verify that there is a current Repeat Context */ + if (self->ctx->u.rep) { + /* There is a prior Repeat context; save the current */ + /* Context's previous Mark in the current State */ + SRE_MATCH_MARK_PUSH(self, state, self->ctx->lastmark); + } + + /* Skip simple match cases; e.g. Literal and In op codes */ + /* If skip is the op code, it will be zero and the */ + /* loop will exit */ + while (skip && + + /* Check for Literal Mismatch */ + ((*p_next_op == SRE_OP_LITERAL && + /* Check for End of Input Stream */ + (self->ctx->ptr >= self->end || + /* Check for Literal Mismatch */ + (SRE_CODE)*self->ctx->ptr != literal)) || + + /* Check for Character Class Mismatch */ + (*p_next_op == SRE_OP_IN && + /* Check for End of Input Stream */ + (self->ctx->ptr >= self->end || + /* Check for Character not in Class */ + !SRE_CHARSET(set, (SRE_CODE)*self->ctx->ptr))))) { + + /* This branch option will not match */ + /* Increment the pattern to the next branch point */ + self->ctx->pattern += skip; + + /* Update the named aliases list */ + skip = (int)self->ctx->pattern[0]; + p_next_op = &self->ctx->pattern[1]; + literal = (SRE_CHAR)self->ctx->pattern[2]; + set = &self->ctx->pattern[3]; + } + + /* If skip is the op code, it will be zero and the */ + /* pseudo-recursion will be skipped. 
*/ + if (skip) { + /* Set the current input buffer position to that stored in */ + /* the Current Context */ + state->ptr = self->ctx->ptr; + + /* Move the match pattern to the next code block's op code */ + /* and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_BRANCH when stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + /* Return non-zero on error */ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_BRANCH; + nextctx->pattern = p_next_op; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_branch: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_BRANCH, + p_next_op); +#endif /* TRIPLE_LOOP */ + } + else { + /* There are no more codes sections in the Branch, and none */ + /* of the previous ones matched, so fail and pop stack. 
*/ + + if (self->ctx->u.rep) { + /* A mark was left in the Context's Repeat Context; */ + /* discard it */ + SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark); + } + + /* Fail and Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +/* TODO: SRE_MATCH_ON_CALL */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_CATEGORY(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match at given category */ + /* */ + SRE_CHAR code = self->ctx->pattern[0]; + + TRACE(("|%p|%p|CATEGORY %d\n", self->ctx->pattern, self->ctx->ptr, + code)); + + /* If not at the end of input, check the next character to see if */ + /* it is in the given Character Class */ + if (self->ctx->ptr >= self->end || + !sre_category(code, self->ctx->ptr[0])) { + + /* Failed to match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern++; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +/* TODO: SRE_MATCH_ON_CHARSET */ +/* TODO: SRE_MATCH_ON_BIGCHARSET */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_GROUPREF(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match backreference */ + + /* ... 
*/ + Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0]; + + /* For every group ID, there are 2 grouprefs, one for beginning, */ + /* one for end; so groupref is twice group */ + Py_ssize_t groupref = group + group; + + TRACE(("|%p|%p|GROUPREF %d\n", self->ctx->pattern, self->ctx->ptr, + group)); + + /* Check if the groupref is in the valid range */ + if (groupref >= state->lastmark) { + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + else { + /* Get the beginning and end of the Group captured from the */ + /* Input Stream */ + + SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref]; + /* b is the beginning position in the Input Stream of the */ + /* previously matched group */ + + SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1]; + /* e is the ending position in the Input Stream of the */ + /* previously matched group */ + + /* Verify that the beginning and ending pointers are valid */ + /* and that the beginning occurs before the ending */ + if (!b || !e || e < b) { + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + else { + /* For each matched character of the corresponding group: */ + while (b < e) { + /* Check it against the current Input Stream to */ + /* verify that it matches */ + /* Stop if the Input Stream is at its end */ + if (self->ctx->ptr >= self->end || + *self->ctx->ptr != *b) { + + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Increment the group match reference */ + b++; + + /* Increment the Input Stream */ + self->ctx->ptr++; + } + } + } + + /* Move to next Op Code */ + self->ctx->pattern++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_GROUPREF_EXISTS(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* Match codeyes if group matched, otherwise match codeno */ + + /* codeyes ( */ + /* codeno)? 
tail */ + /* points to codeno, one after the , if codeno */ + /* exists, otherwise to tail */ + /* points to tail */ + /* Note: because is the SECOND parameter to */ + /* , it contains one more than its proper offset */ + /* from its position and is thus relative to the current value of */ + /* self->ctx->pattern */ + Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0]; + int skipyes = (int)self->ctx->pattern[1]; + + /* For every group ID, there are 2 grouprefs, one for beginning, */ + /* one for end; so groupref is twice group */ + Py_ssize_t groupref = group + group; + + TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", self->ctx->pattern, + self->ctx->ptr, group)); + + /* Check if the groupref is in the valid range */ + if (groupref >= state->lastmark) { + /* Skip the codeyes and go directly to codeno */ + self->ctx->pattern += skipyes; + + /* Continue to codeno */ + return SRE_MATCH_PASS; + } + + else { + /* Check to see if the indicated groupref has previously been */ + /* captured */ + + SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref]; + /* b is the beginning position in the Input Stream of the */ + /* previously matched group */ + + SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1]; + /* e is the ending position in the Input Stream of the */ + /* previously matched group */ + + /* Verify that the beginning and ending pointers are valid */ + /* and that the beginning occurs before the ending */ + if (!b || !e || e < b) { + /* Skip the codeyes and go directly to codeno */ + self->ctx->pattern += skipyes; + + /* Continue to codeno */ + return SRE_MATCH_PASS; + } + } + + /* Move to next Op Code, i.e. codeyes (skipping the 2 parameters) */ + self->ctx->pattern += 2; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_GROUPREF_IGNORE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match backreference, ignoring case */ + + /* ... 
*/ + Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0]; + + /* For every group ID, there are 2 grouprefs, one for beginning, */ + /* one for end; so groupref is twice group */ + Py_ssize_t groupref = group + group; + + TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", self->ctx->pattern, + self->ctx->ptr, group)); + + /* Check if the groupref is in the valid range */ + if (groupref >= state->lastmark) { + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + else { + /* Get the beginning and end of the Group captured from the */ + /* Input Stream */ + + SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref]; + /* b is the beginning position in the Input Stream of the */ + /* previously matched group */ + + SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1]; + /* e is the ending position in the Input Stream of the */ + /* previously matched group */ + + /* Verify that the beginning and ending pointers are valid */ + /* and that the beginning occurs before the ending */ + if (!b || !e || e < b) { + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + else { + /* For each matched character of the corresponding group: */ + while (b < e) { + /* Check it against the current Input Stream to */ + /* verify that it matches case-insensitively */ + /* Stop if the Input Stream is at its end */ + if (self->ctx->ptr >= self->end || + state->lower(*self->ctx->ptr) != state->lower(*b)) { + + /* Failed to Match; Pop Stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Increment the group match reference */ + b++; + + /* Increment the Input Stream */ + self->ctx->ptr++; + } + } + } + + /* Move to next Op Code */ + self->ctx->pattern++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_IN(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match set member (or non_member) */ + /* tail */ + /* points to tail */ + int skip = (int)self->ctx->pattern[0]; + SRE_CODE *set = &self->ctx->pattern[1]; + + 
TRACE(("|%p|%p|IN\n", self->ctx->pattern, self->ctx->ptr)); + + /* If not at the end of input, check the next character to see if */ + /* it is in the given Character Set */ + if (self->ctx->ptr >= self->end || + !SRE_CHARSET(set, *self->ctx->ptr)) { + + /* Failed to Match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern += skip; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_IN_IGNORE(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match set member (or non_member), ignoring case */ + /* tail */ + /* points to tail */ + int skip = (int)self->ctx->pattern[0]; + SRE_CODE *set = &self->ctx->pattern[1]; + + TRACE(("|%p|%p|IN_IGNORE\n", self->ctx->pattern, self->ctx->ptr)); + + /* If not at the end of input, check the next character to see if */ + /* it is in the given Character Set, without regard to case */ + if (self->ctx->ptr >= self->end || + !SRE_CHARSET(set, (SRE_CODE)state->lower(*self->ctx->ptr))) { + + /* Failed to Match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern += skip; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +/* Note: The SRE_OP_INFO handler is actually SRE_MATCH_ON_JUMP */ +/* since if this occurs in the middle of an expression, it is */ +/* treated as if it represents a skip. See SRE_MATCH_ON_JUMP */ +/* for more information. */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* jump forward */ + /* ... tail */ + /* points to tail */ + int offset = (int)self->ctx->pattern[0]; + + /* Note: This function can be used for any OpCode of the form */ + /* [*] if the skip is always to be taken */ + /* I.e. . 
*/ + TRACE(("|%p|%p|JUMP %d\n", self->ctx->pattern, self->ctx->ptr, + offset)); + + /* Move to next Op Code */ + self->ctx->pattern += offset; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_LITERAL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* match literal string */ + /* */ + SRE_CODE code = self->ctx->pattern[0]; + + TRACE(("|%p|%p|LITERAL %d\n", self->ctx->pattern, self->ctx->ptr, + code)); + + /* If there are no more characters or the current character does */ + /* NOT match */ + if (self->ctx->ptr >= self->end || + (SRE_CODE)self->ctx->ptr[0] != code) { + + /* Failed to Match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern++; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_LITERAL_IGNORE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match literal string, ignoring case */ + /* */ + SRE_CODE code = self->ctx->pattern[0]; + + TRACE(("|%p|%p|LITERAL_IGNORE %d\n", self->ctx->pattern, + self->ctx->ptr, code)); + + /* If there are no more characters or the current character does */ + /* NOT match, without regard to case */ + if (self->ctx->ptr >= self->end || + state->lower(*self->ctx->ptr) != state->lower(code)) { + + /* Failed to Match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern++; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_MARK(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* set mark */ + /* */ + /* gid is even for starting Mark and odd for ending mark */ + /* Thus, each unique gid is separated by 2 */ + Py_ssize_t gid = self->ctx->pattern[0]; + + TRACE(("|%p|%p|MARK %d\n", self->ctx->pattern, self->ctx->ptr, + gid)); + + if (gid & 1) { + /* Closing gid found, normalize it because they come in pairs */ + /* and add 1 to get its offset in the last 
index list */ + state->lastindex = gid / 2 + 1; + } + + /* If gid is beyond the highest valid mark index, extend it */ + if (gid > state->lastmark) { + /* state->lastmark is the highest valid index in the + state->mark array. If it is increased by more than 1, + the intervening marks must be set to NULL to signal + that these marks have not been encountered. */ + Py_ssize_t j = state->lastmark + 1; + + /* NULL out the intervening marks */ + while (j < gid) { + state->mark[j++] = NULL; + } + + /* Record gid as the new highest valid index */ + state->lastmark = gid; + } + + /* Store the current input position in the mark slot for gid */ + state->mark[gid] = self->ctx->ptr; + + /* Move to next op code */ + self->ctx->pattern++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_MAX_UNTIL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* maximizing repeat */ + /* <1=min> <2=max> item tail */ + /* points to */ + /* skip is not used */ + int min; /* Initialized Later */ + int max; /* Initialized Later */ + SRE_CODE *item; /* Initialized Later */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + /* FIXME: we probably need to deal with zero-width + matches in here... */ + + /* Get the Repeat Context from the one created by the */ + /* corresponding (in terms of nesting, i.e. if it goes repeat */ + /* repeat until repeat until until, the outermost until */ + /* corresponding to the first repeat) repeat, which should have */ + /* been stored in the global state when the corresponding Repeat */ + /* was encountered */ + /* Store current Repeat Context in this Context */ + /* Note: This is done because although the Repeat Context is */ + /* created in the call before the Push Stack (i.e. 
in the Repeat */ + /* handler), because Push Stack is done generically, the current */ + /* Repeat Context is instead stored in the global State and then */ + /* placed in the newly created context here after the context has */ + /* been created or when returned to after another repeat match. */ + self->ctx->u.rep = state->repeat; + + /* Check to see if the Repeat Context is valid; if not, we have */ + /* Until without Repeat, and need to return an error immediately */ + /* as the pattern is invalid */ + if (!self->ctx->u.rep) { + return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_STATE); + } + + /* Set the current input stream position to the one saved in the */ + /* Current Context */ + state->ptr = self->ctx->ptr; + + /* Store the count of sub-matches found so far (on the first pass */ + /* this goes from -1 to 0) in the current context */ + self->ctx->count = self->ctx->u.rep->count + 1; + + TRACE(("|%p|%p|MAX_UNTIL %d\n", self->ctx->pattern, self->ctx->ptr, + self->ctx->count)); + + /* The pattern in the Context's Repeat Context points to just */ + /* past the corresponding REPEAT op code; set the name aliases */ + /* based on this */ + min = (int)self->ctx->u.rep->pattern[1]; + max = (int)self->ctx->u.rep->pattern[2]; + item = &self->ctx->u.rep->pattern[3]; + + if (self->ctx->count < min) { + /* not enough matches */ + + /* Put the number of matches back in the Repeat Context */ + self->ctx->u.rep->count = self->ctx->count; + + /* Parse the Repeated Pattern once again */ + /* Move the match pattern to item to be repeatedly matched */ + /* and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MAX_UNTIL_1 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MAX_UNTIL_1; + nextctx->pattern = item; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ +
/* jump_max_until_1: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MAX_UNTIL_1, + item); +#endif /* TRIPLE_LOOP */ + } + else if ((self->ctx->count < max || max == SRE_MAX_REPEAT) && + state->ptr != self->ctx->u.rep->last_ptr) { + /* we may have enough matches, but if we can + match another item, do so */ + + /* Put the number of matches back in the Repeat Context */ + self->ctx->u.rep->count = self->ctx->count; + + /* Save the state's current Mark */ + SRE_MATCH_LASTMARK_SAVE(self, state); + + /* Save the current Context's previous Mark into the current */ + /* State's Stack */ + SRE_MATCH_MARK_PUSH(self, state, self->ctx->lastmark); + + /* zero-width match protection: this branch is not taken when state->ptr equals last_ptr */ + /* Save the previous Input Stream position stored in the */ + /* Context's Repeat Context into the current State's Stack */ + SRE_MATCH_DATA_PUSH(self, state, &self->ctx->u.rep->last_ptr); + + /* Set the Current Repeat Context's previous Input Stream to */ + /* the Current Input Stream position */ + self->ctx->u.rep->last_ptr = state->ptr; + + /* Parse the Repeated Pattern once again */ + /* Move the match pattern to item to be repeatedly matched */ + /* and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MAX_UNTIL_2 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MAX_UNTIL_2; + nextctx->pattern = item; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_max_until_2: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MAX_UNTIL_2, + item); +#endif /* TRIPLE_LOOP */ + } +
else { + /* cannot match more repeated items here. make sure the + tail matches */ + + /* Restore the previous Repeat Context to be the current */ + /* Repeat Context */ + state->repeat = self->ctx->u.rep->prev; + + /* Parse the tail */ + /* Move the match pattern to tail to verify that it matches and */ + /* create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MAX_UNTIL_3 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MAX_UNTIL_3; + nextctx->pattern = self->ctx->pattern; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_max_until_3: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MAX_UNTIL_3, + self->ctx->pattern); +#endif /* TRIPLE_LOOP */ + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_MIN_UNTIL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* minimizing repeat */ + /* repeat operands, read through u.rep->pattern: [1]=min [2]=max [3..]=item */ + /* self->ctx->pattern points to the tail */ + /* skip is not used */ + int min; /* Initialized Later */ + /* max is not used */ + SRE_CODE *item; /* Initialized Later */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + /* Get the Repeat Context from the one created by the */ + /* corresponding (in terms of nesting, i.e. if it goes repeat */ + /* repeat until repeat until until, the outermost until */ + /* corresponding to the first repeat) repeat, which should have */ + /* been stored in the global state when the corresponding Repeat */ + /* was encountered */ + /* Store current Repeat Context in this Context */ + /* Note: This is done because although the Repeat Context is */ + /* created in the call before the Push Stack (i.e.
in the Repeat */ + /* handler), because Push Stack is done generically, the current */ + /* Repeat Context is instead stored in the global State and then */ + /* placed in the newly created context here after the context has */ + /* been created or when returned to after another repeat match. */ + self->ctx->u.rep = state->repeat; + + /* Check to see if the Repeat Context is valid; if not, we have */ + /* Until without Repeat, and need to return an error immediately */ + /* as the pattern is invalid */ + if (!self->ctx->u.rep) { + return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_STATE); + } + + /* Set the current input stream position to the one saved in the */ + /* Current Context */ + state->ptr = self->ctx->ptr; + + /* Store the count of sub-matches found so far (on the first pass */ + /* this goes from -1 to 0) in the current context */ + self->ctx->count = self->ctx->u.rep->count + 1; + + TRACE(("|%p|%p|MIN_UNTIL %d %p\n", self->ctx->pattern, + self->ctx->ptr, self->ctx->count, + self->ctx->u.rep->pattern)); + + /* The pattern in the Context's Repeat Context points to just */ + /* past the corresponding REPEAT op code; set the name aliases */ + /* based on this */ + min = (int)self->ctx->u.rep->pattern[1]; + item = &self->ctx->u.rep->pattern[3]; + + if (self->ctx->count < min) { + /* not enough matches */ + + /* Put the number of matches back in the Repeat Context */ + self->ctx->u.rep->count = self->ctx->count; + + /* Parse the Repeated Pattern once again */ + /* Move the match pattern to item to be repeatedly matched */ + /* and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MIN_UNTIL_1 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MIN_UNTIL_1; + nextctx->pattern = item; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /*
jump_min_until_1: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MIN_UNTIL_1, + item); +#endif /* TRIPLE_LOOP */ + } + + /* Save the state's current Mark */ + SRE_MATCH_LASTMARK_SAVE(self, state); + + /* see if the tail matches; resume in SRE_MATCH_ON_JUMP_MIN_UNTIL_2 */ + + /* Restore the previous Repeat Context to be the current Repeat */ + /* Context */ + state->repeat = self->ctx->u.rep->prev; + +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MIN_UNTIL_2; + nextctx->pattern = self->ctx->pattern; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_min_until_2: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MIN_UNTIL_2, + self->ctx->pattern); +#endif /* TRIPLE_LOOP */ +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_NOT_LITERAL(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match any single character that is not the literal character */ + /* self->ctx->pattern[0] holds the character code to reject */ + SRE_CODE code = self->ctx->pattern[0]; + + TRACE(("|%p|%p|NOT_LITERAL %d\n", self->ctx->pattern, + self->ctx->ptr, code)); + + /* If there are no more characters or the current character does */ + /* match */ + if (self->ctx->ptr >= self->end || + (SRE_CODE)self->ctx->ptr[0] == code) { + + /* Failed to Match; Pop Context */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern++; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_NOT_LITERAL_IGNORE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match any single character that is not the literal character, ignoring case */ + /* self->ctx->pattern[0] holds the character code to reject */ + SRE_CODE code =
self->ctx->pattern[0]; + + TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", self->ctx->pattern, + self->ctx->ptr, code)); + + if (self->ctx->ptr >= self->end || + state->lower(*self->ctx->ptr) == state->lower(code)) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Move to next Op Code and Character */ + self->ctx->pattern++; + self->ctx->ptr++; + + return SRE_MATCH_PASS; +} + +/* TODO: SRE_MATCH_ON_NEGATE */ +/* TODO: SRE_MATCH_ON_RANGE */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_REPEAT(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + /* create repeat context. all the hard work is done + by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ + /* operands at self->ctx->pattern: [0]=skip [1]=min [2]=max [3..]=item */ + /* pattern + skip is where matching continues (expected: an UNTIL op code) */ + int skip = (int)self->ctx->pattern[0]; + /* min is not used */ + /* max is not used -- NOTE(review): the TRACE call below still names min and max, which are not declared in this function; confirm a VERBOSE build compiles */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|REPEAT %d %d\n", self->ctx->pattern, self->ctx->ptr, + min, max)); + + /* install new repeat context */ + /* Allocate Repeat Context */ + self->ctx->u.rep = (SRE_REPEAT *)PyObject_MALLOC(sizeof(*self->ctx-> + u.rep)); + + /* Allocation failed: set MemoryError. NOTE(review): this appears to be reported as an ordinary match failure rather than SRE_ERROR_MEMORY -- confirm that is intended */ + if (!self->ctx->u.rep) { + PyErr_NoMemory(); + + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Set the Repeat Context attributes and make it the current */ + /* repeat */ + /* Number of Repeats encountered; initialize to -1 */ + self->ctx->u.rep->count = -1; + + /* Store the location of the repeat parameters in the Repeat */ + /* object */ + self->ctx->u.rep->pattern = self->ctx->pattern; + + /* Back up the current Repeat Context as the previous Repeat */ + /* Context in the Repeat object */ + self->ctx->u.rep->prev = state->repeat; + + /* Clear the Previous Input Stream pointer in the Repeat object */ + self->ctx->u.rep->last_ptr = NULL; + + /* Update the current Repeat Context to be the one just created */ + state->repeat = self->ctx->u.rep; + + /* Update the global input stream pointer ptr from the current */ + /* Context
*/ + state->ptr = self->ctx->ptr; + + /* Parse the Repeated Pattern */ + /* Move the match pattern to the end of the Pseudo-Recursion */ + /* group, denoted by Repeat's skip parameter, which should be */ + /* either a Min Until or a Max Until op code and create a new */ + /* parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_REPEAT when stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_REPEAT; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_repeat: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, JUMP_REPEAT, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_REPEAT_ONE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match repeated sequence (maximizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points.
for other cases, + use the MAX_REPEAT operator */ + + /* operands at self->ctx->pattern: [0]=skip [1]=min [2]=max [3..]=item */ + /* pattern + skip points to tail */ + int skip = (int)self->ctx->pattern[0]; + int min = (int)self->ctx->pattern[1]; + int max = (int)self->ctx->pattern[2]; + SRE_CODE *item = &self->ctx->pattern[3]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|REPEAT_ONE %d %d\n", self->ctx->pattern, + self->ctx->ptr, min, max)); + + /* Not Enough Characters remain */ + if (self->ctx->ptr + min > self->end) { + return SRE_MATCH_RETURN_FAILURE(self); /* cannot match */ + } + + /* Update the global input stream pointer ptr */ + state->ptr = self->ctx->ptr; + + /* Get maximum matching characters */ + self->ret = SRE_COUNT(state, item, max); + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + /* Otherwise, self->ret contains the number of characters that */ + /* item matched in the input. */ + + /* FIXME: SRE_COUNT may have disturbed the current context?? */ + /* Re-retrieve the current ctx pointed to by self->ctx_pos */ + SRE_MATCH_DATA_LOOKUP_AT(state, &self->ctx, self->ctx_pos); + + /* Verify that at least the minimum number of characters matched */ + if (self->ret < (Py_ssize_t)min) { + /* didn't match minimum number of times */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* Set the count and input ptr to the end of the matching */ + /* sequence */ + self->ctx->count = self->ret; + self->ctx->ptr += self->ctx->count; + + /* when we arrive here, count contains the number of + matches, and self->ctx->ptr points to the tail of the target + string. check if the rest of the pattern matches, + and backtrack if not. */ + if (self->ctx->pattern[skip] == SRE_OP_SUCCESS) { + /* tail is empty.
we're finished */ + state->ptr = self->ctx->ptr; + + return SRE_MATCH_RETURN_SUCCESS(self); + } + + /* Store the previous Mark into the Current Context */ + SRE_MATCH_LASTMARK_SAVE(self, state); + + /* Check for Simple Case */ + if (self->ctx->pattern[skip] == SRE_OP_LITERAL) { + /* tail starts with a literal. skip positions where + the rest of the pattern cannot possibly match */ + + /* Next op code is Literal, at pattern[skip], whose first parameter, */ + /* at pattern[skip + 1], is the literal code to match; store it in */ + /* literal */ + SRE_CHAR literal = (SRE_CHAR)self->ctx->pattern[skip + 1]; + + /* Cache the literal in the context's u.chr field */ + self->ctx->u.chr = literal; + + /* Roll back matched characters until the subsequent */ + /* character literal can be matched */ + while (self->ctx->count >= (Py_ssize_t)min && + (self->ctx->ptr >= self->end || + *self->ctx->ptr != literal)) { + self->ctx->ptr--; + self->ctx->count--; + } + + /* Make sure we've matched at least min characters */ + if (self->ctx->count >= (Py_ssize_t)min) { + + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame */ + /* and continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just past the corresponding */ + /* Success op code and denoted by Repeat One's */ + /* skip parameter and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_REPEAT_ONE_1 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_REPEAT_ONE_1; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_repeat_one_1: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ +
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_REPEAT_ONE_1, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + else { + /* Subsequent literal could not be matched */ + return SRE_MATCH_RETURN_FAILURE(self); + } + } + else { + /* general case (the next op code in the pattern is not a */ + /* literal) */ + /* This code will act like a while loop because it will spawn and */ + /* push a new stack frame until it either matches or runs out */ + /* of characters */ + if (self->ctx->count >= (Py_ssize_t)min) { + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame */ + /* and continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just past the corresponding */ + /* Success op code and denoted by Repeat One's */ + /* skip parameter and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_REPEAT_ONE_2 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_REPEAT_ONE_2; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_repeat_one_2: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_REPEAT_ONE_2, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + else { + /* Could not match */ + return SRE_MATCH_RETURN_FAILURE(self); + } + } +} + +/* TODO: SRE_MATCH_ON_SUBPATTERN */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_MIN_REPEAT_ONE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* match repeated sequence (minimizing regexp) */ + + /* this operator only works if the repeated item is + exactly one
character wide, and we're not already + collecting backtracking points. for other cases, + use the MIN_REPEAT operator */ + + /* operands at self->ctx->pattern: [0]=skip [1]=min [2]=max [3..]=item */ + /* pattern + skip points to tail */ + int skip = (int)self->ctx->pattern[0]; + int min = (int)self->ctx->pattern[1]; + int max = (int)self->ctx->pattern[2]; + SRE_CODE *item = &self->ctx->pattern[3]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", self->ctx->pattern, + self->ctx->ptr, min, max)); + + /* Not Enough Characters remain */ + if (self->ctx->ptr + min > self->end) { + return SRE_MATCH_RETURN_FAILURE(self); /* cannot match */ + } + + /* Update the global input stream pointer ptr */ + state->ptr = self->ctx->ptr; + + /* Get minimum matching characters */ + if (min == 0) + /* Can always match 0 characters */ + self->ctx->count = 0; + else { + /* count using pattern min as the maximum */ + self->ret = SRE_COUNT(state, item, min); + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + /* Otherwise, self->ret contains the number of characters that */ + /* item matched in the input. */ + + /* FIXME: SRE_COUNT may have disturbed the current context?? */ + /* Re-retrieve the current ctx pointed to by self->ctx_pos */ + SRE_MATCH_DATA_LOOKUP_AT(state, &self->ctx, self->ctx_pos); + + /* Verify that at least the minimum number of characters */ + /* matched */ + if (self->ret < (Py_ssize_t)min) { + /* didn't match minimum number of times */ + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* advance past minimum matches of repeat */ + self->ctx->count = self->ret; + self->ctx->ptr += self->ctx->count; + } + + /* when we arrive here, count contains the minimum number of + matches, and self->ctx->ptr points to the first character not + required to match in the target string. check if the rest of + the pattern matches, and consume more matches if not. */ + if (self->ctx->pattern[skip] == SRE_OP_SUCCESS) { + /* tail is empty.
we're finished */ + state->ptr = self->ctx->ptr; + + return SRE_MATCH_RETURN_SUCCESS(self); + } + else { + /* general case */ + /* Store the previous Mark into the Current Context */ + SRE_MATCH_LASTMARK_SAVE(self, state); + + /* This code will act like a while loop because it will spawn and */ + /* push a new stack frame until it either matches or runs out */ + /* of characters */ + if ((Py_ssize_t)max == SRE_MAX_REPEAT || + self->ctx->count <= (Py_ssize_t)max) { + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame */ + /* and continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just past the corresponding */ + /* Success op code and denoted by Min Repeat One's */ + /* skip parameter and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MIN_REPEAT_ONE; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_min_repeat_one: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return + SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MIN_REPEAT_ONE, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + else { + /* Could not match */ + return SRE_MATCH_RETURN_FAILURE(self); + } + } +} + +/* TODO: SRE_MATCH_ON_ATOMIC_START */ +/* TODO: SRE_MATCH_ON_ATOMIC_END */ +/* TODO: SRE_MATCH_ON_POSSESSIVE */ + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_DEFAULT(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + TRACE(("|%p|%p|UNKNOWN %d\n", self->ctx->pattern, self->ctx->ptr, + self->op_code)); + + /* Set an Error code and
return right away! */ + return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_ILLEGAL); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_NONE(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) +{ + TRACE(("|%p|%p|RETURN %d\n", self->ctx->pattern, self->ctx->ptr, + self->ret)); + + /* Force Exit based on End of Context */ + self->ctx_pos = -1; + + return SRE_MATCH_PASS; +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MAX_UNTIL_1(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* TODO: This code is EXACTLY the same as SRE_MATCH_ON_JUMP_MIN_UNTIL_1; merge functions! */ + /* maximizing repeat: resumes after the item match that was pushed while count was below min */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* (the operand fields marked unused below are not read by this handler) */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + /* item is not used */ + + TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for Success or Error */ + if (self->ret) { + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* We did not find a match, so Decrement the Match count and */ + /* Pop Stack with failure. */ + /* Decrement Match Count */ + self->ctx->u.rep->count = self->ctx->count - 1; + + /* Restore Input Stream position to be the one saved in this */ + /* Context; this will roll back the Input Stream to the last */ + /* point that matched.
*/ + state->ptr = self->ctx->ptr; + + /* Pop stack with Failure */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MAX_UNTIL_2(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* maximizing repeat: resumes after the attempt to match one more item */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* self->ctx->pattern points to the tail */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + /* item is not used */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Back from the extra item match (whether or not it succeeded) */ + /* Restore the previous Input Stream position stored in the */ + /* current State's Stack into the Context's Repeat Context */ + SRE_MATCH_DATA_POP(state, &self->ctx->u.rep->last_ptr); + + /* Check for Success or Error */ + if (self->ret) { + /* Remove the previously saved Mark from the current */ + /* State's Stack */ + SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark); + + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* Failed to Match So Far... */ + + /* Restore the current Context's previous Mark from the */ + /* current State's Stack */ + SRE_MATCH_MARK_POP(state, self->ctx->lastmark); + + /* Restore the state's current Mark */ + SRE_MATCH_LASTMARK_RESTORE(self, state); + + /* No match, match one less pattern and store the new */ + /* count in the Context's Repeat Context */ + self->ctx->u.rep->count = self->ctx->count - 1; + + /* Update the current Input Stream position from the one */ + /* saved in the current Context */ + state->ptr = self->ctx->ptr; + + /* cannot match more repeated items here.
make sure the + tail matches */ + + /* Restore the previous Repeat Context to be the current */ + /* Repeat Context */ + state->repeat = self->ctx->u.rep->prev; + + /* Parse the tail */ + /* Move the match pattern to tail to verify that it matches and */ + /* create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MAX_UNTIL_3 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MAX_UNTIL_3; + nextctx->pattern = self->ctx->pattern; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_max_until_3: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MAX_UNTIL_3, + self->ctx->pattern); +#endif /* TRIPLE_LOOP */ + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MAX_UNTIL_3(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* maximizing repeat: resumes after the attempt to match the tail */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* (the operand fields marked unused below are not read by this handler) */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + /* item is not used */ + + TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Back from matching the tail; did it match? */ + /* Check for Success or Error */ + if (self->ret) { + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* Failed to Match, Pop stack */ + + /* Restore the current Context's Repeat Context to be the */ + /* current Repeat Context */ + state->repeat = self->ctx->u.rep; + + /* Restore Input Stream position to be the one saved in this */ + /* Context; this will roll back the Input Stream to the last */ + /* point that matched.
*/ + state->ptr = self->ctx->ptr; + + /* Pop stack with Failure */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MIN_UNTIL_1(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* TODO: This code is EXACTLY the same as SRE_MATCH_ON_JUMP_MAX_UNTIL_1; merge functions! */ + /* minimizing repeat: resumes after the item match that was pushed while count was below min */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* (the operand fields marked unused below are not read by this handler) */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + /* item is not used */ + + TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for Success or Error */ + if (self->ret) { + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* We did not find a match, so Decrement the Match count and */ + /* Pop Stack with failure. */ + /* Decrement Match Count */ + self->ctx->u.rep->count = self->ctx->count - 1; + + /* Restore Input Stream position to be the one saved in this */ + /* Context; this will roll back the Input Stream to the last */ + /* point that matched.
*/ + state->ptr = self->ctx->ptr; + + /* Pop stack with Failure */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MIN_UNTIL_2(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* minimizing repeat: resumes after the attempt to match the tail. NOTE(review): the TRACE label below reads JUMP_MAX_UNTIL_2 although this is the JUMP_MIN_UNTIL_2 handler */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* (the operand fields marked unused below are not read by this handler) */ + /* skip is not used */ + /* min is not used */ + int max; /* Initialized Later */ + SRE_CODE *item; /* Initialized Later */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for Success or Error */ + if (self->ret) { + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* The tail did not match */ + + /* The pattern in the Context's Repeat Context points to */ + /* just past the corresponding REPEAT op code; set the name */ + /* aliases based on this */ + max = (int)self->ctx->u.rep->pattern[2]; + + /* Restore the current Context's Repeat Context to be the */ + /* current Repeat Context */ + state->repeat = self->ctx->u.rep; + + /* Restore Input Stream position to be the one saved in this */ + /* Context; this will roll back the Input Stream to the last */ + /* point that matched.
*/ + state->ptr = self->ctx->ptr; + + /* Restore the state's current Mark */ + SRE_MATCH_LASTMARK_RESTORE(self, state); + + /* Check whether the repeat has already reached its maximum count; if so, fail */ + if (self->ctx->count >= max && max != SRE_MAX_REPEAT) { + /* Failed to Match, Pop stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } + else { + /* We have not reached our match limit, try to match */ + /* another time */ + + /* The pattern in the Context's Repeat Context points to */ + /* just past the corresponding REPEAT op code; set the */ + /* name aliases based on this */ + item = &self->ctx->u.rep->pattern[3]; + + /* Put the number of matches back in the Repeat Context */ + self->ctx->u.rep->count = self->ctx->count; + + /* Parse the Repeated Pattern once again */ + /* Move the match pattern to item to be repeatedly */ + /* matched and create a new parse sub-context */ + /* Go to SRE_MATCH_ON_JUMP_MIN_UNTIL_3 when stack is */ + /* popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MIN_UNTIL_3; + nextctx->pattern = item; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_min_until_3: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MIN_UNTIL_3, + item); +#endif /* TRIPLE_LOOP */ + } + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MIN_UNTIL_3(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* minimizing repeat: resumes after the attempt to match one more item. NOTE(review): the TRACE label below reads JUMP_MAX_UNTIL_3 although this is the JUMP_MIN_UNTIL_3 handler */ + /* repeat operand layout (u.rep->pattern): [1]=min [2]=max [3..]=item */ + /* (the operand fields marked unused below are not read by this handler) */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + /* item is not used */ + + TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", self->ctx->pattern, + self->ctx->ptr)); + + /* If the extra item match succeeded or we encountered an unrecoverable error, */ + /*
we're done */ + if (self->ret) { + if (self->ret < 0) { + /* Error, Return immediately */ + return SRE_MATCH_FAIL; + } + else { + /* Pop stack with Success */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* Tail did not Match, pop stack and see if we can try again */ + /* with another Match of item */ + + /* No match, match one less pattern and store the new */ + /* count in the Context's Repeat Context */ + self->ctx->u.rep->count = self->ctx->count - 1; + + /* Update the current Input Stream position from the one */ + /* saved in the current Context */ + state->ptr = self->ctx->ptr; + + /* Failed to Match, Pop stack */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_REPEAT(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* create repeat context. all the hard work is done + by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ + /* <1=min> <2=max> item tail */ + /* points to */ + /* skip is not used */ + /* min is not used */ + /* max is not used */ + + TRACE(("|%p|%p|JUMP_REPEAT\n", self->ctx->pattern, self->ctx->ptr)); + + /* Roll back the Repeat State */ + state->repeat = self->ctx->u.rep->prev; + + /* Free the memory used by the Repeat State */ + PyObject_FREE(self->ctx->u.rep); + + /* Check for errors */ + if (self->ret) { + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + else { + /* Successful Match */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + else { + /* Failed to match */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_REPEAT_ONE_1(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* Note: Requires the next operator after REPEAT_ONE be a literal */ + /* match repeated sequence (maximizing regexp) */ + /* <1=min> <2=max> item tail */ + /* points to tail */ + int skip = (int)self->ctx->pattern[0]; + int min = (int)self->ctx->pattern[1]; + /* max is not used */ + /* item is not used */ + + /* self->ctx->u.chr contain the 
literal to match; set in the */ + /* main function */ + SRE_CHAR literal = (SRE_CHAR)self->ctx->u.chr; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for errors */ + if (self->ret) { + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + else { + /* Last attempt was successful */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + + /* Restore the previous Mark from the Current Context */ + SRE_MATCH_LASTMARK_RESTORE(self, state); + + do { + /* Did not match, remove a character from the match and try */ + /* again */ + self->ctx->ptr--; + self->ctx->count--; + + /* Roll back matched characters until the subsequent */ + /* character literal can be matched */ + } while (self->ctx->count >= (Py_ssize_t)min && + (self->ctx->ptr >= self->end || + *self->ctx->ptr != literal)); + + /* Make sure we've matched at least min characters */ + if (self->ctx->count >= (Py_ssize_t)min) { + + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame and */ + /* continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just passed the corresponding */ + /* Success op code and denoted by Repeat One's */ + /* parameter and create a new parse sub-context */ + /* Go to this function (SRE_MATCH_ON_JUMP_REPEAT_ONE_1) when */ + /* stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_REPEAT_ONE_1; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_repeat_one_1: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ 
+ return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_REPEAT_ONE_1, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + else { + /* Subsequent literal could not be matched */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_REPEAT_ONE_2(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* Note: general case (the next character in pattern is not a */ + /* literal) */ + /* match repeated sequence (maximizing regexp) */ + /* <1=min> <2=max> item tail */ + /* points to tail */ + int skip = (int)self->ctx->pattern[0]; + int min = (int)self->ctx->pattern[1]; + /* max is not ised */ + /* item is not used */ +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for errors */ + if (self->ret) { + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + else { + /* Last attempt was successful */ + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + + /* Did not match, remove a character from the match and try again */ + self->ctx->ptr--; + self->ctx->count--; + + /* Restore the previous Mark from the Current Context */ + SRE_MATCH_LASTMARK_RESTORE(self, state); + + /* This code will act like a while because it will spawn and pop */ + /* a new stack frame until it either matches or runs out of */ + /* characters */ + if (self->ctx->count >= (Py_ssize_t)min) { + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame and */ + /* continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just passed the corresponding */ + /* Success op code and denoted by Repeat One's */ + /* parameter and create a new parse sub-context */ + /* Go to this function (SRE_MATCH_ON_JUMP_REPEAT_ONE_2) when */ + /* stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for 
Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_REPEAT_ONE_2; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_repeat_one_2: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_REPEAT_ONE_2, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + + /* Could not match */ + return SRE_MATCH_RETURN_FAILURE(self); +} + +Py_LOCAL_INLINE(Py_ssize_t) +SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE(SRE_MATCH_GLOBAL_CONTEXT *self, + SRE_STATE *state) +{ + /* <1=min> <2=max> item tail */ + /* points to tail */ + int skip = (int)self->ctx->pattern[0]; + /* min is not used */ + int max = (int)self->ctx->pattern[2]; + SRE_CODE *item = &self->ctx->pattern[3]; +#if TRIPLE_LOOP + SRE_MATCH_CONTEXT *nextctx; /* Next Context */ +#endif /* TRIPLE_LOOP */ + + TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", self->ctx->pattern, + self->ctx->ptr)); + + /* Check for errors */ + if (self->ret) { + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + else { + return SRE_MATCH_RETURN_SUCCESS(self); + } + } + + /* Update the current input stream position, ptr and search from */ + /* there */ + state->ptr = self->ctx->ptr; + + /* Did not match, see if we can consume one more characert */ + self->ret = SRE_COUNT(state, item, 1); + if (self->ret < 0) { + return SRE_MATCH_FAIL; + } + /* Otherwise, self->ret contains offset in characters that can */ + /* subsequently be matched in the input. */ + + /* FIXME: SRE_COUNT may have disturbed the current context?? 
*/ + /* Re-retrieve the current ctx pointed to by self->ctx_pos */ + SRE_MATCH_DATA_LOOKUP_AT(state, &self->ctx, self->ctx_pos); + + /* Failed to get another character so the match failed */ + if (self->ret == 0) { + return SRE_MATCH_RETURN_FAILURE(self); + } + + /* SRE_COUNT should only find at most 1 character */ + assert(self->ret == 1); + + /* Consume another character and try again */ + self->ctx->ptr++; + self->ctx->count++; + + /* Restore the previous Mark from the Current Context */ + SRE_MATCH_LASTMARK_RESTORE(self, state); + + /* This code will act like a while because it will spawn and push */ + /* a new stack frame until it either matches or runs out of */ + /* characters */ + if ((Py_ssize_t)max == SRE_MAX_REPEAT || + self->ctx->count <= (Py_ssize_t)max) { + /* Update the current input offset */ + state->ptr = self->ctx->ptr; + + /* This string matches so far, so add a new stack frame and */ + /* continue to try and match */ + /* Move the match pattern to the end of the */ + /* Pseudo-Recursion group, just passed the corresponding */ + /* Success op code and denoted by Min Repeat One's */ + /* parameter and create a new parse sub-context */ + /* Go to this function (SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE) */ + /* when stack is popped */ +#if TRIPLE_LOOP + /*** Prepare for Pseudo-Recursion ***/ + if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) { + return SRE_MATCH_FAIL; + } + nextctx->last_ctx_pos = self->ctx_pos; + nextctx->jump = JUMP_MIN_REPEAT_ONE; + nextctx->pattern = self->ctx->pattern + skip; + self->ctx_pos = self->alloc_pos; + self->ctx = nextctx; + /* goto entrance; */ + /* jump_min_repeat_one: */ + /*** Restore after Pseudo-Recursion ***/ + self->good = SRE_MATCH_EXIT; + return SRE_MATCH_PASS; +#else /* TRIPLE_LOOP */ + return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state, + JUMP_MIN_REPEAT_ONE, + self->ctx->pattern + + skip); +#endif /* TRIPLE_LOOP */ + } + else { + /* Could not match */ + return SRE_MATCH_RETURN_FAILURE(self); + } +} + 
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH_ON_JUMP_BRANCH(SRE_MATCH_GLOBAL_CONTEXT *self,
+                         SRE_STATE *state)
+{
+    /* alternation: unwind handler for JUMP_BRANCH */
+    /* Operand layout: BRANCH (<0=skip> code JUMP)* NULL tail */
+    /* NOTE(review): op code names in the layout were missing from the */
+    /* original comment and are reconstructed - confirm */
+    /* Each skip is the offset from its own slot to the next */
+    /* alternative's skip slot; a skip of zero terminates the list of */
+    /* alternatives */
+    int skip = (int)self->ctx->pattern[0];
+    SRE_CODE *p_next_op; /* Initialized Later */
+    SRE_CHAR literal; /* Initialized Later */
+    SRE_CODE *set; /* Initialized Later */
+#if TRIPLE_LOOP
+    SRE_MATCH_CONTEXT *nextctx; /* Next Context */
+#endif /* TRIPLE_LOOP */
+
+    TRACE(("|%p|%p|JUMP_BRANCH\n", self->ctx->pattern, self->ctx->ptr));
+
+    /* Check for Success or unrecoverable Error */
+    if (self->ret) {
+        if (self->ctx->u.rep) {
+            /* A mark was left in the Context's Repeat Context; */
+            /* discard it */
+            SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark);
+        }
+
+        /* Returns non-zero on error */
+        if (self->ret < 0) {
+            return SRE_MATCH_FAIL;
+        }
+        else {
+            /* Pop Context with a Success Condition */
+            return SRE_MATCH_RETURN_SUCCESS(self);
+        }
+    }
+    else {
+        /* If there is a Repeat Context stored in this Context */
+        if (self->ctx->u.rep) {
+            /* Preserve the Mark left there by the original Branch */
+            /* handler */
+            SRE_MATCH_MARK_POP_KEEP(state, self->ctx->lastmark);
+        }
+
+        /* Restore the previous Mark stored in the Current Context to be */
+        /* the current last Mark. */
+        SRE_MATCH_LASTMARK_RESTORE(self, state);
+
+        /* This is done as a do-while because it represents the return */
+        /* to the for iteration, which would increment */
+        /* self->ctx->pattern by skip before continuing */
+        do {
+            /* The last Branch option would or has failed to match */
+            /* Increment the pattern to the next branch point */
+            self->ctx->pattern += skip;
+
+            /* Update the named aliases list */
+            skip = (int)self->ctx->pattern[0];
+            p_next_op = &self->ctx->pattern[1];
+            /* Only read the literal operand while inside an */
+            /* alternative; at the zero terminator the slot two */
+            /* past it is not part of the branch and the value is */
+            /* never used by the loop condition below */
+            literal = skip ? (SRE_CHAR)self->ctx->pattern[2] : 0;
+            set = &self->ctx->pattern[3];
+
+            /* Skip simple match cases; e.g. Literal and In op codes */
+            /* If skip is the terminating zero, the loop will exit */
+        } while (skip &&
+
+                 /* Check for Literal Mismatch */
+                 ((*p_next_op == SRE_OP_LITERAL &&
+                   /* Check for End of Input Stream */
+                   (self->ctx->ptr >= self->end ||
+                    /* Check for Literal Mismatch */
+                    (SRE_CODE)*self->ctx->ptr != literal)) ||
+
+                  /* Check for Character Class Mismatch */
+                  (*p_next_op == SRE_OP_IN &&
+                   /* Check for End of Input Stream */
+                   (self->ctx->ptr >= self->end ||
+                    /* Check for Character not in Class */
+                    !SRE_CHARSET(set, (SRE_CODE)*self->ctx->ptr)))));
+
+        /* If skip is the terminating zero, the pseudo-recursion will */
+        /* be skipped. */
+        if (skip) {
+            /* Set the current input buffer position to that stored */
+            /* in the Current Context */
+            state->ptr = self->ctx->ptr;
+
+            /* Move the match pattern to the next code block's op */
+            /* code and create a new parse sub-context */
+            /* Go to this function (SRE_MATCH_ON_JUMP_BRANCH) when */
+            /* stack is popped */
+            #if TRIPLE_LOOP
+            /*** Prepare for Pseudo-Recursion ***/
+            /* Return non-zero on error */
+            if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) {
+                return SRE_MATCH_FAIL;
+            }
+            nextctx->last_ctx_pos = self->ctx_pos;
+            nextctx->jump = JUMP_BRANCH;
+            nextctx->pattern = p_next_op;
+            self->ctx_pos = self->alloc_pos;
+            self->ctx = nextctx;
+            /* goto entrance; */
+            /* jump_branch: */
+            /*** Restore after Pseudo-Recursion ***/
+            self->good = SRE_MATCH_EXIT;
+            return SRE_MATCH_PASS;
+            #else /* TRIPLE_LOOP */
+            return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
+                                                      JUMP_BRANCH,
+                                                      p_next_op);
+            #endif /* TRIPLE_LOOP */
+        }
+        else {
+            /* There are no more code sections in the Branch, and */
+            /* none of the previous ones matched, so fail and pop */
+            /* stack. */
+
+            if (self->ctx->u.rep) {
+                /* A mark was left in the Context's Repeat Context; */
+                /* discard it */
+                SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark);
+            }
+
+            /* Fail and Pop Context */
+            return SRE_MATCH_RETURN_FAILURE(self);
+        }
+    }
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH_ON_JUMP_ASSERT(SRE_MATCH_GLOBAL_CONTEXT *self,
+                         SRE_STATE *state)
+{
+    /* assert subpattern: unwind handler for JUMP_ASSERT */
+    /* pattern[0] is skip, the offset added to the pattern pointer to */
+    /* resume at the tail once the asserted subpattern has matched */
+    /* self->ret holds the subpattern result (negative on error, zero */
+    /* on failure, positive on success) */
+    int skip = (int)self->ctx->pattern[0];
+    /* back is not used */
+    /* pattern is not used */
+
+    TRACE(("|%p|%p|JUMP_ASSERT\n", self->ctx->pattern, self->ctx->ptr));
+
+    /* Check for failure */
+    if (self->ret < 0) {
+        return SRE_MATCH_FAIL;
+    }
+    else if (self->ret == 0) {
+        return SRE_MATCH_RETURN_FAILURE(self);
+    }
+    else {
+        /* Advance Pattern to Next Op Code */
+        self->ctx->pattern += skip;
+
+        /* Successful Return */
+        return SRE_MATCH_PASS;
+    }
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH_ON_JUMP_ASSERT_NOT(SRE_MATCH_GLOBAL_CONTEXT *self,
+                             SRE_STATE *state)
+{
+    /* assert not subpattern: unwind handler for JUMP_ASSERT_NOT */
+    /* pattern[0] is skip, the offset added to the pattern pointer to */
+    /* resume at the tail once the subpattern has failed to match */
+    /* self->ret holds the subpattern result (negative on error, zero */
+    /* on failure, positive on success) */
+    int skip = (int)self->ctx->pattern[0];
+    /* back is not used */
+    /* pattern is not used */
+
+    TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", self->ctx->pattern,
+           self->ctx->ptr));
+
+    /* Check for Errors, and if successful, fail because this is */
+    /* Not Assert */
+    if (self->ret) {
+        if (self->ret < 0) {
+            return SRE_MATCH_FAIL;
+        }
+        else {
+            return SRE_MATCH_RETURN_FAILURE(self);
+        }
+    }
+    else {
+        /* Advance Pattern to Next Op Code */
+        self->ctx->pattern += skip;
+
+        /* Successful Return */
+        return SRE_MATCH_PASS;
+    }
+}
+
+/* Op code dispatch table, indexed by SRE_OP_* value. Every slot must */
+/* hold a callable handler because SRE_MATCH only range-checks the op */
+/* code before calling through the table; op codes without an */
+/* implementation use SRE_MATCH_ON_DEFAULT. */
+Py_ssize_t (*SRE_MATCH_LOOKUP_TABLE[SRE_OP__COUNT])
+    (SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) = {
+    SRE_MATCH_ON_FAILURE, /* SRE_OP_FAILURE */
+    SRE_MATCH_ON_SUCCESS, /* SRE_OP_SUCCESS */
+    SRE_MATCH_ON_ANY, /* SRE_OP_ANY */
+    SRE_MATCH_ON_ANY_ALL, /* SRE_OP_ANY_ALL */
+    SRE_MATCH_ON_ASSERT, /* SRE_OP_ASSERT */
+    SRE_MATCH_ON_ASSERT_NOT, /* SRE_OP_ASSERT_NOT */
+    SRE_MATCH_ON_AT, /* SRE_OP_AT */
+    SRE_MATCH_ON_BRANCH, /* SRE_OP_BRANCH */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_CALL */
+    SRE_MATCH_ON_CATEGORY, /* SRE_OP_CATEGORY */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_CHARSET */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_BIGCHARSET */
+    SRE_MATCH_ON_GROUPREF, /* SRE_OP_GROUPREF */
+    SRE_MATCH_ON_GROUPREF_EXISTS, /* SRE_OP_GROUPREF_EXISTS */
+    SRE_MATCH_ON_GROUPREF_IGNORE, /* SRE_OP_GROUPREF_IGNORE */
+    SRE_MATCH_ON_IN, /* SRE_OP_IN */
+    SRE_MATCH_ON_IN_IGNORE, /* SRE_OP_IN_IGNORE */
+    SRE_MATCH_ON_JUMP, /* SRE_OP_INFO (Ignore Info and Jump Past it) */
+    SRE_MATCH_ON_JUMP, /* SRE_OP_JUMP */
+    SRE_MATCH_ON_LITERAL, /* SRE_OP_LITERAL */
+    SRE_MATCH_ON_LITERAL_IGNORE, /* SRE_OP_LITERAL_IGNORE */
+    SRE_MATCH_ON_MARK, /* SRE_OP_MARK */
+    SRE_MATCH_ON_MAX_UNTIL, /* SRE_OP_MAX_UNTIL */
+    SRE_MATCH_ON_MIN_UNTIL, /* SRE_OP_MIN_UNTIL */
+    SRE_MATCH_ON_NOT_LITERAL, /* SRE_OP_NOT_LITERAL */
+    SRE_MATCH_ON_NOT_LITERAL_IGNORE, /* SRE_OP_NOT_LITERAL_IGNORE */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_NEGATE */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_RANGE */
+    SRE_MATCH_ON_REPEAT, /* SRE_OP_REPEAT */
+    SRE_MATCH_ON_REPEAT_ONE, /* SRE_OP_REPEAT_ONE */
+    SRE_MATCH_ON_DEFAULT, /* (not used) SRE_OP_SUBPATTERN */
+    SRE_MATCH_ON_MIN_REPEAT_ONE, /* SRE_OP_MIN_REPEAT_ONE */
+    SRE_MATCH_ON_DEFAULT, /* (not implemented) SRE_OP_ATOMIC_START */
+    SRE_MATCH_ON_DEFAULT, /* (not implemented) SRE_OP_ATOMIC_END */
+    SRE_MATCH_ON_DEFAULT /* (not implemented) SRE_OP_POSSESSIVE */
+};
+
+/* Unwind dispatch table, indexed by the JUMP_* value stored in a */
+/* context when it was pushed; the handler runs once that context has */
+/* been popped. */
+Py_ssize_t (*SRE_MATCH_UNWIND_LOOKUP_TABLE[JUMP__COUNT])
+    (SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) = {
+    SRE_MATCH_ON_JUMP_NONE, /* JUMP_NONE */
+    SRE_MATCH_ON_JUMP_MAX_UNTIL_1, /* JUMP_MAX_UNTIL_1 */
+    SRE_MATCH_ON_JUMP_MAX_UNTIL_2, /* JUMP_MAX_UNTIL_2 */
+    SRE_MATCH_ON_JUMP_MAX_UNTIL_3, /* JUMP_MAX_UNTIL_3 */
+    SRE_MATCH_ON_JUMP_MIN_UNTIL_1, /* JUMP_MIN_UNTIL_1 */
+    SRE_MATCH_ON_JUMP_MIN_UNTIL_2, /* JUMP_MIN_UNTIL_2 */
+    SRE_MATCH_ON_JUMP_MIN_UNTIL_3, /* JUMP_MIN_UNTIL_3 */
+    SRE_MATCH_ON_JUMP_REPEAT, /* JUMP_REPEAT */
+    SRE_MATCH_ON_JUMP_REPEAT_ONE_1, /* JUMP_REPEAT_ONE_1 */
+    SRE_MATCH_ON_JUMP_REPEAT_ONE_2, /* JUMP_REPEAT_ONE_2 */
+    SRE_MATCH_ON_JUMP_MIN_REPEAT_ONE, /* JUMP_MIN_REPEAT_ONE */
+    SRE_MATCH_ON_JUMP_BRANCH, /* JUMP_BRANCH */
+    SRE_MATCH_ON_JUMP_ASSERT, /* JUMP_ASSERT */
+    SRE_MATCH_ON_JUMP_ASSERT_NOT /* JUMP_ASSERT_NOT */
+};
+
+/* check if string matches the given pattern. returns <0 for
+   error, 0 for failure, and 1 for success */
+#if TRIPLE_LOOP
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
+{
+    unsigned int sigcount = 0; /* Iteration Counter; for signalling */
+
+    SRE_MATCH_GLOBAL_CONTEXT self = {
+        /* Member Initialization */
+        0, /* end (init below) */
+        0, /* alloc_pos */
+        -1, /* ctx_pos */
+        0, /* ret (default to success) */
+        JUMP_NONE, /* jump */
+        0, /* ctx (pointer) */
+        SRE_MATCH_EXIT, /* good */
+        SRE_MATCH_GOOD, /* restart */
+        (unsigned int)-1 /* op_code */
+    };
+    self.end = (SRE_CHAR *)state->end;
+
+    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
+
+    /* Returns non-zero on error */
+    if (SRE_MATCH_DATA_ALLOC(&self, state, &self.ctx)) {
+        return self.ret;
+    }
+    self.ctx->last_ctx_pos = -1;
+    self.ctx->jump = JUMP_NONE;
+    self.ctx->pattern = pattern;
+    self.ctx_pos = self.alloc_pos;
+
+
+    do {
+        while (self.restart) {
+            if (self.good != SRE_MATCH_GOOD) {
+                /* Entrance: */
+                self.good = SRE_MATCH_GOOD;
+
+                self.ctx->ptr = (SRE_CHAR *)state->ptr;
+
+                if (self.ctx->pattern[0] == SRE_OP_INFO) {
+                    /* optimization info block */
+                    /* INFO <1=skip> <2=flags> <3=min> ... */
+                    /* Info gives the minimum characters needed to */
+                    /* match in *OP + 3; So if the input string is */
+                    /* less than that, fail. */
+                    int skip = (int)self.ctx->pattern[1];
+                    /* Flags not used */
+                    int min = (int)self.ctx->pattern[3];
+
+                    if (min && (self.end - self.ctx->ptr) < min) {
+                        TRACE(("reject (got %d chars, need %d)\n",
+                               (self.end - self.ctx->ptr), min));
+
+                        /* Returns non-zero on error */
+                        if (SRE_MATCH_RETURN_FAILURE(&self)) {
+                            return self.ret;
+                        }
+                    }
+
+                    self.ctx->pattern += skip + 1;
+                }
+            }
+
+            while (self.good) {
+                /* Increment the iteration counter */
+                ++sigcount;
+
+                /* Every 4096 iterations (mask 0xfff), accept an */
+                /* interrupt from the user */
+                if (0 == (sigcount & 0xfff) && PyErr_CheckSignals()) {
+                    return SRE_ERROR_INTERRUPTED;
+                }
+                else {
+                    self.op_code = (unsigned int)*self.ctx->pattern++;
+                    /* TODO: If the op_code is set in the Exit */
+                    /* region, it would be lost here. */
+
+                    if (self.op_code < SRE_OP__COUNT) {
+                        /* Call the Op Code specific Handler */
+                        if (SRE_MATCH_LOOKUP_TABLE[self.op_code](&self,
+                                                                 state)) {
+                            return self.ret;
+                        }
+                    }
+                    else {
+                        TRACE(("|%p|%p|UNKNOWN %d\n", self.ctx->pattern,
+                               self.ctx->ptr, self.op_code));
+
+                        return SRE_ERROR_ILLEGAL;
+                    }
+                }
+            }
+        }
+
+        /* Exit: */
+
+        self.ctx_pos = self.ctx->last_ctx_pos;
+        self.jump = self.ctx->jump;
+        SRE_MATCH_DATA_POP_DISCARD(state);
+
+        if (self.ctx_pos == -1) {
+            return self.ret;
+        }
+        else {
+            self.good = SRE_MATCH_GOOD;
+            self.restart = SRE_MATCH_GOOD;
+        }
+
+        SRE_MATCH_DATA_LOOKUP_AT(state, &self.ctx, self.ctx_pos);
+
+        if (self.jump < JUMP__COUNT) {
+            /* Call the Jump specific unwind Handler */
+            /* Returns non-zero on error */
+            if (SRE_MATCH_UNWIND_LOOKUP_TABLE[self.jump](&self,
+                                                         state)) {
+                return self.ret;
+            }
+        }
+        else {
+            TRACE(("|%p|%p|UNKNOWN JUMP %d\n", self.ctx->pattern,
+                   self.ctx->ptr, self.jump));
+
+            return SRE_ERROR_ILLEGAL;
+        }
+    } while (self.ret >= 0 && self.ctx_pos != -1);
+
+    return self.ret;
+}
+
+#else /* TRIPLE_LOOP */
+/* Push a new match context that will run pattern, recording jump as */
+/* the unwind handler to call when it is popped. Returns non-zero if */
+/* the context could not be allocated or was rejected by its INFO */
+/* block. */
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH_ON_PUSH_CTX_AND_POSITION(SRE_MATCH_GLOBAL_CONTEXT *self,
+                                   SRE_STATE *state, Py_ssize_t jump,
+                                   SRE_CODE *pattern)
+{
+    SRE_MATCH_CONTEXT *nextctx; /* Next Context */
+
+    /* Returns non-zero on error */
+    if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) {
+        return SRE_MATCH_FAIL;
+    }
+    nextctx->last_ctx_pos = self->ctx_pos;
+    nextctx->jump = jump;
+    nextctx->pattern = pattern;
+    self->ctx_pos = self->alloc_pos;
+    self->ctx = nextctx;
+
+    self->ctx->ptr = (SRE_CHAR *)state->ptr;
+
+    if (self->ctx->pattern[0] == SRE_OP_INFO) {
+        /* optimization info block */
+        /* INFO <1=skip> <2=flags> <3=min> ... tail */
+        /* Info gives the minimum characters needed to match in */
+        /* *OP + 3; So if the input string is less than that, fail. */
+        /* pattern + skip + 1 is the tail */
+        int skip = (int)self->ctx->pattern[1];
+        /* Flags not used */
+        int min = (int)self->ctx->pattern[3];
+
+        if (min && (self->end - self->ctx->ptr) < min) {
+            TRACE(("reject (got %d chars, need %d)\n",
+                   (self->end - self->ctx->ptr), min));
+
+            return SRE_MATCH_RETURN_FAILURE(self);
+        }
+
+        self->ctx->pattern += skip + 1;
+    }
+
+    return SRE_MATCH_PASS;
+}
+
+/* Pop the current match context and run the unwind handler recorded */
+/* in it against the context underneath. Returns SRE_MATCH_PASS once */
+/* the outermost context has been popped, otherwise the handler's */
+/* return value. */
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH_ON_POP_CTX_AND_POSITION(SRE_MATCH_GLOBAL_CONTEXT *self,
+                                  SRE_STATE *state)
+{
+    self->ctx_pos = self->ctx->last_ctx_pos;
+    self->jump = self->ctx->jump;
+    SRE_MATCH_DATA_POP_DISCARD(state);
+
+    if (self->ctx_pos == -1) {
+        /* End of Pattern; abort now and return */
+        /* Return Pass and handle outside */
+        return SRE_MATCH_PASS;
+    }
+
+    SRE_MATCH_DATA_LOOKUP_AT(state, &self->ctx, self->ctx_pos);
+
+    if (self->jump < JUMP__COUNT) {
+        /* Call the Jump specific unwind Handler */
+        /* Returns non-zero on error */
+        return SRE_MATCH_UNWIND_LOOKUP_TABLE[self->jump](self, state);
+    }
+    else {
+        TRACE(("|%p|%p|UNKNOWN JUMP %d\n", self->ctx->pattern,
+               self->ctx->ptr, self->jump));
+
+        return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_ILLEGAL);
+    }
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
+{
+    unsigned int sigcount = 0; /* Iteration Counter; for signalling */
+
+    SRE_MATCH_GLOBAL_CONTEXT self = {
+        /* Member Initialization */
+        0, /* end (init below) */
+        0, /* alloc_pos */
+        -1, /* ctx_pos */
+        0, /* ret (default to success) */
+        JUMP_NONE, /* jump */
+        0, /* ctx (pointer) */
+        (unsigned int)-1 /* op_code */
+    };
+    self.end = (SRE_CHAR *)state->end;
+
+    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
+
+    /* Returns non-zero on error */
+    if (SRE_MATCH_ON_PUSH_CTX_AND_POSITION(&self, state, JUMP_NONE,
+                                           pattern)) {
+        return self.ret;
+    }
+
+    for (;;) {
+        /* Increment the iteration counter */
+        ++sigcount;
+
+        /* Every 4096 iterations (mask 0xfff), accept an interrupt */
+        /* from the user */
+        if (0 == (sigcount & 0xfff) && PyErr_CheckSignals()) {
+            return SRE_ERROR_INTERRUPTED;
+        }
+        else {
+            self.op_code = (unsigned int)*self.ctx->pattern++;
+            /* TODO: If the op_code is set in the Exit region, it */
+            /* would be lost here. */
+
+            if (self.op_code < SRE_OP__COUNT) {
+                /* Call the Op Code specific Handler */
+                /* Returns non-zero on error */
+                if (SRE_MATCH_LOOKUP_TABLE[self.op_code](&self,
+                                                         state)) {
+                    /* Unwind Stack */
+                    /* Pop returns non-zero on error */
+                    while (self.ret >= 0 &&
+                           SRE_MATCH_ON_POP_CTX_AND_POSITION(&self,
+                                                             state));
+
+                    if (self.ret < 0 || self.ctx_pos == -1) {
+                        return self.ret;
+                    }
+                }
+            }
+            else {
+                TRACE(("|%p|%p|UNKNOWN %d\n", self.ctx->pattern,
+                       self.ctx->ptr, self.op_code));
+
+                return SRE_ERROR_ILLEGAL;
+            }
+        }
+    }
+
+    /* Should never get here */
+#if SHOULD_NEVER_GET_HERE
+    SRE_MATCH_ON_POP_CTX_AND_POSITION(&self, state);
+    /* TODO: Do not ignore return? */
+
+#endif /* SHOULD_NEVER_GET_HERE */
+    return self.ret;
+}
+
+#endif /* TRIPLE_LOOP */
+#endif /* !REMOVE_SRE_MATCH_MACROS */
 LOCAL(Py_ssize_t)
 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
 {