Issue #18684: keep the re engine's pointers inside the subject buffer.

- pattern_split / pattern_subx: stop after an empty match at the end of the
  string instead of stepping state.start past state.end.
- scanner_match / scanner_search: set state->start to NULL once the scanner
  is exhausted and return None on later calls; skip an empty match by one
  character (state->charsize bytes), never by a single byte.
- SRE(at), ASSERT, ASSERT_NOT: compare distances instead of forming
  pointers beyond the end or before the beginning of the buffer.
- SRE(search): fail early when ptr > end or when fewer characters remain
  than the pattern's minimum width; never advance ptr past end.

diff -r 64f4dbac9d07 Misc/NEWS
--- a/Misc/NEWS	Sun Mar 22 12:34:50 2015 -0400
+++ b/Misc/NEWS	Mon Mar 23 00:09:58 2015 +0200
@@ -23,6 +23,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #18684: Fixed out-of-bounds pointer errors in the re module.
+
 - Issue #22351: The nntplib.NNTP constructor no longer leaves the connection
   and socket open until the garbage collector cleans them up.  Patch by
   Martin Panter.
diff -r 64f4dbac9d07 Modules/_sre.c
--- a/Modules/_sre.c	Sun Mar 22 12:34:50 2015 -0400
+++ b/Modules/_sre.c	Mon Mar 23 00:09:58 2015 +0200
@@ -907,7 +907,7 @@ pattern_split(PatternObject* self, PyObj
         }
 
         if (state.start == state.ptr) {
-            if (last == state.end)
+            if (last == state.end || state.ptr == state.end)
                 break;
             /* skip one character */
             state.start = (void*) ((char*) state.ptr + state.charsize);
@@ -1105,6 +1105,8 @@ pattern_subx(PatternObject* self, PyObje
 
 next:
         /* move on */
+        if (state.ptr == state.end)
+            break;
         if (state.ptr == state.start)
             state.start = (void*) ((char*) state.ptr + state.charsize);
         else
@@ -2591,6 +2593,9 @@ scanner_match(ScannerObject* self, PyObj
     PyObject* match;
     Py_ssize_t status;
 
+    if (state->start == NULL)
+        Py_RETURN_NONE;
+
     state_reset(state);
 
     state->ptr = state->start;
@@ -2602,10 +2607,14 @@ scanner_match(ScannerObject* self, PyObj
     match = pattern_new_match((PatternObject*) self->pattern,
                               state, status);
 
-    if (status == 0 || state->ptr == state->start)
+    if (status == 0)
+        state->start = NULL;
+    else if (state->ptr != state->start)
+        state->start = state->ptr;
+    else if (state->ptr != state->end)
         state->start = (void*) ((char*) state->ptr + state->charsize);
     else
-        state->start = state->ptr;
+        state->start = NULL;
 
     return match;
 }
@@ -2618,6 +2627,9 @@ scanner_search(ScannerObject* self, PyOb
     PyObject* match;
     Py_ssize_t status;
 
+    if (state->start == NULL)
+        Py_RETURN_NONE;
+
     state_reset(state);
 
     state->ptr = state->start;
@@ -2629,10 +2641,14 @@ scanner_search(ScannerObject* self, PyOb
     match = pattern_new_match((PatternObject*) self->pattern,
                               state, status);
 
-    if (status == 0 || state->ptr == state->start)
+    if (status == 0)
+        state->start = NULL;
+    else if (state->ptr != state->start)
+        state->start = state->ptr;
+    else if (state->ptr != state->end)
         state->start = (void*) ((char*) state->ptr + state->charsize);
     else
-        state->start = state->ptr;
+        state->start = NULL;
 
     return match;
 }
diff -r 64f4dbac9d07 Modules/sre_lib.h
--- a/Modules/sre_lib.h	Sun Mar 22 12:34:50 2015 -0400
+++ b/Modules/sre_lib.h	Mon Mar 23 00:09:58 2015 +0200
@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr,
                 SRE_IS_LINEBREAK((int) ptr[-1]));
 
     case SRE_AT_END:
-        return (((void*) (ptr+1) == state->end &&
+        return (((SRE_CHAR *)state->end - ptr == 1 &&
                  SRE_IS_LINEBREAK((int) ptr[0])) ||
                 ((void*) ptr == state->end));
 
@@ -1109,9 +1109,9 @@ entrance:
             /* <ASSERT> <skip> <back> <pattern> */
             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
                    ctx->ptr, ctx->pattern[1]));
+            if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
+                RETURN_FAILURE;
             state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr < state->beginning)
-                RETURN_FAILURE;
             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
             RETURN_ON_FAILURE(ret);
             ctx->pattern += ctx->pattern[0];
@@ -1122,8 +1122,8 @@ entrance:
             /* <ASSERT_NOT> <skip> <back> <pattern> */
             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
                    ctx->ptr, ctx->pattern[1]));
-            state->ptr = ctx->ptr - ctx->pattern[1];
-            if (state->ptr >= state->beginning) {
+            if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
+                state->ptr = ctx->ptr - ctx->pattern[1];
                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
                 if (ret) {
                     RETURN_ON_ERROR(ret);
@@ -1215,12 +1215,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE*
     SRE_CODE* overlap = NULL;
     int flags = 0;
 
+    if (ptr > end)
+        return 0;
+
     if (pattern[0] == SRE_OP_INFO) {
         /* optimization info block */
         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
 
         flags = pattern[2];
 
+        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
+            TRACE(("reject (got %u chars, need %u)\n",
+                   (unsigned int)(end - ptr), pattern[3]));
+            return 0;
+        }
         if (pattern[3] > 1) {
             /* adjust end point (but make sure we leave at least one
                character in there, so literal search will work) */
@@ -1338,15 +1346,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE*
                 break;
             ptr++;
         }
-    } else
+    } else {
         /* general case */
-        while (ptr <= end) {
+        assert(ptr <= end);
+        while (1) {
             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
-            state->start = state->ptr = ptr++;
+            state->start = state->ptr = ptr;
             status = SRE(match)(state, pattern, 0);
-            if (status != 0)
+            if (status != 0 || ptr >= end)
                 break;
+            ptr++;
         }
+    }
 
     return status;
 }