diff -r 6866384d9ccb Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Jan 26 19:00:20 2013 +0100 +++ b/Objects/unicodeobject.c Sat Jan 26 23:50:42 2013 +0200 @@ -5998,6 +5998,11 @@ while (s < end) { Py_UNICODE uch; Py_UCS4 ch; + if (end - s < Py_UNICODE_SIZE) { + endinpos = end-starts; + reason = "truncated input"; + goto error; + } /* We copy the raw representation one byte at a time because the pointer may be unaligned (see test_codeccallbacks). */ ((char *) &uch)[0] = s[0]; @@ -6007,37 +6012,18 @@ ((char *) &uch)[3] = s[3]; #endif ch = uch; - +#ifdef Py_UNICODE_WIDE /* We have to sanity check the raw data, otherwise doom looms for some malformed UCS-4 data. */ - if ( -#ifdef Py_UNICODE_WIDE - ch > 0x10ffff || -#endif - end-s < Py_UNICODE_SIZE - ) - { - startinpos = s - starts; - if (end-s < Py_UNICODE_SIZE) { - endinpos = end-starts; - reason = "truncated input"; - } - else { - endinpos = s - starts + Py_UNICODE_SIZE; - reason = "illegal code point (> 0x10FFFF)"; - } - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "unicode_internal", reason, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &writer)) - goto onError; - continue; - } - + if (ch > 0x10ffff) { + endinpos = s - starts + Py_UNICODE_SIZE; + reason = "illegal code point (> 0x10FFFF)"; + goto error; + } +#endif s += Py_UNICODE_SIZE; #ifndef Py_UNICODE_WIDE - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end) + if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE) { Py_UNICODE uch2; ((char *) &uch2)[0] = s[0]; @@ -6054,6 +6040,16 @@ goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); writer.pos++; + continue; + + error: + startinpos = s - starts; + if (unicode_decode_call_errorhandler_writer( + errors, &errorHandler, + "unicode_internal", reason, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &writer)) + goto onError; } Py_XDECREF(errorHandler);