diff -r ba32b8c7f5be Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Mar 31 14:10:10 2012 +0300 +++ b/Objects/unicodeobject.c Fri Apr 20 21:27:35 2012 +0300 @@ -3401,7 +3401,7 @@ Py_ssize_t outpos; PyUnicodeObject *unicode; Py_UNICODE *p; - const unsigned char *q, *e, *aligned_end; + const unsigned char *q, *e, *e2, *aligned_end; int bo = 0; /* assume native ordering by default */ int native_ordering = 0; const char *errmsg = ""; @@ -3425,7 +3425,7 @@ /* Unpack UTF-16 encoded data */ p = unicode->str; q = (unsigned char *)s; - e = q + size - 1; + e = q + size; if (byteorder) bo = *byteorder; @@ -3475,8 +3475,9 @@ native_ordering = ilo > ihi; #endif + e2 = e - 1; aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); - while (q < e) { + while (q < e2) { Py_UNICODE ch; /* First check for possible aligned read of a C 'long'. Unaligned reads are more expensive, better to defer to another iteration. */ @@ -3546,7 +3547,7 @@ } p = _p; q = _q; - if (q >= e) + if (q >= e2) break; } ch = (q[ihi] << 8) | q[ilo]; @@ -3559,10 +3560,10 @@ } /* UTF-16 code pair: */ - if (q > e) { + if (q >= e2) { errmsg = "unexpected end of data"; startinpos = (((const char *)q) - 2) - starts; - endinpos = ((const char *)e) + 1 - starts; + endinpos = ((const char *)e) - starts; goto utf16Error; } if (0xD800 <= ch && ch <= 0xDBFF) { @@ -3606,28 +3607,19 @@ &outpos, &p)) goto onError; + /* Update data because unicode_decode_call_errorhandler might have + changed the input object. */ + e2 = e - 1; + aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); } /* remaining byte at the end? (size should be even) */ - if (e == q) { + if (q != e) { if (!consumed) { errmsg = "truncated data"; startinpos = ((const char *)q) - starts; - endinpos = ((const char *)e) + 1 - starts; + endinpos = ((const char *)e) - starts; outpos = p - PyUnicode_AS_UNICODE(unicode); - if (unicode_decode_call_errorhandler( - errors, - &errorHandler, - "utf16", errmsg, - &starts, - (const char **)&e, - &startinpos, - &endinpos, - &exc, - (const char **)&q, - &unicode, - &outpos, - &p)) - goto onError; + goto utf16Error; /* The remaining input chars are ignored if the callback chooses to skip the input */ }