Walk the UTF-8 decoder input with an index instead of a moving pointer.

The decoding loop touched by this patch used to advance `s` and keep the
original start in `starts`.  It now keeps `s` fixed and advances a
Py_ssize_t index `i`, so every error position is simply `i` and
`*consumed` is `i` as well.

The error handler is given `&s` and `&e`, so it can repoint both at a
different buffer.  Because the loop is now bounded by `size` rather than
by `e`, `size` is recomputed from `e - s` right after the handler
returns; otherwise the loop condition, the "unexpected end of data"
check and `endinpos = size` would keep using the old length.

Note: indentation of the context lines was reconstructed; apply with
`patch -l` if the whitespace does not match the target tree exactly.

Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(révision 68360)
+++ Objects/unicodeobject.c	(copie de travail)
@@ -2006,11 +2006,11 @@
                                        const char *errors,
                                        Py_ssize_t *consumed)
 {
-    const char *starts = s;
     int n;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
     Py_ssize_t outpos;
+    Py_ssize_t i;
     const char *e;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
@@ -2033,23 +2033,23 @@
     p = unicode->str;
     e = s + size;
 
-    while (s < e) {
-        Py_UCS4 ch = (unsigned char)*s;
+    for (i = 0; i < size;) {
+        Py_UCS4 ch = (unsigned char) s[i];
 
         if (ch < 0x80) {
+            i++;
             *p++ = (Py_UNICODE)ch;
-            s++;
             continue;
         }
 
         n = utf8_code_length[ch];
 
-        if (s + n > e) {
+        if (i + n > size) {
             if (consumed)
                 break;
             else {
                 errmsg = "unexpected end of data";
-                startinpos = s-starts;
+                startinpos = i;
                 endinpos = size;
                 goto utf8Error;
             }
@@ -2059,43 +2059,43 @@
 
         case 0:
             errmsg = "unexpected code byte";
-            startinpos = s-starts;
-            endinpos = startinpos+1;
+            startinpos = i;
+            endinpos = i + 1;
             goto utf8Error;
 
         case 1:
             errmsg = "internal error";
-            startinpos = s-starts;
-            endinpos = startinpos+1;
+            startinpos = i;
+            endinpos = i + 1;
             goto utf8Error;
 
         case 2:
-            if ((s[1] & 0xc0) != 0x80) {
+            if ((s[i+1] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
-                startinpos = s-starts;
-                endinpos = startinpos+2;
+                startinpos = i;
+                endinpos = i + 2;
                 goto utf8Error;
             }
-            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
+            ch = ((s[i] & 0x1f) << 6) + (s[i+1] & 0x3f);
             if (ch < 0x80) {
-                startinpos = s-starts;
-                endinpos = startinpos+2;
+                startinpos = i;
+                endinpos = i + 2;
                 errmsg = "illegal encoding";
                 goto utf8Error;
             }
-            else
-                *p++ = (Py_UNICODE)ch;
+            i += 2;
+            *p++ = (Py_UNICODE)ch;
             break;
 
         case 3:
-            if ((s[1] & 0xc0) != 0x80 ||
-                (s[2] & 0xc0) != 0x80) {
+            if ((s[i+1] & 0xc0) != 0x80 ||
+                (s[i+2] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
-                startinpos = s-starts;
-                endinpos = startinpos+3;
+                startinpos = i;
+                endinpos = i + 3;
                 goto utf8Error;
             }
-            ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
+            ch = ((s[i] & 0x0f) << 12) + ((s[i+1] & 0x3f) << 6) + (s[i+2] & 0x3f);
             if (ch < 0x0800) {
                 /* Note: UTF-8 encodings of surrogates are considered
                    legal UTF-8 sequences;
@@ -2105,25 +2105,25 @@
                    unit.
                 */
                 errmsg = "illegal encoding";
-                startinpos = s-starts;
-                endinpos = startinpos+3;
+                startinpos = i;
+                endinpos = i + 3;
                 goto utf8Error;
             }
-            else
-                *p++ = (Py_UNICODE)ch;
+            i += 3;
+            *p++ = (Py_UNICODE)ch;
             break;
 
         case 4:
-            if ((s[1] & 0xc0) != 0x80 ||
-                (s[2] & 0xc0) != 0x80 ||
-                (s[3] & 0xc0) != 0x80) {
+            if ((s[i+1] & 0xc0) != 0x80 ||
+                (s[i+2] & 0xc0) != 0x80 ||
+                (s[i+3] & 0xc0) != 0x80) {
                 errmsg = "invalid data";
-                startinpos = s-starts;
-                endinpos = startinpos+4;
+                startinpos = i;
+                endinpos = i + 4;
                 goto utf8Error;
             }
-            ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
-                ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
+            ch = ((s[i] & 0x7) << 18) + ((s[i+1] & 0x3f) << 12) +
+                ((s[i+2] & 0x3f) << 6) + (s[i+3] & 0x3f);
             /* validate and convert to UTF-16 */
             if ((ch < 0x10000)        /* minimum value allowed for 4
                                          byte encoding */
@@ -2131,10 +2131,11 @@
                                          UTF-16 */
             {
                 errmsg = "illegal encoding";
-                startinpos = s-starts;
-                endinpos = startinpos+4;
+                startinpos = i;
+                endinpos = i + 4;
                 goto utf8Error;
             }
+            i += 4;
 #ifdef Py_UNICODE_WIDE
             *p++ = (Py_UNICODE)ch;
 #else
@@ -2154,24 +2155,30 @@
         default:
             /* Other sizes are only needed for UCS-4 */
             errmsg = "unsupported Unicode code range";
-            startinpos = s-starts;
-            endinpos = startinpos+n;
+            startinpos = i;
+            endinpos = i + n;
             goto utf8Error;
         }
-        s += n;
         continue;
 
     utf8Error:
-        outpos = p-PyUnicode_AS_UNICODE(unicode);
-        if (unicode_decode_call_errorhandler(
-                errors, &errorHandler,
-                "utf8", errmsg,
-                &starts, &e, &startinpos, &endinpos, &exc, &s,
-                &unicode, &outpos, &p))
-            goto onError;
+        {
+            const char *nexts = &s[i];
+            outpos = p-PyUnicode_AS_UNICODE(unicode);
+            if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "utf8", errmsg,
+                    &s, &e, &startinpos, &endinpos, &exc, &nexts,
+                    &unicode, &outpos, &p))
+                goto onError;
+            /* s and e are passed by address, so the handler can repoint
+               them at another buffer; refresh the loop bound to match. */
+            size = e - s;
+            i = nexts - s;
+        }
     }
     if (consumed)
-        *consumed = s-starts;
+        *consumed = i;
 
     /* Adjust length */
     if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)