diff -r 9ceac471bd8c Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Mar 22 22:40:44 2012 -0400 +++ b/Objects/unicodeobject.c Mon Mar 26 22:19:09 2012 +0300 @@ -6874,10 +6874,10 @@ Py_ssize_t endinpos; Py_ssize_t outpos; const char *e; - int has_error; const unsigned char *p = (const unsigned char *)s; const unsigned char *end = p + size; const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); + unsigned char *q; PyObject *errorHandler = NULL; PyObject *exc = NULL; @@ -6890,8 +6890,42 @@ if (size == 1 && (unsigned char)s[0] < 128) return get_latin1_char((unsigned char)s[0]); - has_error = 0; - while (p < end && !has_error) { + v = PyUnicode_New(size, 127); + if (v == NULL) + goto onError; + + q = PyUnicode_1BYTE_DATA(v); + if (((size_t) p & LONG_PTR_MASK) == ((size_t) q & LONG_PTR_MASK)) { + while (p < end) { + /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for + an explanation. */ + if (!((size_t) p & LONG_PTR_MASK)) { + /* Help register allocation */ + register const unsigned char *_p = p; + register unsigned char *_q = q; + while (_p < aligned_end) { + unsigned long value = *(const unsigned long *) _p; + if (value & ASCII_CHAR_MASK) + goto hasError; + *((unsigned long *)_q) = value; + _p += SIZEOF_LONG; + _q += SIZEOF_LONG; + } + if (_p == end) + break; + p = _p; + q = _q; + } + if (*p & 0x80) + goto hasError; + *q = *p; + ++p; + ++q; + } + return v; + } + + while (p < end) { /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for an explanation. */ if (!((size_t) p & LONG_PTR_MASK)) { @@ -6899,36 +6933,24 @@ register const unsigned char *_p = p; while (_p < aligned_end) { unsigned long value = *(unsigned long *) _p; - if (value & ASCII_CHAR_MASK) { - has_error = 1; - break; - } + if (value & ASCII_CHAR_MASK) + goto hasError; _p += SIZEOF_LONG; } if (_p == end) break; - if (has_error) - break; p = _p; } - if (*p & 0x80) { - has_error = 1; - break; - } - else { - ++p; - } - } - if (!has_error) - return unicode_fromascii((const unsigned char *)s, size); - - v = PyUnicode_New(size, 127); - if (v == NULL) - goto onError; - if (size == 0) - return v; - kind = PyUnicode_KIND(v); - data = PyUnicode_DATA(v); + if (*p & 0x80) + goto hasError; + ++p; + } + memcpy(PyUnicode_1BYTE_DATA(v), s, size); + return v; + +hasError: + kind = PyUnicode_1BYTE_KIND; + data = PyUnicode_1BYTE_DATA(v); outpos = 0; e = s + size; while (s < e) {