| --- a/Objects/unicodeobject.c Thu Mar 22 22:40:44 2012 -0400 |
| +++ b/Objects/unicodeobject.c Tue Mar 27 01:49:43 2012 +0300 |
| @@ -6874,7 +6874,6 @@ |
| Py_ssize_t endinpos; |
| Py_ssize_t outpos; |
| const char *e; |
| - int has_error; |
| const unsigned char *p = (const unsigned char *)s; |
| const unsigned char *end = p + size; |
| const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); |
| @@ -6890,8 +6889,38 @@ |
| if (size == 1 && (unsigned char)s[0] < 128) |
| return get_latin1_char((unsigned char)s[0]); |
| - has_error = 0; |
| - while (p < end && !has_error) { |
| + v = PyUnicode_New(size, 127); |
| + if (v == NULL) |
| + goto onError; |
| + |
| +#if SIZEOF_LONG <= SIZEOF_VOID_P |
| + if (!((size_t) p & LONG_PTR_MASK)) { |
| + /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for |
| + an explanation. */ |
| + /* Help register allocation */ |
| + register const unsigned char *_p = p; |
| + register unsigned char * q = PyUnicode_1BYTE_DATA(v); |
| + while (_p < aligned_end) { |
| + unsigned long value = *(const unsigned long *) _p; |
| + if (value & ASCII_CHAR_MASK) |
| + goto hasError; |
| + *((unsigned long *)q) = value; |
| + _p += SIZEOF_LONG; |
| + q += SIZEOF_LONG; |
| + } |
| + p = _p; |
| + while (p < end) { |
| + if (*p & 0x80) |
| + goto hasError; |
| + *q = *p; |
| + ++p; |
| + ++q; |
| + } |
| + return v; |
| + } |
| +#endif |
| + |
| + while (p < end) { |
| /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for |
| an explanation. */ |
| if (!((size_t) p & LONG_PTR_MASK)) { |
| @@ -6899,36 +6928,24 @@ |
| register const unsigned char *_p = p; |
| while (_p < aligned_end) { |
| unsigned long value = *(unsigned long *) _p; |
| - if (value & ASCII_CHAR_MASK) { |
| - has_error = 1; |
| - break; |
| - } |
| + if (value & ASCII_CHAR_MASK) |
| + goto hasError; |
| _p += SIZEOF_LONG; |
| } |
| if (_p == end) |
| break; |
| - if (has_error) |
| - break; |
| p = _p; |
| } |
| - if (*p & 0x80) { |
| - has_error = 1; |
| - break; |
| - } |
| - else { |
| - ++p; |
| - } |
| - } |
| - if (!has_error) |
| - return unicode_fromascii((const unsigned char *)s, size); |
| - |
| - v = PyUnicode_New(size, 127); |
| - if (v == NULL) |
| - goto onError; |
| - if (size == 0) |
| - return v; |
| - kind = PyUnicode_KIND(v); |
| - data = PyUnicode_DATA(v); |
| + if (*p & 0x80) |
| + goto hasError; |
| + ++p; |
| + } |
| + memcpy(PyUnicode_1BYTE_DATA(v), s, size); |
| + return v; |
| + |
| +hasError: |
| + kind = PyUnicode_1BYTE_KIND; |
| + data = PyUnicode_1BYTE_DATA(v); |
| outpos = 0; |
| e = s + size; |
| while (s < e) { |