Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (revision 68145) +++ Objects/unicodeobject.c (working copy) @@ -2001,6 +2001,13 @@ return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); } +#define LONG_MASK (size_t) (SIZEOF_LONG - 1) +#if (SIZEOF_LONG == 8) +#define ASCII_MASK 0x8080808080808080L +#else +#define ASCII_MASK 0x80808080L +#endif + PyObject *PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const char *errors, @@ -2011,7 +2018,7 @@ Py_ssize_t startinpos; Py_ssize_t endinpos; Py_ssize_t outpos; - const char *e; + const char *e, *aligned_end; PyUnicodeObject *unicode; Py_UNICODE *p; const char *errmsg = ""; @@ -2032,11 +2039,43 @@ /* Unpack UTF-8 encoded data */ p = unicode->str; e = s + size; + aligned_end = (const char *) ((size_t) e & ~LONG_MASK); while (s < e) { Py_UCS4 ch = (unsigned char)*s; if (ch < 0x80) { + /* Fast path for runs of ASCII characters. */ + if (!((size_t) s & LONG_MASK)) { + /* Help register allocation */ + register const char *_s = s; + register Py_UNICODE *_p = p; + while (_s < aligned_end) { + long word = *(long *) _s; + if (word & ASCII_MASK) + break; + _p[0] = (unsigned char) _s[0]; + _p[1] = (unsigned char) _s[1]; + _p[2] = (unsigned char) _s[2]; + _p[3] = (unsigned char) _s[3]; +#if (SIZEOF_LONG == 8) + _p[4] = (unsigned char) _s[4]; + _p[5] = (unsigned char) _s[5]; + _p[6] = (unsigned char) _s[6]; + _p[7] = (unsigned char) _s[7]; +#endif + _s += SIZEOF_LONG; + _p += SIZEOF_LONG; + } + s = _s; + p = _p; + if (s == e) + break; + ch = (unsigned char)*s; + } + } + + if (ch < 0x80) { *p++ = (Py_UNICODE)ch; s++; continue; @@ -2169,6 +2208,7 @@ &starts, &e, &startinpos, &endinpos, &exc, &s, &unicode, &outpos, &p)) goto onError; + aligned_end = (const char *) ((size_t) e & ~LONG_MASK); } if (consumed) *consumed = s-starts; @@ -2188,6 +2228,9 @@ return NULL; } +#undef LONG_MASK +#undef ASCII_MASK + /* Allocation strategy: if the string is short, convert into a stack buffer and allocate exactly as much space needed at the end. Else allocate the maximum possible needed (4 result bytes per Unicode character), and return