Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86735)
+++ Include/unicodeobject.h	(working copy)
@@ -355,6 +355,17 @@
         for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
     } while (0)
 
+/* Py_UNICODE_NEXT(ptr, end) yields the code point starting at *ptr and
+   advances ptr past it.  On narrow builds a surrogate pair that lies
+   entirely before end is combined into one code point.  ptr must be a
+   modifiable lvalue with ptr < end; it is evaluated exactly once. */
+#ifdef Py_UNICODE_WIDE
+# define Py_UNICODE_NEXT(ptr, end) (*(ptr)++)
+#else
+PyAPI_FUNC(Py_UCS4) _Py_unicode_next(const Py_UNICODE **pptr, const Py_UNICODE *end);
+# define Py_UNICODE_NEXT(ptr, end) _Py_unicode_next(&(ptr), (end))
+#endif
+
 /* Check if substring matches at given offset.  The offset must be
    valid, and the substring must not be empty. */
 
@@ -737,7 +748,7 @@
     const char *errors          /* error handling */
     );
 
-/* Encodes a Unicode object and returns the result as Python string
+/* Encodes a Unicode object and returns the result as Python bytes
    object. */
 
 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86735)
+++ Objects/unicodeobject.c	(working copy)
@@ -7785,7 +7785,7 @@
 static PyObject*
 unicode_isalpha(PyUnicodeObject *self)
 {
-    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+    const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
     register const Py_UNICODE *e;
 
     /* Shortcut for single character strings */
@@ -7798,8 +7798,8 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISALPHA(*p))
+    while (p < e) {
+        if (!Py_UNICODE_ISALPHA(Py_UNICODE_NEXT(p, e)))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -10364,7 +10364,30 @@
     return PyModule_Create(&_string_module);
 }
 
+#ifndef Py_UNICODE_WIDE
+/* Helper behind Py_UNICODE_NEXT on narrow builds: read one code point from
+   *pptr and advance *pptr past it.  A high surrogate (0xD800..0xDBFF)
+   immediately followed, before end, by a low surrogate (0xDC00..0xDFFF) is
+   decoded as a single code point >= 0x10000 and consumes two code units;
+   any other code unit, including a lone surrogate, is returned unchanged.
+   The first unit is read before any bounds check, so the caller must
+   guarantee *pptr < end on entry. */
+Py_UCS4
+_Py_unicode_next(const Py_UNICODE **pptr, const Py_UNICODE *end)
+{
+    Py_UCS4 ch;
+    ch = *(*pptr)++;
+    if (0xD800 <= ch && ch <= 0xDBFF && *pptr < end) {
+        Py_UCS4 ch2 = **pptr;
+        if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
+            ch = (((ch - 0xD800)<<10) | (ch2 - 0xDC00)) + 0x10000;
+            (*pptr)++;
+        }
+    }
+    return ch;
+}
+#endif
+
 
 #ifdef __cplusplus
 }
 #endif