Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86824)
+++ Include/unicodeobject.h	(working copy)
@@ -355,6 +355,44 @@
         for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
     } while (0)
 
+/* UTF-16 surrogate classification.  Each macro evaluates `ch` twice, so
+   the argument must not have side effects. */
+#define Py_UNICODE_ISSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_ISHIGHSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_ISLOWSURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+/* Combine a high/low surrogate pair into the code point it encodes. */
+#define Py_UNICODE_JOIN_SURROGATES(high, low) \
+    ((Py_UCS4)(((((Py_UCS4)(high) - 0xD800) << 10) | \
+               ((Py_UCS4)(low) - 0xDC00)) + 0x10000))
+/* Py_UNICODE_NEXT(ptr, end): read the code point at ptr and advance ptr
+   past it; the caller must guarantee ptr < end.
+   Py_UNICODE_PUT_NEXT(ptr, ch): store code point ch at ptr and advance
+   ptr past it.  ptr must be a modifiable lvalue in both macros. */
+#ifdef Py_UNICODE_WIDE
+#define Py_UNICODE_NEXT(ptr, end) (*(ptr)++)
+#define Py_UNICODE_PUT_NEXT(ptr, ch) (*(ptr)++ = (ch))
+#else
+/* A pair is only joined when its low half also lies before end, so a
+   lone high surrogate in the last slot is returned as-is. */
+#define Py_UNICODE_NEXT(ptr, end) \
+    (((ptr) + 1 < (end) && \
+      Py_UNICODE_ISHIGHSURROGATE(*(ptr)) && \
+      Py_UNICODE_ISLOWSURROGATE((ptr)[1])) ? \
+     ((ptr) += 2, Py_UNICODE_JOIN_SURROGATES((ptr)[-2], (ptr)[-1])) : \
+     (Py_UCS4)*(ptr)++)
+/* ch is copied into ch_ so it is evaluated exactly once. */
+#define Py_UNICODE_PUT_NEXT(ptr, ch) \
+    do { \
+        Py_UCS4 ch_ = (ch); \
+        if (ch_ > 0xFFFF) { \
+            ch_ -= 0x10000; \
+            *(ptr)++ = (Py_UNICODE)(0xD800 | (ch_ >> 10)); \
+            *(ptr)++ = (Py_UNICODE)(0xDC00 | (ch_ & 0x3FF)); \
+        } \
+        else \
+            *(ptr)++ = (Py_UNICODE)ch_; \
+    } while (0)
+#endif
 /* Check if substring matches at given offset. The offset must be
    valid, and the substring must not be empty. */
 
@@ -737,7 +775,7 @@
     const char *errors /* error handling */
     );
 
-/* Encodes a Unicode object and returns the result as Python string
+/* Encodes a Unicode object and returns the result as Python bytes
    object. */
 
 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86824)
+++ Objects/unicodeobject.c	(working copy)
@@ -8892,7 +8892,41 @@
 static PyObject*
 unicode_upper(PyUnicodeObject *self)
 {
-    return fixup(self, fixupper);
+    /* Upper-case self one code point at a time so that surrogate pairs
+       are mapped as a unit rather than as two separate code units. */
+    PyUnicodeObject *result;
+    Py_ssize_t size = PyUnicode_GET_SIZE(self);
+    Py_UNICODE *rp, *wp, *end;
+    int changed = 0;
+
+    result = _PyUnicode_New(size);
+    if (result == NULL)
+        return NULL;
+
+    rp = PyUnicode_AS_UNICODE(self);
+    wp = PyUnicode_AS_UNICODE(result);
+    end = rp + size;
+    /* NOTE(review): result has room for exactly `size` code units, so
+       this assumes upper-casing never changes how many code units a
+       character needs, and that Py_UNICODE_TOUPPER accepts a full
+       Py_UCS4 value -- confirm both for narrow builds. */
+    while (rp < end) {
+        Py_UCS4 ch, uc;
+        ch = Py_UNICODE_NEXT(rp, end);
+        uc = Py_UNICODE_TOUPPER(ch);
+        if (uc != ch)
+            changed = 1;
+        Py_UNICODE_PUT_NEXT(wp, uc);
+    }
+    if (!changed && PyUnicode_CheckExact(self)) {
+        /* If no changes are made, return a reference to the original buffer
+           instead (to save space, not time) */
+        Py_INCREF(self);
+        Py_DECREF(result);
+        return (PyObject *)self;
+    }
+
+    return (PyObject *)result;
 }
 
 PyDoc_STRVAR(zfill__doc__,