Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(révision 82196)
+++ Objects/unicodeobject.c	(copie de travail)
@@ -50,6 +50,42 @@
 #include <windows.h>
 #endif
 
+/* Py_UNICODE* to Py_UCS4 converters.
+
+   Py_UNICODE_JOIN_LOW_SURROGATE(ucs, pos, size): narrow builds only; if
+   ucs is a high surrogate and the next unit *pos (size units remain) is a
+   low surrogate, store the combined code point in ucs, advance pos and
+   decrement size.  Expands to a no-op on wide builds.
+
+   Py_UNICODE_GET_UCS4_CHAR(ucs, pos, size): read the unit at pos into ucs,
+   advance pos, decrement size, then join a following low surrogate as
+   described above.  Call only while size > 0: the first unit is read
+   unconditionally.
+   The arguments are expanded several times, so pass plain variables. */
+#ifdef Py_UNICODE_WIDE
+#define Py_UNICODE_JOIN_LOW_SURROGATE(ucs, pos, size) (void)0
+#else /* !Py_UNICODE_WIDE */
+#define Py_UNICODE_JOIN_LOW_SURROGATE(ucs, pos, size) \
+    do { \
+        if ((ucs) >= 0xD800 && (ucs) < 0xDC00 && (size) > 0) { \
+            Py_UNICODE ch2 = *(pos); \
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { \
+                (ucs) = ((((ucs) & 0x03FF) << 10) | (ch2 & 0x03FF)) \
+                        + 0x00010000; \
+                (pos)++; \
+                (size)--; \
+            } \
+        } \
+    } while (0)
+#endif
+
+#define Py_UNICODE_GET_UCS4_CHAR(ucs, pos, size) \
+    do { \
+        (ucs) = *(pos)++; \
+        (size)--; \
+        Py_UNICODE_JOIN_LOW_SURROGATE(ucs, pos, size); \
+    } while (0)
+
 /* Limit for the Unicode object free list */
 
 #define PyUnicode_MAXFREELIST       1024
@@ -2974,18 +3010,9 @@
         iorder[3] = 0;
     }
 
-    while (size-- > 0) {
-        Py_UCS4 ch = *s++;
-#ifndef Py_UNICODE_WIDE
-        if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
-            Py_UCS4 ch2 = *s;
-            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
-                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
-                s++;
-                size--;
-            }
-        }
-#endif
+    while (size > 0) {
+        Py_UCS4 ch;
+        Py_UNICODE_GET_UCS4_CHAR(ch, s, size);
         STORECHAR(ch);
     }
 
@@ -7672,16 +7699,17 @@
 unicode_isprintable(PyObject *self)
 {
     register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
-    register const Py_UNICODE *e;
+    register Py_ssize_t size = PyUnicode_GET_SIZE(self);
+    Py_UCS4 ucs;
 
     /* Shortcut for single character strings */
-    if (PyUnicode_GET_SIZE(self) == 1 && Py_UNICODE_ISPRINTABLE(*p)) {
+    if (size == 1 && Py_UNICODE_ISPRINTABLE(*p)) {
         Py_RETURN_TRUE;
     }
 
-    e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISPRINTABLE(*p)) {
+    while (size > 0) {
+        Py_UNICODE_GET_UCS4_CHAR(ucs, p, size);
+        if (!Py_UNICODE_ISPRINTABLE(ucs)) {
             Py_RETURN_FALSE;
         }
     }
@@ -8092,20 +8120,9 @@
         else {
             Py_UCS4 ucs = ch;
 
-#ifndef Py_UNICODE_WIDE
-            Py_UNICODE ch2 = 0;
             /* Get code point from surrogate pair */
-            if (size > 0) {
-                ch2 = *s;
-                if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
-                    && ch2 <= 0xDFFF) {
-                    ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
-                        + 0x00010000;
-                    s++;
-                    size--;
-                }
-            }
-#endif
+            Py_UNICODE_JOIN_LOW_SURROGATE(ucs, s, size);
+
             /* Map Unicode whitespace and control characters
                (categories Z* and C* except ASCII space)
             */
@@ -8145,7 +8162,7 @@
                 *p++ = ch;
 #ifndef Py_UNICODE_WIDE
                 if (ucs >= 0x10000)
-                    *p++ = ch2;
+                    *p++ = s[-1]; /* low surrogate; s already points past it */
 #endif
             }
         }