/*
 * Review notes (commentary only; the patch text below is unchanged).
 *
 * What this is: an svn diff against Objects/unicodeobject.c whose line
 * breaks have been collapsed, so the hunks below run together on two lines.
 *
 * Hunk 1 (unicode_aswidechar): the helper changes from returning void to
 * returning Py_ssize_t, and the single #else branch is replaced by explicit
 * branches selected on Py_UNICODE_SIZE and SIZEOF_WCHAR_T:
 *   - sizes equal: memcpy of `size` elements from unicode->str, returns `size`.
 *   - Py_UNICODE 2 bytes, wchar_t 4 bytes: a code unit in 0xD800..0xDBFF
 *     followed by one in 0xDC00..0xDFFF is merged into a single wchar_t;
 *     L'\0' is appended only when there is still room before `wend`; the
 *     return value is the number of wchar_t written (counting that nul when
 *     it was written).  With w == NULL nothing is written and the count
 *     needed, including the nul, is returned.
 *   - Py_UNICODE 4 bytes, wchar_t 2 bytes: returns `size` when w is NULL,
 *     otherwise copies exactly `size` elements by plain assignment; the
 *     FIXME in the patch notes that surrogate pairs are not generated.
 *   - anything else: compile-time #error.
 *
 * Hunk 2 (PyUnicode_AsWideChar): the clamping of `size` to
 * PyUnicode_GET_SIZE(unicode) + 1 is removed and the helper's return value
 * is returned directly.
 *
 * Hunk 3 (PyUnicode_AsWideCharString): the buffer length is obtained from
 * unicode_aswidechar(unicode, NULL, 0), checked against
 * PY_SSIZE_T_MAX / sizeof(wchar_t), and *size receives the value returned
 * by the second (copying) call.
 *
 * NOTE(review): points to confirm before applying
 *   - For the sizing call (w == NULL, size == 0) the equal-size branch and
 *     the 4/2 branch both return `size`, i.e. 0, so buflen would be 0 on
 *     those builds and nothing would be copied afterwards -- confirm.
 *   - The equal-size branch hands `w` to memcpy without a NULL check; the
 *     sizing call reaches it with w == NULL -- confirm this is intended.
 *   - With the clamp removed in hunk 2, the equal-size and 4/2 branches copy
 *     `size` elements regardless of the string length; this looks like it
 *     can read past the string when size > length + 1 -- verify callers.
 *   - The 2/4 branch reads u[1] while u may be the last code unit;
 *     presumably this relies on the Py_UNICODE buffer being nul-terminated
 *     -- TODO confirm.
 *   - Removed code returned PyUnicode_GET_SIZE(unicode) when the terminator
 *     fit; the 2/4 branch now counts the terminator in its return value,
 *     which also ends up in *size -- check against the documented API.
 */
Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (révision 85136) +++ Objects/unicodeobject.c (copie de travail) @@ -1153,19 +1153,73 @@ return ret; } -static void +/* Convert a Unicode object to a wide character string. Return the number of + wide characters including the nul character. w can be NULL. */ +static Py_ssize_t unicode_aswidechar(PyUnicodeObject *unicode, wchar_t *w, Py_ssize_t size) { #if Py_UNICODE_SIZE == SIZEOF_WCHAR_T memcpy(w, unicode->str, size * sizeof(wchar_t)); -#else + return size; +#elif Py_UNICODE_SIZE == 2 && SIZEOF_WCHAR_T == 4 + register const Py_UNICODE *u; + const Py_UNICODE *uend; + const wchar_t *worig, *wend; + Py_ssize_t nchar; + + u = PyUnicode_AS_UNICODE(unicode); + uend = u + PyUnicode_GET_SIZE(unicode); + if (w != NULL) { + worig = w; + wend = w + size; + while (u != uend && w != wend) { + if (0xD800 <= u[0] && u[0] <= 0xDBFF + && 0xDC00 <= u[1] && u[1] <= 0xDFFF) + { + *w = (((u[0] & 0x3FF) << 10) | (u[1] & 0x3FF)) + 0x10000; + u += 2; + } else { + *w = *u; + u++; + } + w++; + } + if (w != wend) { + *w = L'\0'; + w++; + } + return w - worig; + } else { + nchar = 1; /* nul character at the end */ + while (u != uend) { + if (0xD800 <= u[0] && u[0] <= 0xDBFF + && 0xDC00 <= u[1] && u[1] <= 0xDFFF) + u += 2; + else + u++; + nchar++; + } + } + return nchar; +#elif Py_UNICODE_SIZE == 4 && SIZEOF_WCHAR_T == 2 register Py_UNICODE *u; - register Py_ssize_t i; + const wchar_t *worig, *wend; + + if (w == NULL) + return size; + u = PyUnicode_AS_UNICODE(unicode); - for (i = size; i > 0; i--) + worig = w; + wend = w + size; + while (w != wend) { + /* FIXME: create surrogate pairs if needed */ *w++ = *u++; + } + return w - worig; +#else +# error "unsupported wchar_t and Py_UNICODE sizes" #endif } @@ -1178,17 +1232,7 @@ PyErr_BadInternalCall(); return -1; } - - /* If possible, try to copy the 0-termination as well */ - if (size > PyUnicode_GET_SIZE(unicode)) - size 
= PyUnicode_GET_SIZE(unicode) + 1; - - unicode_aswidechar(unicode, w, size); - - if (size > PyUnicode_GET_SIZE(unicode)) - return PyUnicode_GET_SIZE(unicode); - else - return size; + return unicode_aswidechar(unicode, w, size); } wchar_t* @@ -1203,19 +1247,19 @@ return NULL; } - if ((PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) < PyUnicode_GET_SIZE(unicode)) { + buflen = unicode_aswidechar(unicode, NULL, 0); + if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) { PyErr_NoMemory(); return NULL; } - buflen = PyUnicode_GET_SIZE(unicode) + 1; /* copy L'\0' */ buffer = PyMem_MALLOC(buflen * sizeof(wchar_t)); if (buffer == NULL) { PyErr_NoMemory(); return NULL; } - unicode_aswidechar(unicode, buffer, buflen); - if (size) + buflen = unicode_aswidechar(unicode, buffer, buflen); + if (size != NULL) *size = buflen; return buffer; }