Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h (révision 85081)
+++ Include/unicodeobject.h (copie de travail)
@@ -99,8 +99,8 @@
 #endif
 
 /* If the compiler provides a wchar_t type we try to support it
-   through the interface functions PyUnicode_FromWideChar() and
-   PyUnicode_AsWideChar(). */
+   through the interface functions PyUnicode_FromWideChar(),
+   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
 
 #ifdef HAVE_USABLE_WCHAR_T
 # ifndef HAVE_WCHAR_H
@@ -156,6 +156,7 @@
 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
 # define PyUnicode_Compare PyUnicodeUCS2_Compare
 # define PyUnicode_CompareWithASCII PyUnicodeUCS2_CompareASCII
@@ -239,6 +240,7 @@
 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
+# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
 # define PyUnicode_Compare PyUnicodeUCS4_Compare
 # define PyUnicode_CompareWithASCII PyUnicodeUCS4_CompareWithASCII
@@ -570,6 +572,18 @@
     Py_ssize_t size             /* size of buffer */
     );
 
+/* Convert the Unicode object to a wide character string. The output string
+   always ends with a nul character. If size is not NULL, write the number of
+   wide characters (including the final nul character) into *size.
+
+   Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
+   on success. On error, returns NULL and *size is undefined. */
+
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
+    PyUnicodeObject *unicode,   /* Unicode object */
+    Py_ssize_t *size            /* number of characters of the result */
+    );
+
 #endif
 
 /* --- Unicode ordinals --------------------------------------------------- */
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (révision 85081)
+++ Objects/unicodeobject.c (copie de travail)
@@ -1153,10 +1153,30 @@
     return ret;
 }
 
-Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
-                                wchar_t *w,
-                                Py_ssize_t size)
+/* Copy the first size code units of unicode into w.  Performs no checks:
+   the caller must guarantee that w has room for size wchar_t items and that
+   size does not exceed the length of unicode plus one (trailing nul). */
+static void
+unicode_aswidechar(PyUnicodeObject *unicode,
+                   wchar_t *w,
+                   Py_ssize_t size)
 {
+#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
+    memcpy(w, unicode->str, size * sizeof(wchar_t));
+#else
+    register Py_UNICODE *u;
+    register Py_ssize_t i;
+    u = PyUnicode_AS_UNICODE(unicode);
+    for (i = size; i > 0; i--)
+        *w++ = *u++;
+#endif
+}
+
+Py_ssize_t
+PyUnicode_AsWideChar(PyUnicodeObject *unicode,
+                     wchar_t *w,
+                     Py_ssize_t size)
+{
     if (unicode == NULL) {
         PyErr_BadInternalCall();
         return -1;
@@ -1166,17 +1186,7 @@
     if (size > PyUnicode_GET_SIZE(unicode))
         size = PyUnicode_GET_SIZE(unicode) + 1;
 
-#if Py_UNICODE_SIZE == SIZEOF_WCHAR_T
-    memcpy(w, unicode->str, size * sizeof(wchar_t));
-#else
-    {
-        register Py_UNICODE *u;
-        register Py_ssize_t i;
-        u = PyUnicode_AS_UNICODE(unicode);
-        for (i = size; i > 0; i--)
-            *w++ = *u++;
-    }
-#endif
+    unicode_aswidechar(unicode, w, size);
 
     if (size > PyUnicode_GET_SIZE(unicode))
         return PyUnicode_GET_SIZE(unicode);
@@ -1184,6 +1194,41 @@
     return size;
 }
 
+/* Return a newly allocated, nul-terminated wchar_t copy of unicode, or NULL
+   with an exception set on failure.  If size is not NULL, the number of wide
+   characters written (including the final nul) is stored into *size.  The
+   caller owns the buffer and must release it with PyMem_Free(). */
+wchar_t*
+PyUnicode_AsWideCharString(PyUnicodeObject *unicode,
+                           Py_ssize_t *size)
+{
+    wchar_t* buffer;
+    Py_ssize_t buflen;
+
+    if (unicode == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    /* Refuse lengths for which (length + 1) * sizeof(wchar_t) would
+       overflow Py_ssize_t. */
+    if ((PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) < PyUnicode_GET_SIZE(unicode)) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    buflen = PyUnicode_GET_SIZE(unicode) + 1; /* copy L'\0' */
+    buffer = PyMem_MALLOC(buflen * sizeof(wchar_t));
+    if (buffer == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    unicode_aswidechar(unicode, buffer, buflen);
+    if (size != NULL)
+        *size = buflen;
+    return buffer;
+}
+
 #endif
 
 PyObject *PyUnicode_FromOrdinal(int ordinal)
Index: Doc/c-api/unicode.rst
===================================================================
--- Doc/c-api/unicode.rst (révision 85081)
+++ Doc/c-api/unicode.rst (copie de travail)
@@ -462,6 +462,16 @@
    required by the application.
 
 
+.. cfunction:: wchar_t* PyUnicode_AsWideCharString(PyUnicodeObject *unicode, Py_ssize_t *size)
+
+   Convert the Unicode object to a wide character string. The output string
+   always ends with a nul character. If *size* is not *NULL*, write the number
+   of wide characters (including the final nul character) into *\*size*.
+
+   Returns a buffer allocated by :cfunc:`PyMem_Malloc` (use :cfunc:`PyMem_Free`
+   to free it) on success. On error, returns *NULL* and *\*size* is undefined.
+
+
 .. _builtincodecs:
 
 Built-in Codecs
Index: Modules/_localemodule.c
===================================================================
--- Modules/_localemodule.c (révision 85081)
+++ Modules/_localemodule.c (copie de travail)
@@ -242,29 +242,16 @@
 {
     PyObject *os1, *os2, *result = NULL;
     wchar_t *ws1 = NULL, *ws2 = NULL;
-    Py_ssize_t len1, len2;
 
     if (!PyArg_ParseTuple(args, "UU:strcoll", &os1, &os2))
         return NULL;
     /* Convert the unicode strings to wchar[]. */
-    len1 = PyUnicode_GET_SIZE(os1) + 1;
-    ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
-    if (!ws1) {
-        PyErr_NoMemory();
+    ws1 = PyUnicode_AsWideCharString((PyUnicodeObject*)os1, NULL);
+    if (ws1 == NULL)
         goto done;
-    }
-    if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
+    ws2 = PyUnicode_AsWideCharString((PyUnicodeObject*)os2, NULL);
+    if (ws2 == NULL)
         goto done;
-    ws1[len1 - 1] = 0;
-    len2 = PyUnicode_GET_SIZE(os2) + 1;
-    ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t));
-    if (!ws2) {
-        PyErr_NoMemory();
-        goto done;
-    }
-    if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
-        goto done;
-    ws2[len2 - 1] = 0;
     /* Collate the strings. */
     result = PyLong_FromLong(wcscoll(ws1, ws2));
   done: