Patch description (free text ahead of the first file header; not part of any hunk).

Target: PyUnicode_FromWideChar() in Objects/unicodeobject.c.

What the hunks change:
  * Builds where Py_UNICODE_WIDE is not defined: the input is scanned once
    before allocation, and every wchar_t value greater than 0xffff adds one
    to the requested length (alloc). The object is then created with
    _PyUnicode_New(alloc) instead of _PyUnicode_New(size). The input pointer
    is restored from orig_w after the scan.
  * In the element-by-element copy loop (the #else branch following the
    memcpy), and again only when Py_UNICODE_WIDE is not defined, a value v
    greater than 0xffff is stored as two units:
        0xD800 | ((v - 0x10000) >> 10)
        0xDC00 | ((v - 0x10000) & 0x3FF)
    All other values are copied through unchanged.
  * The loop counter i is now declared once at function scope because both
    the pre-scan and the copy loop use it.
  * The "Py_ssize_t size)" parameter line and the PyErr_BadInternalCall() /
    return NULL lines appear to be re-indented only; their text is unchanged.

NOTE(review): no upper bound is checked. A value above 0x10FFFF still takes
the two-unit path, and (v - 0x10000) >> 10 then no longer fits in 10 bits.
Confirm whether callers can pass such values.
NOTE(review): the pre-scan is guarded only by Py_UNICODE_WIDE, so it
presumably also runs on builds that take the memcpy branch. The condition
guarding that branch is outside these hunks; verify against the full file.

--- Objects/unicodeobject.c.orig 2009-01-17 04:57:06.000000000 +0100 +++ Objects/unicodeobject.c 2009-01-17 04:54:35.000000000 +0100 @@ -562,22 +562,37 @@ #ifdef HAVE_WCHAR_H PyObject *PyUnicode_FromWideChar(register const wchar_t *w, - Py_ssize_t size) + Py_ssize_t size) { PyUnicodeObject *unicode; + register Py_ssize_t i; + Py_ssize_t alloc; +#ifndef Py_UNICODE_WIDE + const wchar_t *orig_w; +#endif if (w == NULL) { if (size == 0) return PyUnicode_FromStringAndSize(NULL, 0); - PyErr_BadInternalCall(); - return NULL; + PyErr_BadInternalCall(); + return NULL; } if (size == -1) { size = wcslen(w); } - unicode = _PyUnicode_New(size); + alloc = size; +#ifndef Py_UNICODE_WIDE + orig_w = w; + for (i = size; i > 0; i--) { + if (0xffff < *w) + alloc++; + w++; + } + w = orig_w; +#endif + unicode = _PyUnicode_New(alloc); if (!unicode) return NULL; @@ -586,11 +601,20 @@ memcpy(unicode->str, w, size * sizeof(wchar_t)); #else { - register Py_UNICODE *u; - register Py_ssize_t i; - u = PyUnicode_AS_UNICODE(unicode); - for (i = size; i > 0; i--) - *u++ = *w++; + register Py_UNICODE *u; + u = PyUnicode_AS_UNICODE(unicode); + for (i = size; i > 0; i--) { +#ifndef Py_UNICODE_WIDE + if (0xffff < *w) { + wchar_t ordinal = *w++; + ordinal -= 0x10000; + *u++ = 0xD800 | (ordinal >> 10); + *u++ = 0xDC00 | (ordinal & 0x3FF); + continue; + } +#endif + *u++ = *w++; + } } #endif