Index: Python/bltinmodule.c
===================================================================
--- Python/bltinmodule.c	(revision 86753)
+++ Python/bltinmodule.c	(working copy)
@@ -1377,24 +1377,13 @@
         }
     }
     else if (PyUnicode_Check(obj)) {
-        size = PyUnicode_GET_SIZE(obj);
-        if (size == 1) {
-            ord = (long)*PyUnicode_AS_UNICODE(obj);
+        const Py_UNICODE *begin = PyUnicode_AS_UNICODE(obj);
+        const Py_UNICODE *end;
+        size = PyUnicode_GET_SIZE(obj);
+        end = begin + size;
+        ord = Py_UNICODE_NEXT(begin, end);
+        if (begin == end)
             return PyLong_FromLong(ord);
-        }
-#ifndef Py_UNICODE_WIDE
-        if (size == 2) {
-            /* Decode a valid surrogate pair */
-            int c0 = PyUnicode_AS_UNICODE(obj)[0];
-            int c1 = PyUnicode_AS_UNICODE(obj)[1];
-            if (0xD800 <= c0 && c0 <= 0xDBFF &&
-                0xDC00 <= c1 && c1 <= 0xDFFF) {
-                ord = ((((c0 & 0x03FF) << 10) | (c1 & 0x03FF)) +
-                       0x00010000);
-                return PyLong_FromLong(ord);
-            }
-        }
-#endif
     }
     else if (PyByteArray_Check(obj)) {
         /* XXX Hopefully this is temporary */
Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86753)
+++ Include/unicodeobject.h	(working copy)
@@ -355,6 +355,30 @@
         for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
     } while (0)
 
+/* UTF-16 surrogate range tests.  Each macro evaluates its argument
+   twice, so the argument must not have side effects. */
+#define Py_UNICODE_ISSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_ISHIGHSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_ISLOWSURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+/* Combine a high and a low surrogate into the code point they encode. */
+#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
+    ((Py_UCS4)(((((Py_UCS4)(high) - 0xD800) << 10) | \
+      ((Py_UCS4)(low) - 0xDC00)) + 0x10000))
+/* Return the code point at *ptr and advance ptr past it.  ptr must be a
+   modifiable lvalue; it is evaluated several times.  The first unit is
+   always read.  On narrow builds a high surrogate followed by a low
+   surrogate is joined into one code point, but the second unit is only
+   read when it lies before end; a lone surrogate is returned as is. */
+#ifdef Py_UNICODE_WIDE
+#define Py_UNICODE_NEXT(ptr, end) (*(ptr)++)
+#else
+#define Py_UNICODE_NEXT(ptr, end) \
+    (((end) - (ptr) >= 2 && \
+      Py_UNICODE_ISHIGHSURROGATE((ptr)[0]) && \
+      Py_UNICODE_ISLOWSURROGATE((ptr)[1])) ? \
+     ((ptr) += 2, Py_UNICODE_JOIN_SURROGATES((ptr)[-2], (ptr)[-1])) : \
+     (Py_UCS4)*(ptr)++)
+#endif
 /* Check if substring matches at given offset.  The offset must be
    valid, and the substring must not be empty. */
 
@@ -737,7 +761,7 @@
     const char *errors          /* error handling */
     );
 
-/* Encodes a Unicode object and returns the result as Python string
+/* Encodes a Unicode object and returns the result as Python bytes
    object. */
 
 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86753)
+++ Objects/unicodeobject.c	(working copy)
@@ -1192,19 +1192,8 @@
     if (w != NULL) {
         worig = w;
         wend = w + size;
-        while (u != uend && w != wend) {
-            if (0xD800 <= u[0] && u[0] <= 0xDBFF
-                && 0xDC00 <= u[1] && u[1] <= 0xDFFF)
-            {
-                *w = (((u[0] & 0x3FF) << 10) | (u[1] & 0x3FF)) + 0x10000;
-                u += 2;
-            }
-            else {
-                *w = *u;
-                u++;
-            }
-            w++;
-        }
+        while (u != uend && w != wend)
+            *w++ = Py_UNICODE_NEXT(u, uend);
         if (w != wend)
             *w = L'\0';
         return w - worig;
@@ -3213,6 +3202,7 @@
                       const char *errors,
                       int byteorder)
 {
+    const Py_UNICODE *send = s + size;
     PyObject *v;
     unsigned char *p;
     Py_ssize_t nsize, bytesize;
@@ -3257,7 +3247,7 @@
     if (byteorder == 0)
         STORECHAR(0xFEFF);
     if (size == 0)
-        goto done;
+        return v;
 
     if (byteorder == -1) {
         /* force LE */
@@ -3274,22 +3264,11 @@
         iorder[3] = 0;
     }
 
-    while (size-- > 0) {
-        Py_UCS4 ch = *s++;
-#ifndef Py_UNICODE_WIDE
-        if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
-            Py_UCS4 ch2 = *s;
-            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
-                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
-                s++;
-                size--;
-            }
-        }
-#endif
+    while (s < send) {
+        Py_UCS4 ch;
+        ch = Py_UNICODE_NEXT(s, send);
         STORECHAR(ch);
     }
-
- done:
     return v;
 #undef STORECHAR
 }
Index: Lib/test/test_builtin.py
===================================================================
--- Lib/test/test_builtin.py	(revision 86753)
+++ Lib/test/test_builtin.py	(working copy)
@@ -857,6 +857,9 @@
         self.assertEqual(ord("\U0010FFFE"), 0x0010FFFE)
         self.assertEqual(ord("\U0010FFFF"), 0x0010FFFF)
 
+        self.assertRaises(TypeError, ord, 'ab')
+        self.assertRaises(TypeError, ord, '\U0000FFFFx')
+
     def test_pow(self):
         self.assertEqual(pow(0,0), 1)
         self.assertEqual(pow(0,1), 0)