diff -r 7801ef4a4ce3 Lib/idlelib/PyShell.py --- a/Lib/idlelib/PyShell.py Tue Sep 03 19:43:49 2013 -0500 +++ b/Lib/idlelib/PyShell.py Wed Sep 04 23:50:45 2013 +0300 @@ -1268,16 +1268,6 @@ self.set_line_and_column() def write(self, s, tags=()): - if isinstance(s, str) and len(s) and max(s) > '\uffff': - # Tk doesn't support outputting non-BMP characters - # Let's assume what printed string is not very long, - # find first non-BMP character and construct informative - # UnicodeEncodeError exception. - for start, char in enumerate(s): - if char > '\uffff': - break - raise UnicodeEncodeError("UCS-2", char, start, start+1, - 'Non-BMP character not supported in Tk') try: self.text.mark_gravity("iomark", "right") count = OutputWindow.write(self, s, tags, "iomark") diff -r 7801ef4a4ce3 Lib/test/test_tcl.py --- a/Lib/test/test_tcl.py Tue Sep 03 19:43:49 2013 -0500 +++ b/Lib/test/test_tcl.py Wed Sep 04 23:50:45 2013 +0300 @@ -163,6 +163,9 @@ self.assertEqual(passValue(False), False) self.assertEqual(passValue('string'), 'string') self.assertEqual(passValue('string\u20ac'), 'string\u20ac') + self.assertEqual(passValue('string\ud801'), 'string\ud801') + self.assertEqual(passValue('string\ud801\udca2'), 'string\ud801\udca2') + self.assertEqual(passValue('string\U000104a2'), 'string\U000104a2') for i in (0, 1, -1, 2**31-1, -2**31): self.assertEqual(passValue(i), i) for f in (0.0, 1.0, -1.0, 1/3, diff -r 7801ef4a4ce3 Modules/_tkinter.c --- a/Modules/_tkinter.c Tue Sep 03 19:43:49 2013 -0500 +++ b/Modules/_tkinter.c Wed Sep 04 23:50:45 2013 +0300 @@ -442,6 +442,19 @@ static PyObject * +fromTclStringAndSize(const char *s, Py_ssize_t size) +{ + return PyUnicode_DecodeUTF8(s, size, "replace"); +} + +static PyObject * +fromTclString(const char *s) +{ + return fromTclStringAndSize(s, strlen(s)); +} + + +static PyObject * Split(char *list) { int argc; @@ -458,13 +471,13 @@ * Could be a quoted string containing funnies, e.g. {"}. * Return the string itself. */ - return PyUnicode_FromString(list); + return fromTclString(list); } if (argc == 0) v = PyUnicode_FromString(""); else if (argc == 1) - v = PyUnicode_FromString(argv[0]); + v = fromTclString(argv[0]); else if ((v = PyTuple_New(argc)) != NULL) { int i; PyObject *w; @@ -807,7 +820,7 @@ int len; if (!self->string) { s = Tcl_GetStringFromObj(self->value, &len); - self->string = PyUnicode_FromStringAndSize(s, len); + self->string = fromTclStringAndSize(s, len); if (!self->string) return NULL; } @@ -896,7 +909,7 @@ static PyObject* get_typename(PyTclObject* obj, void* ignored) { - return PyUnicode_FromString(obj->value->typePtr->name); + return fromTclString(obj->value->typePtr->name); } @@ -1008,6 +1021,27 @@ return NULL; } kind = PyUnicode_KIND(value); + if (kind == sizeof(Tcl_UniChar)) + return Tcl_NewUnicodeObj(inbuf, size); + if (PyUnicode_IS_COMPACT_ASCII(value) || kind > sizeof(Tcl_UniChar)) { + PyObject *bytes = NULL; + const char *utf8 = PyUnicode_AsUTF8AndSize(value, &size); + if (utf8 == NULL) { + bytes = PyUnicode_AsEncodedString(value, "utf-8", "surrogatepass"); + if (bytes == NULL) + return NULL; + utf8 = PyBytes_AS_STRING(bytes); + size = PyBytes_GET_SIZE(bytes); + } + if (size > INT_MAX) { + PyErr_Format(Tkinter_TclError, "string too long"); + Py_XDECREF(bytes); + return NULL; + } + result = Tcl_NewStringObj(utf8, size); + Py_XDECREF(bytes); + return result; + } allocsize = ((size_t)size) * sizeof(Tcl_UniChar); outbuf = (Tcl_UniChar*)ckalloc(allocsize); /* Else overflow occurred, and we take the next exit */ @@ -1015,23 +1049,8 @@ PyErr_NoMemory(); return NULL; } - for (i = 0; i < size; i++) { - Py_UCS4 ch = PyUnicode_READ(kind, inbuf, i); - /* We cannot test for sizeof(Tcl_UniChar) directly, - so we test for UTF-8 size instead. */ -#if TCL_UTF_MAX == 3 - if (ch >= 0x10000) { - /* Tcl doesn't do UTF-16, yet. */ - PyErr_Format(Tkinter_TclError, - "character U+%x is above the range " - "(U+0000-U+FFFF) allowed by Tcl", - ch); - ckfree(FREECAST outbuf); - return NULL; - } -#endif - outbuf[i] = ch; - } + for (i = 0; i < size; i++) + outbuf[i] = (Tcl_UniChar)PyUnicode_READ(kind, inbuf, i); result = Tcl_NewUnicodeObj(outbuf, size); ckfree(FREECAST outbuf); return result; @@ -1058,8 +1077,7 @@ TkappObject *app = (TkappObject*)tkapp; if (value->typePtr == NULL) { - return PyUnicode_FromStringAndSize(value->bytes, - value->length); + return fromTclStringAndSize(value->bytes, value->length); } if (value->typePtr == app->BooleanType) { @@ -1116,15 +1134,9 @@ } if (value->typePtr == app->StringType) { -#if TCL_UTF_MAX==3 return PyUnicode_FromKindAndData( - PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value), + sizeof(Tcl_UniChar), Tcl_GetUnicode(value), Tcl_GetCharLength(value)); -#else - return PyUnicode_FromKindAndData( - PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value), - Tcl_GetCharLength(value)); -#endif } return newPyTclObject(value); @@ -1230,7 +1242,7 @@ const char *s = Tcl_GetStringResult(self->interp); const char *p = s; - res = PyUnicode_FromStringAndSize(s, (int)(p-s)); + res = fromTclStringAndSize(s, (int)(p-s)); } return res; } @@ -1421,7 +1433,7 @@ if (err == TCL_ERROR) res = Tkinter_Error(self); else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = fromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1473,7 +1485,7 @@ res = Tkinter_Error(self); else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = fromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1496,7 +1508,7 @@ if (err == TCL_ERROR) res = Tkinter_Error(self); else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = fromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1719,7 +1731,7 @@ res = FromObj(self, tres); } else { - res = PyUnicode_FromString(Tcl_GetString(tres)); + res = fromTclString(Tcl_GetString(tres)); } } LEAVE_OVERLAP_TCL @@ -1982,7 +1994,7 @@ goto finally; for (i = 0; i < argc; i++) { - PyObject *s = PyUnicode_FromString(argv[i]); + PyObject *s = fromTclString(argv[i]); if (!s || PyTuple_SetItem(v, i, s)) { Py_DECREF(v); v = NULL; @@ -2101,7 +2113,7 @@ return PythonCmd_Error(interp); for (i = 0; i < (argc - 1); i++) { - PyObject *s = PyUnicode_FromString(argv[i + 1]); + PyObject *s = fromTclString(argv[i + 1]); if (!s) { /* Is Tk leaking 0xC080 in %A - a "modified" utf-8 null? */ if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) &&