Let tkinter pass non-BMP characters and embedded NULs between Python and Tcl.

* Lib/idlelib/PyShell.py: drop the up-front non-BMP check in write().
* Lib/test/test_tcl.py: extend the passValue assertions and add
  test_user_command.
* Modules/_tkinter.c: add fromTclString()/fromTclStringAndSize(), which
  decode Tcl strings as UTF-8 and, on a decode error, map Tcl's \xc0\x80
  encoding of NUL to a real NUL and retry with the "replace" handler; use
  them in place of the PyUnicode_FromString* calls on Tcl data.  AsObj()
  no longer rejects characters above U+FFFF.

diff -r b5530669ef70 Lib/idlelib/PyShell.py
--- a/Lib/idlelib/PyShell.py	Wed Sep 04 20:52:14 2013 +0200
+++ b/Lib/idlelib/PyShell.py	Thu Sep 05 13:52:14 2013 +0300
@@ -1271,16 +1271,6 @@
         self.set_line_and_column()
 
     def write(self, s, tags=()):
-        if isinstance(s, str) and len(s) and max(s) > '\uffff':
-            # Tk doesn't support outputting non-BMP characters
-            # Let's assume what printed string is not very long,
-            # find first non-BMP character and construct informative
-            # UnicodeEncodeError exception.
-            for start, char in enumerate(s):
-                if char > '\uffff':
-                    break
-            raise UnicodeEncodeError("UCS-2", char, start, start+1,
-                                     'Non-BMP character not supported in Tk')
         try:
             self.text.mark_gravity("iomark", "right")
             count = OutputWindow.write(self, s, tags, "iomark")
diff -r b5530669ef70 Lib/test/test_tcl.py
--- a/Lib/test/test_tcl.py	Wed Sep 04 20:52:14 2013 +0200
+++ b/Lib/test/test_tcl.py	Thu Sep 05 13:52:14 2013 +0300
@@ -163,19 +163,50 @@
         self.assertEqual(passValue(False), False)
         self.assertEqual(passValue('string'), 'string')
         self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
+        self.assertEqual(passValue('string\ud801'), 'string\ud801')
+        self.assertEqual(passValue('string\ud801\udca2'), 'string\ud801\udca2')
+        self.assertEqual(passValue('string\U000104a2'), 'string\U000104a2')
+        self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
+        self.assertEqual(passValue(b'str\x00ing'), 'str\x00ing')
+        self.assertEqual(passValue(b'str\xc0\x80ing'), 'str\x00ing')
         for i in (0, 1, -1, 2**31-1, -2**31):
             self.assertEqual(passValue(i), i)
         for f in (0.0, 1.0, -1.0, 1/3,
                   sys.float_info.min, sys.float_info.max,
                   -sys.float_info.min, -sys.float_info.max):
             self.assertEqual(passValue(f), f)
-        for f in float('nan'), float('inf'), -float('inf'):
-            if f != f: # NaN
-                self.assertNotEqual(passValue(f), f)
-            else:
-                self.assertEqual(passValue(f), f)
+        self.assertEqual(passValue(float('inf')), float('inf'))
+        self.assertEqual(passValue(-float('inf')), -float('inf'))
+        x = passValue(float('nan'))
+        self.assertNotEqual(x, x)
         self.assertEqual(passValue((1, '2', (3.4,))), (1, '2', (3.4,)))
 
+    def test_user_command(self):
+        self.interp.createcommand('testfunc', lambda arg: arg)
+        def testfunc(value):
+            return self.interp.call('testfunc', value)
+
+        self.assertEqual(testfunc(True), '1')
+        self.assertEqual(testfunc(False), '0')
+        self.assertEqual(testfunc('string'), 'string')
+        self.assertEqual(testfunc('string\u20ac'), 'string\u20ac')
+        self.assertEqual(testfunc('string\udca2'), 'string\ufffd\ufffd\ufffd')
+        self.assertEqual(testfunc('string\ud801\udca2'),
+                         'string\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd')
+        self.assertEqual(testfunc('string\U000104a2'), 'string\U000104a2')
+        #self.assertEqual(testfunc('str\x00ing'), 'str\x00ing')
+        for i in (0, 1, -1, 2**31-1, -2**31):
+            self.assertEqual(testfunc(i), str(i))
+        for f in (0.0, 1.0, -1.0, 1/3,
+                  sys.float_info.min, sys.float_info.max,
+                  -sys.float_info.min, -sys.float_info.max):
+            self.assertEqual(testfunc(f), str(f))
+        self.assertEqual(testfunc(float('inf')), 'Inf')
+        self.assertEqual(testfunc(-float('inf')), '-Inf')
+        self.assertEqual(testfunc(float('nan')), 'NaN')
+        self.assertEqual(testfunc(()), '')
+        self.assertEqual(testfunc((1, '2', (3.4,))), '1 2 3.4')
+
     def test_splitlist(self):
         splitlist = self.interp.tk.splitlist
         call = self.interp.tk.call
diff -r b5530669ef70 Modules/_tkinter.c
--- a/Modules/_tkinter.c	Wed Sep 04 20:52:14 2013 +0200
+++ b/Modules/_tkinter.c	Thu Sep 05 13:52:14 2013 +0300
@@ -343,6 +343,44 @@
 
 
 static PyObject *
+fromTclStringAndSize(const char *s, Py_ssize_t size)
+{
+    PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL);
+    if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        char *buf = NULL;
+        PyErr_Clear();
+        /* Tcl encodes null character as \xc0\x80 */
+        if (memchr(s, '\xc0', size)) {
+            const char *e = s + size;
+            char *q = buf = (char *)PyMem_Malloc(size);
+            if (buf == NULL)
+                return PyErr_NoMemory();  /* error was cleared above */
+            while (s < e) {
+                if (s + 1 < e && s[0] == '\xc0' && s[1] == '\x80') {
+                    *q++ = '\0';
+                    s += 2;
+                }
+                else
+                    *q++ = *s++;
+            }
+            s = buf;
+            size = q - s;
+        }
+        r = PyUnicode_DecodeUTF8(s, size, "replace");
+        if (buf != NULL)
+            PyMem_Free(buf);
+    }
+    return r;
+}
+
+static PyObject *
+fromTclString(const char *s)
+{
+    return fromTclStringAndSize(s, strlen(s));
+}
+
+
+static PyObject *
 Split(char *list)
 {
     int argc;
@@ -358,13 +396,13 @@
          * Could be a quoted string containing funnies, e.g. {"}.
          * Return the string itself.
          */
-        return PyUnicode_FromString(list);
+        return fromTclString(list);
     }
 
     if (argc == 0)
         v = PyUnicode_FromString("");
     else if (argc == 1)
-        v = PyUnicode_FromString(argv[0]);
+        v = fromTclString(argv[0]);
     else if ((v = PyTuple_New(argc)) != NULL) {
         int i;
         PyObject *w;
@@ -712,7 +750,7 @@
     int len;
     if (!self->string) {
         s = Tcl_GetStringFromObj(self->value, &len);
-        self->string = PyUnicode_FromStringAndSize(s, len);
+        self->string = fromTclStringAndSize(s, len);
         if (!self->string)
             return NULL;
     }
@@ -801,7 +839,7 @@
 static PyObject*
 get_typename(PyTclObject* obj, void* ignored)
 {
-    return PyUnicode_FromString(obj->value->typePtr->name);
+    return fromTclString(obj->value->typePtr->name);
 }
 
 
@@ -888,6 +926,28 @@
             return NULL;
         }
         kind = PyUnicode_KIND(value);
+        if (kind == sizeof(Tcl_UniChar))
+            return Tcl_NewUnicodeObj(inbuf, size);
+        if (PyUnicode_IS_COMPACT_ASCII(value) || kind > sizeof(Tcl_UniChar)) {
+            PyObject *bytes = NULL;
+            const char *utf8 = PyUnicode_AsUTF8AndSize(value, &size);
+            if (utf8 == NULL) {
+                PyErr_Clear();  /* drop strict-encoding error before retry */
+                bytes = PyUnicode_AsEncodedString(value, "utf-8", "surrogatepass");
+                if (bytes == NULL)
+                    return NULL;
+                utf8 = PyBytes_AS_STRING(bytes);
+                size = PyBytes_GET_SIZE(bytes);
+            }
+            if (size > INT_MAX) {
+                PyErr_Format(Tkinter_TclError, "string too long");
+                Py_XDECREF(bytes);
+                return NULL;
+            }
+            result = Tcl_NewStringObj(utf8, size);
+            Py_XDECREF(bytes);
+            return result;
+        }
         allocsize = ((size_t)size) * sizeof(Tcl_UniChar);
         outbuf = (Tcl_UniChar*)ckalloc(allocsize);
         /* Else overflow occurred, and we take the next exit */
@@ -895,23 +955,8 @@
             PyErr_NoMemory();
             return NULL;
         }
-        for (i = 0; i < size; i++) {
-            Py_UCS4 ch = PyUnicode_READ(kind, inbuf, i);
-            /* We cannot test for sizeof(Tcl_UniChar) directly,
-               so we test for UTF-8 size instead. */
-#if TCL_UTF_MAX == 3
-            if (ch >= 0x10000) {
-                /* Tcl doesn't do UTF-16, yet. */
-                PyErr_Format(Tkinter_TclError,
-                             "character U+%x is above the range "
-                             "(U+0000-U+FFFF) allowed by Tcl",
-                             ch);
-                ckfree(FREECAST outbuf);
-                return NULL;
-            }
-#endif
-            outbuf[i] = ch;
-        }
+        for (i = 0; i < size; i++)
+            outbuf[i] = (Tcl_UniChar)PyUnicode_READ(kind, inbuf, i);
         result = Tcl_NewUnicodeObj(outbuf, size);
         ckfree(FREECAST outbuf);
         return result;
@@ -938,8 +983,7 @@
     TkappObject *app = (TkappObject*)tkapp;
 
     if (value->typePtr == NULL) {
-        return PyUnicode_FromStringAndSize(value->bytes,
-                                           value->length);
+        return fromTclStringAndSize(value->bytes, value->length);
     }
 
     if (value->typePtr == app->BooleanType) {
@@ -996,15 +1040,9 @@
     }
 
     if (value->typePtr == app->StringType) {
-#if TCL_UTF_MAX==3
         return PyUnicode_FromKindAndData(
-            PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value),
+            sizeof(Tcl_UniChar), Tcl_GetUnicode(value),
             Tcl_GetCharLength(value));
-#else
-        return PyUnicode_FromKindAndData(
-            PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value),
-            Tcl_GetCharLength(value));
-#endif
     }
 
     return newPyTclObject(value);
@@ -1110,7 +1148,6 @@
         const char *s = Tcl_GetStringResult(self->interp);
-        const char *p = s;
 
-        res = PyUnicode_FromStringAndSize(s, (int)(p-s));
+        res = fromTclString(s);
     }
     return res;
 }
@@ -1265,7 +1302,7 @@
     if (err == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = fromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1289,7 +1326,7 @@
         res = Tkinter_Error(self);
 
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = fromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1312,7 +1349,7 @@
     if (err == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = PyUnicode_FromString(Tkapp_Result(self));
+        res = fromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1535,7 +1572,7 @@
             res = FromObj(self, tres);
         }
         else {
-            res = PyUnicode_FromString(Tcl_GetString(tres));
+            res = fromTclString(Tcl_GetString(tres));
         }
     }
     LEAVE_OVERLAP_TCL
@@ -1674,7 +1711,7 @@
     if (retval == TCL_ERROR)
         res = Tkinter_Error(self);
     else
-        res = Py_BuildValue("s", Tkapp_Result(self));
+        res = fromTclString(Tkapp_Result(self));
     LEAVE_OVERLAP_TCL
     return res;
 }
@@ -1799,7 +1836,7 @@
         goto finally;
 
     for (i = 0; i < argc; i++) {
-        PyObject *s = PyUnicode_FromString(argv[i]);
+        PyObject *s = fromTclString(argv[i]);
         if (!s || PyTuple_SetItem(v, i, s)) {
             Py_DECREF(v);
             v = NULL;
@@ -1897,20 +1934,8 @@
         return PythonCmd_Error(interp);
 
     for (i = 0; i < (argc - 1); i++) {
-        PyObject *s = PyUnicode_FromString(argv[i + 1]);
-        if (!s) {
-            /* Is Tk leaking 0xC080 in %A - a "modified" utf-8 null? */
-            if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) &&
-                !strcmp(argv[i + 1], "\xC0\x80")) {
-                PyErr_Clear();
-                /* Convert to "strict" utf-8 null */
-                s = PyUnicode_FromString("\0");
-            } else {
-                Py_DECREF(arg);
-                return PythonCmd_Error(interp);
-            }
-        }
-        if (PyTuple_SetItem(arg, i, s)) {
+        PyObject *s = fromTclString(argv[i + 1]);
+        if (!s || PyTuple_SetItem(arg, i, s)) {
             Py_DECREF(arg);
             return PythonCmd_Error(interp);
         }