Index: Lib/test/test_syslog.py =================================================================== --- Lib/test/test_syslog.py (revision 87118) +++ Lib/test/test_syslog.py (working copy) @@ -11,6 +11,8 @@ def test_openlog(self): syslog.openlog('python') + # Issue #6697. + self.assertRaises(UnicodeEncodeError, syslog.openlog, '\uD800') def test_syslog(self): syslog.openlog('python') Index: Lib/test/datetimetester.py =================================================================== --- Lib/test/datetimetester.py (revision 87118) +++ Lib/test/datetimetester.py (working copy) @@ -2508,11 +2508,18 @@ # Check that an invalid tzname result raises an exception. class Badtzname(tzinfo): - def tzname(self, dt): return 42 + tz = 42 + def tzname(self, dt): return self.tz t = time(2, 3, 4, tzinfo=Badtzname()) self.assertEqual(t.strftime("%H:%M:%S"), "02:03:04") self.assertRaises(TypeError, t.strftime, "%Z") + # Issue #6697: + if '_Fast' in str(type(self)): + Badtzname.tz = '\ud800' + self.assertRaises(ValueError, t.strftime, "%Z") + + def test_hash_edge_cases(self): # Offsets that overflow a basic time. t1 = self.theclass(0, 1, 2, 3, tzinfo=FixedOffset(1439, "")) Index: Lib/test/test_socket.py =================================================================== --- Lib/test/test_socket.py (revision 87118) +++ Lib/test/test_socket.py (working copy) @@ -667,7 +667,10 @@ type=socket.SOCK_STREAM, proto=0, flags=socket.AI_PASSIVE) self.assertEqual(a, b) + # Issue #6697. + self.assertRaises(UnicodeEncodeError, socket.getaddrinfo, 'localhost', '\uD800') + def test_getnameinfo(self): # only IP addresses are allowed self.assertRaises(socket.error, socket.getnameinfo, ('mail.python.org',0), 0) Index: Lib/test/test_pyexpat.py =================================================================== --- Lib/test/test_pyexpat.py (revision 87118) +++ Lib/test/test_pyexpat.py (working copy) @@ -203,7 +203,10 @@ operations = out.out self._verify_parse_output(operations) + # Issue #6697. 
+ self.assertRaises(AttributeError, getattr, parser, '\uD800') + def test_parse_file(self): # Try parsing a file out = self.Outputter() Index: Lib/test/test_xml_etree_c.py =================================================================== --- Lib/test/test_xml_etree_c.py (revision 87118) +++ Lib/test/test_xml_etree_c.py (working copy) @@ -8,10 +8,26 @@ # cElementTree specific tests def sanity(): - """ + r""" Import sanity. >>> from xml.etree import cElementTree + + Issue #6697. + + >>> e = cElementTree.Element('a') + >>> getattr(e, '\uD800') # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + UnicodeEncodeError: ... + + >>> p = cElementTree.XMLParser() + >>> p.version.split()[0] + 'Expat' + >>> getattr(p, '\uD800') + Traceback (most recent call last): + ... + AttributeError: 'XMLParser' object has no attribute '\ud800' """ Index: Modules/_datetimemodule.c =================================================================== --- Modules/_datetimemodule.c (revision 87118) +++ Modules/_datetimemodule.c (working copy) @@ -1257,7 +1257,8 @@ assert(PyUnicode_Check(Zreplacement)); ptoappend = _PyUnicode_AsStringAndSize(Zreplacement, &ntoappend); - ntoappend = Py_SIZE(Zreplacement); + if (ptoappend == NULL) + goto Done; } else if (ch == 'f') { /* format microseconds */ Index: Modules/socketmodule.c =================================================================== --- Modules/socketmodule.c (revision 87118) +++ Modules/socketmodule.c (working copy) @@ -1406,9 +1406,9 @@ { struct sockaddr_hci *addr = (struct sockaddr_hci *)addr_ret; #if defined(__NetBSD__) || defined(__DragonFly__) - char *straddr = PyBytes_AS_STRING(args); + char *straddr = PyBytes_AS_STRING(args); - _BT_HCI_MEMB(addr, family) = AF_BLUETOOTH; + _BT_HCI_MEMB(addr, family) = AF_BLUETOOTH; if (straddr == NULL) { PyErr_SetString(socket_error, "getsockaddrarg: " "wrong format"); @@ -4022,8 +4022,10 @@ pptr = pbuf; } else if (PyUnicode_Check(pobj)) { pptr = _PyUnicode_AsString(pobj); + if (pptr == 
NULL) + goto err; } else if (PyBytes_Check(pobj)) { - pptr = PyBytes_AsString(pobj); + pptr = PyBytes_AS_STRING(pobj); } else if (pobj == Py_None) { pptr = (char *)NULL; } else { Index: Modules/_lsprof.c =================================================================== --- Modules/_lsprof.c (revision 87118) +++ Modules/_lsprof.c (working copy) @@ -178,7 +178,16 @@ PyObject *mod = fn->m_module; const char *modname; if (mod && PyUnicode_Check(mod)) { + /* XXX: The following will truncate module names with embedded + * null-characters. It is unlikely that this can happen in + * practice and the consequences are not serious enough to + * introduce extra checks here. + */ modname = _PyUnicode_AsString(mod); + if (modname == NULL) { + modname = ""; + PyErr_Clear(); + } } else if (mod && PyModule_Check(mod)) { modname = PyModule_GetName(mod); Index: Modules/syslogmodule.c =================================================================== --- Modules/syslogmodule.c (revision 87118) +++ Modules/syslogmodule.c (working copy) @@ -68,9 +68,9 @@ * is optional. 
*/ - Py_ssize_t argv_len; + Py_ssize_t argv_len, scriptlen; PyObject *scriptobj; - char *atslash; + Py_UNICODE *atslash, *atstart; PyObject *argv = PySys_GetObject("argv"); if (argv == NULL) { @@ -90,13 +90,16 @@ if (!PyUnicode_Check(scriptobj)) { return(NULL); } - if (PyUnicode_GET_SIZE(scriptobj) == 0) { + scriptlen = PyUnicode_GET_SIZE(scriptobj); + if (scriptlen == 0) { return(NULL); } - atslash = strrchr(_PyUnicode_AsString(scriptobj), SEP); + atstart = PyUnicode_AS_UNICODE(scriptobj); + atslash = Py_UNICODE_strrchr(atstart, SEP); if (atslash) { - return(PyUnicode_FromString(atslash + 1)); + return(PyUnicode_FromUnicode(atslash + 1, + scriptlen - (atslash - atstart) - 1)); } else { Py_INCREF(scriptobj); return(scriptobj); @@ -113,6 +116,7 @@ long facility = LOG_USER; PyObject *new_S_ident_o = NULL; static char *keywords[] = {"ident", "logoption", "facility", 0}; + char *S_ident_o_str; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Ull:openlog", keywords, &new_S_ident_o, &logopt, &facility)) @@ -130,12 +134,19 @@ Py_XDECREF(S_ident_o); S_ident_o = new_S_ident_o; + if (S_ident_o) { + S_ident_o_str = _PyUnicode_AsString(S_ident_o); + if (S_ident_o_str == NULL) + return NULL; + } else + S_ident_o_str = NULL; + /* At this point, S_ident_o should be INCREF()ed. openlog(3) does not * make a copy, and syslog(3) later uses it. We can't garbagecollect it * If NULL, just let openlog figure it out (probably using C argv[0]). */ - openlog(S_ident_o ? 
_PyUnicode_AsString(S_ident_o) : NULL, logopt, facility); + openlog(S_ident_o_str, logopt, facility); S_log_open = 1; Py_INCREF(Py_None); Index: Modules/pyexpat.c =================================================================== --- Modules/pyexpat.c (revision 87118) +++ Modules/pyexpat.c (working copy) @@ -1215,11 +1215,12 @@ } static int -handlername2int(const char *name) +handlername2int(PyObject *name) { int i; for (i = 0; handler_info[i].name != NULL; i++) { - if (strcmp(name, handler_info[i].name) == 0) { + if (PyUnicode_CompareWithASCIIString( + name, handler_info[i].name) == 0) { return i; } } @@ -1237,13 +1238,13 @@ static PyObject * xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) { - char *name = ""; + Py_UNICODE *name; int handlernum = -1; - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); + if (!PyUnicode_Check(nameobj)) + goto generic; - handlernum = handlername2int(name); + handlernum = handlername2int(nameobj); if (handlernum != -1) { PyObject *result = self->handlers[handlernum]; @@ -1252,46 +1253,48 @@ Py_INCREF(result); return result; } + + name = PyUnicode_AS_UNICODE(nameobj); if (name[0] == 'E') { - if (strcmp(name, "ErrorCode") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0) return PyLong_FromLong((long) XML_GetErrorCode(self->itself)); - if (strcmp(name, "ErrorLineNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0) return PyLong_FromLong((long) XML_GetErrorLineNumber(self->itself)); - if (strcmp(name, "ErrorColumnNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0) return PyLong_FromLong((long) XML_GetErrorColumnNumber(self->itself)); - if (strcmp(name, "ErrorByteIndex") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0) return PyLong_FromLong((long) XML_GetErrorByteIndex(self->itself)); } if (name[0] == 'C') { - if (strcmp(name, "CurrentLineNumber") == 0) + if 
(PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0) return PyLong_FromLong((long) XML_GetCurrentLineNumber(self->itself)); - if (strcmp(name, "CurrentColumnNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0) return PyLong_FromLong((long) XML_GetCurrentColumnNumber(self->itself)); - if (strcmp(name, "CurrentByteIndex") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0) return PyLong_FromLong((long) XML_GetCurrentByteIndex(self->itself)); } if (name[0] == 'b') { - if (strcmp(name, "buffer_size") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0) return PyLong_FromLong((long) self->buffer_size); - if (strcmp(name, "buffer_text") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0) return get_pybool(self->buffer != NULL); - if (strcmp(name, "buffer_used") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0) return PyLong_FromLong((long) self->buffer_used); } - if (strcmp(name, "namespace_prefixes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0) return get_pybool(self->ns_prefixes); - if (strcmp(name, "ordered_attributes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0) return get_pybool(self->ordered_attributes); - if (strcmp(name, "specified_attributes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0) return get_pybool((long) self->specified_attributes); - if (strcmp(name, "intern") == 0) { + if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) { if (self->intern == NULL) { Py_INCREF(Py_None); return Py_None; @@ -1301,7 +1304,7 @@ return self->intern; } } - + generic: return PyObject_GenericGetAttr((PyObject*)self, nameobj); } @@ -1352,7 +1355,7 @@ } static int -sethandler(xmlparseobject *self, const char *name, PyObject* v) +sethandler(xmlparseobject *self, PyObject *name, PyObject* v) { int handlernum 
= handlername2int(name); if (handlernum >= 0) { @@ -1388,14 +1391,15 @@ } static int -xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) +xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v) { /* Set attribute 'name' to value 'v'. v==NULL means delete */ if (v == NULL) { PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); return -1; } - if (strcmp(name, "buffer_text") == 0) { + assert(PyUnicode_Check(name)); + if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) { if (PyObject_IsTrue(v)) { if (self->buffer == NULL) { self->buffer = malloc(self->buffer_size); @@ -1414,7 +1418,7 @@ } return 0; } - if (strcmp(name, "namespace_prefixes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) { if (PyObject_IsTrue(v)) self->ns_prefixes = 1; else @@ -1422,14 +1426,14 @@ XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); return 0; } - if (strcmp(name, "ordered_attributes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) { if (PyObject_IsTrue(v)) self->ordered_attributes = 1; else self->ordered_attributes = 0; return 0; } - if (strcmp(name, "specified_attributes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) { if (PyObject_IsTrue(v)) self->specified_attributes = 1; else @@ -1437,7 +1441,7 @@ return 0; } - if (strcmp(name, "buffer_size") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) { long new_buffer_size; if (!PyLong_Check(v)) { PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); @@ -1480,7 +1484,7 @@ return 0; } - if (strcmp(name, "CharacterDataHandler") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) { /* If we're changing the character data handler, flush all * cached data with the old handler. 
Not sure there's a * "right" thing to do, though, but this probably won't @@ -1492,7 +1496,7 @@ if (sethandler(self, name, v)) { return 0; } - PyErr_SetString(PyExc_AttributeError, name); + PyErr_SetObject(PyExc_AttributeError, name); return -1; } @@ -1524,7 +1528,7 @@ (destructor)xmlparse_dealloc, /*tp_dealloc*/ (printfunc)0, /*tp_print*/ 0, /*tp_getattr*/ - (setattrfunc)xmlparse_setattr, /*tp_setattr*/ + 0, /*tp_setattr*/ 0, /*tp_reserved*/ (reprfunc)0, /*tp_repr*/ 0, /*tp_as_number*/ @@ -1534,7 +1538,7 @@ (ternaryfunc)0, /*tp_call*/ (reprfunc)0, /*tp_str*/ (getattrofunc)xmlparse_getattro, /* tp_getattro */ - 0, /* tp_setattro */ + (setattrofunc)xmlparse_setattro, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ Xmlparsetype__doc__, /* tp_doc - Documentation string */ Index: Modules/parsermodule.c =================================================================== --- Modules/parsermodule.c (revision 87118) +++ Modules/parsermodule.c (working copy) @@ -792,6 +792,11 @@ } } temp_str = _PyUnicode_AsStringAndSize(temp, &len); + if (temp_str == NULL) { + Py_DECREF(temp); + Py_XDECREF(elem); + return 0; + } strn = (char *)PyObject_MALLOC(len + 1); if (strn != NULL) (void) memcpy(strn, temp_str, len + 1); @@ -870,6 +875,8 @@ encoding = PySequence_GetItem(tuple, 2); /* tuple isn't borrowed anymore here, need to DECREF */ tuple = PySequence_GetSlice(tuple, 0, 2); + if (tuple == NULL) + return NULL; } res = PyNode_New(num); if (res != NULL) { @@ -881,6 +888,12 @@ Py_ssize_t len; const char *temp; temp = _PyUnicode_AsStringAndSize(encoding, &len); + if (temp == NULL) { + Py_DECREF(res); + Py_DECREF(encoding); + Py_DECREF(tuple); + return NULL; + } res->n_str = (char *)PyObject_MALLOC(len + 1); if (res->n_str != NULL && temp != NULL) (void) memcpy(res->n_str, temp, len + 1); Index: Modules/_elementtree.c =================================================================== --- Modules/_elementtree.c (revision 87118) +++ 
Modules/_elementtree.c (working copy) @@ -1483,6 +1483,9 @@ if (PyUnicode_Check(nameobj)) name = _PyUnicode_AsString(nameobj); + + if (name == NULL) + return NULL; /* handle common attributes first */ if (strcmp(name, "tag") == 0) { @@ -2194,8 +2197,8 @@ Py_XDECREF(res); } else if (!PyErr_Occurred()) { /* Report the first error, not the last */ - char message[128]; - sprintf(message, "undefined entity &%.100s;", _PyUnicode_AsString(key)); + char message[128] = "undefined entity "; + strncat(message, data_in, data_len < 100?data_len:100); expat_set_error( message, EXPAT(GetErrorLineNumber)(self->parser), @@ -2796,29 +2799,25 @@ static PyObject* xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) { - PyObject* res; - char *name = ""; - - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); - - PyErr_Clear(); - - if (strcmp(name, "entity") == 0) - res = self->entity; - else if (strcmp(name, "target") == 0) - res = self->target; - else if (strcmp(name, "version") == 0) { - char buffer[100]; - sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, + if (PyUnicode_Check(nameobj)) { + PyObject* res; + if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0) + res = self->entity; + else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0) + res = self->target; + else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) { + return PyUnicode_FromFormat( + "Expat %d.%d.%d", XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); - return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "strict"); - } else { - return PyObject_GenericGetAttr((PyObject*) self, nameobj); - } + } + else + goto generic; - Py_INCREF(res); - return res; + Py_INCREF(res); + return res; + } + generic: + return PyObject_GenericGetAttr((PyObject*) self, nameobj); } static PyTypeObject XMLParser_Type = { Index: Modules/_testcapimodule.c =================================================================== --- Modules/_testcapimodule.c (revision 87118) +++ 
Modules/_testcapimodule.c (working copy) @@ -1741,15 +1741,16 @@ { PyObject *result; char *msg; + static const Py_UNICODE one[] = {'1', 0}; -#define CHECK_1_FORMAT(FORMAT, TYPE) \ - result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \ - if (result == NULL) \ - return NULL; \ - if (strcmp(_PyUnicode_AsString(result), "1")) { \ - msg = FORMAT " failed at 1"; \ - goto Fail; \ - } \ +#define CHECK_1_FORMAT(FORMAT, TYPE) \ + result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \ + if (result == NULL) \ + return NULL; \ + if (Py_UNICODE_strcmp(PyUnicode_AS_UNICODE(result), one)) { \ + msg = FORMAT " failed at 1"; \ + goto Fail; \ + } \ Py_DECREF(result) CHECK_1_FORMAT("%d", int);