diff -r 9afdd88fe33a Include/pyexpat.h --- a/Include/pyexpat.h Wed May 22 15:35:35 2013 +0300 +++ b/Include/pyexpat.h Wed May 22 16:41:31 2013 +0300 @@ -45,6 +45,7 @@ void (*SetUserData)(XML_Parser parser, void *userData); void (*SetStartDoctypeDeclHandler)(XML_Parser parser, XML_StartDoctypeDeclHandler start); + enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding); /* always add new stuff to the end! */ }; diff -r 9afdd88fe33a Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py Wed May 22 15:35:35 2013 +0300 +++ b/Lib/test/test_xml_etree.py Wed May 22 16:41:31 2013 +0300 @@ -668,15 +668,18 @@ elem = ET.fromstring("text") self.assertEqual(ET.tostring(elem), b'text') - def test_encoding(encoding): - def check(encoding): - ET.XML("" % encoding) - check("ascii") - check("us-ascii") - check("iso-8859-1") - check("iso-8859-15") - check("cp437") - check("mac-roman") + def test_encoding(self): + def check(encoding, body=''): + xml = ("%s" % + (encoding, body)) + self.assertEqual(ET.XML(xml.encode(encoding)).text, body) + self.assertEqual(ET.XML(xml).text, body) + check("ascii", 'a') + check("us-ascii", 'a') + check("iso-8859-1", '\xbd') + check("iso-8859-15", '\u20ac') + check("cp437", '\u221a') + check("mac-roman", '\u02da') def test_methods(self): # Test serialization methods. @@ -2002,11 +2005,13 @@ class XMLParserTest(unittest.TestCase): - sample1 = '22' - sample2 = ('' - 'text') + sample1 = b'22' + sample2 = (b'' + b'text') + sample3 = ('\n' + '$\xa3\u20ac\U0001017b') def _check_sample_element(self, e): self.assertEqual(e.tag, 'file') @@ -2042,12 +2047,21 @@ _doctype = (name, pubid, system) parser = MyParserWithDoctype() - parser.feed(self.sample2) + with self.assertWarns(DeprecationWarning): + parser.feed(self.sample2) parser.close() self.assertEqual(_doctype, ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) + def test_parse_string(self): + parser = ET.XMLParser(target=ET.TreeBuilder()) + parser.feed(self.sample3) + e = parser.close() + self.assertEqual(e.tag, 'money') + self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b') + self.assertEqual(e.text, '$\xa3\u20ac\U0001017b') + class NamespaceParseTest(unittest.TestCase): def test_find_with_namespace(self): @@ -2473,6 +2487,7 @@ ElementFindTest, ElementIterTest, TreeBuilderTest, + XMLParserTest, BugsTest, ] diff -r 9afdd88fe33a Modules/_elementtree.c --- a/Modules/_elementtree.c Wed May 22 15:35:35 2013 +0300 +++ b/Modules/_elementtree.c Wed May 22 16:41:31 2013 +0300 @@ -3288,7 +3288,7 @@ } LOCAL(PyObject*) -expat_parse(XMLParserObject* self, char* data, int data_len, int final) +expat_parse(XMLParserObject* self, const char* data, int data_len, int final) { int ok; @@ -3334,16 +3334,37 @@ } static PyObject* -xmlparser_feed(XMLParserObject* self, PyObject* args) +xmlparser_feed(XMLParserObject* self, PyObject* arg) { /* feed data to parser */ - char* data; - int data_len; - if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len)) - return NULL; - - return expat_parse(self, data, data_len, 0); + if (PyUnicode_Check(arg)) { + Py_ssize_t data_len; + const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len); + if (data == NULL) + return NULL; + if (data_len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + /* Explicitly set UTF-8 encoding. Return code ignored. */ + (void)EXPAT(SetEncoding)(self->parser, "utf-8"); + return expat_parse(self, data, (int)data_len, 0); + } + else { + Py_buffer view; + PyObject *res; + if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0) + return NULL; + if (view.len > INT_MAX) { + PyBuffer_Release(&view); + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + res = expat_parse(self, view.buf, (int)view.len, 0); + PyBuffer_Release(&view); + return res; + } } static PyObject* @@ -3523,7 +3544,7 @@ } static PyMethodDef xmlparser_methods[] = { - {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS}, + {"feed", (PyCFunction) xmlparser_feed, METH_O}, {"close", (PyCFunction) xmlparser_close, METH_VARARGS}, {"_parse_whole", (PyCFunction) xmlparser_parse_whole, METH_VARARGS}, {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS}, diff -r 9afdd88fe33a Modules/pyexpat.c --- a/Modules/pyexpat.c Wed May 22 15:35:35 2013 +0300 +++ b/Modules/pyexpat.c Wed May 22 16:41:31 2013 +0300 @@ -1937,6 +1937,7 @@ capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; capi.SetUserData = XML_SetUserData; capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler; + capi.SetEncoding = XML_SetEncoding; /* export using capsule */ capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);