diff -r 5eefc85b8be8 Include/pyexpat.h
--- a/Include/pyexpat.h Mon Feb 25 17:21:42 2013 +0200
+++ b/Include/pyexpat.h Mon Feb 25 17:38:48 2013 +0200
@@ -45,6 +45,7 @@
void (*SetUserData)(XML_Parser parser, void *userData);
void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
XML_StartDoctypeDeclHandler start);
+ enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding);
/* always add new stuff to the end! */
};
diff -r 5eefc85b8be8 Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py Mon Feb 25 17:21:42 2013 +0200
+++ b/Lib/test/test_xml_etree.py Mon Feb 25 17:38:48 2013 +0200
@@ -1847,11 +1847,13 @@
class XMLParserTest(unittest.TestCase):
- sample1 = '22'
- sample2 = (''
- 'text')
+ sample1 = b'22'
+ sample2 = (b''
+ b'text')
+ sample3 = ('\n'
+ '$\xa3\u20ac\U0001017b')
def _check_sample_element(self, e):
self.assertEqual(e.tag, 'file')
@@ -1885,12 +1887,21 @@
_doctype = (name, pubid, system)
parser = MyParserWithDoctype()
- parser.feed(self.sample2)
+ with self.assertWarns(DeprecationWarning):
+ parser.feed(self.sample2)
parser.close()
self.assertEqual(_doctype,
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
+ def test_parse_string(self):  # feed() is given a str (sample3), not bytes
+ parser = ET.XMLParser(target=ET.TreeBuilder())
+ parser.feed(self.sample3)
+ e = parser.close()  # close() hands back the element built by the TreeBuilder target
+ self.assertEqual(e.tag, 'money')
+ self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')  # same non-ASCII characters as in sample3
+ self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
+
class NamespaceParseTest(unittest.TestCase):
def test_find_with_namespace(self):
@@ -2312,6 +2323,7 @@
ElementFindTest,
ElementIterTest,
TreeBuilderTest,
+ XMLParserTest,
BugsTest,
]
diff -r 5eefc85b8be8 Modules/_elementtree.c
--- a/Modules/_elementtree.c Mon Feb 25 17:21:42 2013 +0200
+++ b/Modules/_elementtree.c Mon Feb 25 17:38:48 2013 +0200
@@ -3327,13 +3327,22 @@
Py_TYPE(self)->tp_free((PyObject *)self);
}
+#define MAX_CHUNK_SIZE (1 << 20) /* most bytes expat_parse passes to one EXPAT(Parse) call; longer input is fed in pieces of this size. NOTE(review): presumably needed because data_len is now Py_ssize_t while Expat takes an int length -- confirm */
+
LOCAL(PyObject*)
-expat_parse(XMLParserObject* self, char* data, int data_len, int final)
+expat_parse(XMLParserObject* self, const char* data, Py_ssize_t data_len, int final)
{
int ok;
+ while (data_len > MAX_CHUNK_SIZE) {
+ ok = EXPAT(Parse)(self->parser, data, MAX_CHUNK_SIZE, 0);
+ if (!ok)
+ goto done;
+ data += MAX_CHUNK_SIZE;
+ data_len -= MAX_CHUNK_SIZE;
+ }
ok = EXPAT(Parse)(self->parser, data, data_len, final);
-
+done:
if (PyErr_Occurred())
return NULL;
@@ -3374,16 +3383,28 @@
}
static PyObject*
-xmlparser_feed(XMLParserObject* self, PyObject* args)
+xmlparser_feed(XMLParserObject* self, PyObject* arg) /* registered as METH_O: arg is the data object itself, not an argument tuple */
{
/* feed data to parser */
- char* data;
- int data_len;
- if (!PyArg_ParseTuple(args, "s#:feed", &data, &data_len))
- return NULL;
-
- return expat_parse(self, data, data_len, 0);
+ if (PyUnicode_Check(arg)) { /* str input: parse the UTF-8 form of the string */
+ Py_ssize_t data_len;
+ const char *data = PyUnicode_AsUTF8AndSize(arg, &data_len);
+ if (data == NULL) /* conversion to UTF-8 failed */
+ return NULL;
+ /* PyUnicode_AsUTF8AndSize produced UTF-8 bytes, so tell Expat the
+  * input encoding explicitly. The status is deliberately ignored
+  * (NOTE(review): presumably SetEncoding can fail once parsing has
+  * begun, e.g. on a later feed() call -- confirm against Expat docs). */
+ (void)EXPAT(SetEncoding)(self->parser, "utf-8");
+ return expat_parse(self, data, data_len, 0); /* final=0: more data may follow */
+ }
+ else { /* anything else must export a simple buffer; its raw bytes are parsed as-is */
+ Py_buffer view;
+ PyObject *res;
+ if (PyObject_GetBuffer(arg, &view, PyBUF_SIMPLE) < 0)
+ return NULL;
+ res = expat_parse(self, view.buf, view.len, 0);
+ PyBuffer_Release(&view); /* released whether or not the parse succeeded */
+ return res;
+ }
}
static PyObject*
@@ -3568,7 +3589,7 @@
}
static PyMethodDef xmlparser_methods[] = {
- {"feed", (PyCFunction) xmlparser_feed, METH_VARARGS},
+ {"feed", (PyCFunction) xmlparser_feed, METH_O},
{"close", (PyCFunction) xmlparser_close, METH_VARARGS},
{"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
{"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
diff -r 5eefc85b8be8 Modules/pyexpat.c
--- a/Modules/pyexpat.c Mon Feb 25 17:21:42 2013 +0200
+++ b/Modules/pyexpat.c Mon Feb 25 17:38:48 2013 +0200
@@ -1937,6 +1937,7 @@
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
capi.SetUserData = XML_SetUserData;
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
+ capi.SetEncoding = XML_SetEncoding;
/* export using capsule */
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);