diff -r 9e65015582a5 Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py	Fri Nov 20 18:33:33 2015 +0200
+++ b/Lib/xml/etree/ElementTree.py	Sat Nov 21 12:53:38 2015 +0200
@@ -95,6 +95,8 @@ import sys
 import re
 import warnings
 import io
+import collections
+import collections.abc
 import contextlib
 
 from . import ElementPath
@@ -1202,7 +1204,36 @@ def iterparse(source, events=None, parse
     if not hasattr(source, "read"):
         source = open(source, "rb")
         close_source = True
-    return _IterParseIterator(source, events, parser, close_source)
+
+    # Use the internal, undocumented _parser argument for now; When the
+    # parser argument of iterparse is removed, this can be killed.
+    pullparser = XMLPullParser(events=events, _parser=parser)
+    def iterator():
+        # The finally clause closes a file opened by iterparse() even when
+        # parsing raises or the generator is closed before exhaustion.
+        try:
+            while True:
+                yield from pullparser.read_events()
+                # load event buffer
+                data = source.read(16 * 1024)
+                if not data:
+                    break
+                pullparser.feed(data)
+            root = pullparser._close_and_return_root()
+            yield from pullparser.read_events()
+            it.root = root
+        finally:
+            if close_source:
+                source.close()
+
+    # collections.abc.Iterator supplies __iter__; __next__ is bound
+    # directly to the generator's __next__.
+    class IterParseIterator(collections.abc.Iterator):
+        __next__ = iterator().__next__
+    it = IterParseIterator()
+    it.root = None
+    del iterator, IterParseIterator
+    return it
 
 
 class XMLPullParser:
@@ -1212,9 +1243,7 @@ class XMLPullParser:
         # upon in user code. It will be removed in a future release.
         # See http://bugs.python.org/issue17741 for more details.
 
-        # _elementtree.c expects a list, not a deque
-        self._events_queue = []
-        self._index = 0
+        self._events_queue = collections.deque()
         self._parser = _parser or XMLParser(target=TreeBuilder())
         # wire up the parser for event reporting
         if events is None:
@@ -1252,58 +1281,14 @@ class XMLPullParser:
         retrieved from the iterator.
         """
         events = self._events_queue
-        while True:
-            index = self._index
-            try:
-                event = events[self._index]
-                # Avoid retaining references to past events
-                events[self._index] = None
-            except IndexError:
-                break
-            index += 1
-            # Compact the list in a O(1) amortized fashion
-            # As noted above, _elementree.c needs a list, not a deque
-            if index * 2 >= len(events):
-                events[:index] = []
-                self._index = 0
-            else:
-                self._index = index
+        while events:
+            event = events.popleft()
             if isinstance(event, Exception):
                 raise event
             else:
                 yield event
 
 
-class _IterParseIterator:
-
-    def __init__(self, source, events, parser, close_source=False):
-        # Use the internal, undocumented _parser argument for now; When the
-        # parser argument of iterparse is removed, this can be killed.
-        self._parser = XMLPullParser(events=events, _parser=parser)
-        self._file = source
-        self._close_file = close_source
-        self.root = self._root = None
-
-    def __next__(self):
-        while 1:
-            for event in self._parser.read_events():
-                return event
-            if self._parser._parser is None:
-                self.root = self._root
-                if self._close_file:
-                    self._file.close()
-                raise StopIteration
-            # load event buffer
-            data = self._file.read(16 * 1024)
-            if data:
-                self._parser.feed(data)
-            else:
-                self._root = self._parser._close_and_return_root()
-
-    def __iter__(self):
-        return self
-
-
 def XML(text, parser=None):
     """Parse XML document from string constant.
 
diff -r 9e65015582a5 Modules/_elementtree.c
--- a/Modules/_elementtree.c	Fri Nov 20 18:33:33 2015 +0200
+++ b/Modules/_elementtree.c	Sat Nov 21 12:53:38 2015 +0200
@@ -2292,6 +2292,7 @@ typedef struct {
 
     /* element tracing */
     PyObject *events; /* list of events, or NULL if not collecting */
+    PyObject *events_append; /* the append method of the list of events or NULL */
     PyObject *start_event_obj; /* event objects (NULL to ignore) */
     PyObject *end_event_obj;
     PyObject *start_ns_event_obj;
@@ -2327,6 +2328,7 @@ treebuilder_new(PyTypeObject *type, PyOb
         t->index = 0;
 
         t->events = NULL;
+        t->events_append = NULL;
         t->start_event_obj = t->end_event_obj = NULL;
         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
     }
@@ -2377,6 +2379,7 @@ treebuilder_gc_clear(TreeBuilderObject *
     Py_CLEAR(self->end_event_obj);
     Py_CLEAR(self->start_event_obj);
     Py_CLEAR(self->events);
+    Py_CLEAR(self->events_append);
     Py_CLEAR(self->stack);
     Py_CLEAR(self->data);
     Py_CLEAR(self->last);
@@ -2452,6 +2455,39 @@ treebuilder_add_subelement(PyObject *ele
     }
 }
 
+/* Queue an (action, node) event tuple on the events container.
+ *
+ * Does nothing when action is NULL, i.e. when that kind of event is not
+ * being reported.  Returns 0 on success and -1 with an exception set on
+ * failure.
+ */
+LOCAL(int)
+treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
+                         PyObject *node)
+{
+    int err = 0;
+    if (action != NULL) {
+        PyObject *res = PyTuple_Pack(2, action, node);
+        if (res == NULL)
+            return -1;
+        if (self->events_append != NULL) {
+            /* The call returns a new reference that must be released. */
+            PyObject *ret = PyObject_CallFunctionObjArgs(self->events_append,
+                                                         res, NULL);
+            if (ret == NULL)
+                err = -1;
+            else
+                Py_DECREF(ret);
+        }
+        else if (self->events != NULL) {
+            if (PyList_Append(self->events, res) < 0)
+                err = -1;
+        }
+        Py_DECREF(res);
+    }
+    return err;
+}
+
 /* -------------------------------------------------------------------- */
 /* handlers */
 
@@ -2519,16 +2555,8 @@ treebuilder_handle_start(TreeBuilderObje
     Py_INCREF(node);
     self->last = node;
 
-    if (self->start_event_obj) {
-        PyObject* res;
-        PyObject* action = self->start_event_obj;
-        res = PyTuple_Pack(2, action, node);
-        if (res) {
-            PyList_Append(self->events, res);
-            Py_DECREF(res);
-        } else
-            PyErr_Clear(); /* FIXME: propagate error */
-    }
+    if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
+        goto error;
 
     return node;
 
@@ -2608,17 +2636,9 @@ treebuilder_handle_end(TreeBuilderObject
     self->last = self->this;
     self->this = item;
 
-    if (self->end_event_obj) {
-        PyObject* res;
-        PyObject* action = self->end_event_obj;
-        PyObject* node = (PyObject*) self->last;
-        res = PyTuple_Pack(2, action, node);
-        if (res) {
-            PyList_Append(self->events, res);
-            Py_DECREF(res);
-        } else
-            PyErr_Clear(); /* FIXME: propagate error */
-    }
+    if (treebuilder_append_event(self, self->end_event_obj,
+                                 (PyObject*) self->last) < 0)
+        return NULL;
 
     Py_INCREF(self->last);
     return (PyObject*) self->last;
@@ -2628,42 +2648,24 @@ LOCAL(void)
 treebuilder_handle_namespace(TreeBuilderObject* self, int start,
                              PyObject *prefix, PyObject *uri)
 {
-    PyObject* res;
-    PyObject* action;
-    PyObject* parcel;
-
     if (!self->events)
         return;
 
     if (start) {
+        PyObject* parcel;
         if (!self->start_ns_event_obj)
             return;
-        action = self->start_ns_event_obj;
         parcel = Py_BuildValue("OO", prefix, uri);
         if (!parcel)
             return;
-        Py_INCREF(action);
+        if (treebuilder_append_event(self, self->start_ns_event_obj,
+                                     parcel) < 0)
+            PyErr_Clear(); /* FIXME: propagate error */
+        Py_DECREF(parcel);
     } else {
-        if (!self->end_ns_event_obj)
-            return;
-        action = self->end_ns_event_obj;
-        Py_INCREF(action);
-        parcel = Py_None;
-        Py_INCREF(parcel);
-    }
-
-    res = PyTuple_New(2);
-
-    if (res) {
-        PyTuple_SET_ITEM(res, 0, action);
-        PyTuple_SET_ITEM(res, 1, parcel);
-        PyList_Append(self->events, res);
-        Py_DECREF(res);
-    }
-    else {
-        Py_DECREF(action);
-        Py_DECREF(parcel);
-        PyErr_Clear(); /* FIXME: propagate error */
+        if (treebuilder_append_event(self, self->end_ns_event_obj,
+                                     Py_None) < 0)
+            PyErr_Clear(); /* FIXME: propagate error */
     }
 }
 
@@ -3602,7 +3604,7 @@ static PyObject *
 /*[clinic input]
 _elementtree.XMLParser._setevents
 
-    events_queue: object(subclass_of='&PyList_Type')
+    events_queue: object
     events_to_report: object = None
     /
 
@@ -3612,7 +3614,7 @@ static PyObject *
 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
                                        PyObject *events_queue,
                                        PyObject *events_to_report)
-/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
+/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
 {
     /* activate element event reporting */
     Py_ssize_t i, seqlen;
@@ -3633,6 +3635,12 @@ static PyObject *
     Py_INCREF(events_queue);
     Py_XDECREF(target->events);
     target->events = events_queue;
+    Py_CLEAR(target->events_append);
+    if (!PyList_CheckExact(events_queue)) {
+        target->events_append = PyObject_GetAttrString(events_queue, "append");
+        if (target->events_append == NULL)
+            return NULL;
+    }
 
     /* clear out existing events */
     Py_CLEAR(target->start_event_obj);
diff -r 9e65015582a5 Modules/clinic/_elementtree.c.h
--- a/Modules/clinic/_elementtree.c.h	Fri Nov 20 18:33:33 2015 +0200
+++ b/Modules/clinic/_elementtree.c.h	Sat Nov 21 12:53:38 2015 +0200
@@ -668,12 +668,13 @@ static PyObject *
     PyObject *events_queue;
     PyObject *events_to_report = Py_None;
 
-    if (!PyArg_ParseTuple(args, "O!|O:_setevents",
-        &PyList_Type, &events_queue, &events_to_report))
+    if (!PyArg_UnpackTuple(args, "_setevents",
+        1, 2,
+        &events_queue, &events_to_report))
         goto exit;
     return_value = _elementtree_XMLParser__setevents_impl(self, events_queue, events_to_report);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=25b8bf7e7f2151ca input=a9049054013a1b77]*/
+/*[clinic end generated code: output=19d94e2d2726d3aa input=a9049054013a1b77]*/