Index: Lib/test/test_xml_etree_c.py =================================================================== --- Lib/test/test_xml_etree_c.py (revision 76687) +++ Lib/test/test_xml_etree_c.py (working copy) @@ -38,7 +38,7 @@ if not callable(method): print method, "not callable" -def serialize(ET, elem, encoding=None): +def serialize(ET, elem, encoding=None, getvalue=True): import StringIO file = StringIO.StringIO() tree = ET.ElementTree(elem) @@ -46,7 +46,11 @@ tree.write(file, encoding) else: tree.write(file) - return file.getvalue() + if getvalue: + return file.getvalue() + else: + file.seek(0) + return file def summarize(elem): return elem.tag @@ -77,6 +81,12 @@ >>> check_method(element.items) >>> check_method(element.getiterator) + These methods return an iterable. See bug 6472. + + >>> check_method(element.getiterator("tag").next) + >>> check_method(element.findall("tag").next) + >>> check_method(element.findall("*").next) + Basic method sanity checks. >>> serialize(ET, element) # 1 @@ -99,6 +109,19 @@ ValueError: list.remove(x): x not in list >>> serialize(ET, element) # 6 '' + >>> element[0:0] = [subelement, subelement] + >>> serialize(ET, element[1]) + '' + >>> assert element[:1] == [element[0]] + >>> del element[1:2] + >>> serialize(ET, element) + '' + + Method iterparse should return an iterator. See bug 6472. + + >>> next(ET.iterparse(serialize(ET, element, getvalue=False))) + ... # doctest: +ELLIPSIS + ('end', ) """ def find(): Index: Lib/test/test_xml_etree.py =================================================================== --- Lib/test/test_xml_etree.py (revision 76687) +++ Lib/test/test_xml_etree.py (working copy) @@ -40,7 +40,7 @@ if not callable(method): print method, "not callable" -def serialize(ET, elem, encoding=None): +def serialize(ET, elem, encoding=None, getvalue=True): import StringIO file = StringIO.StringIO() tree = ET.ElementTree(elem) @@ -48,7 +48,11 @@ tree.write(file, encoding) else: tree.write(file) - return file.getvalue() + if getvalue: + return file.getvalue() + else: + file.seek(0) + return file def summarize(elem): return elem.tag @@ -81,6 +85,12 @@ >>> check_method(element.items) >>> check_method(element.getiterator) + These methods return an iterable. See bug 6472. + + >>> check_method(element.getiterator("tag").next) + >>> check_method(element.findall("tag").next) + >>> check_method(element.findall("*").next) + Basic method sanity checks. >>> serialize(ET, element) # 1 @@ -103,6 +113,19 @@ ValueError: list.remove(x): x not in list >>> serialize(ET, element) # 6 '' + >>> element[0:0] = [subelement, subelement] + >>> serialize(ET, element[1]) + '' + >>> assert element[:1] == [element[0]] + >>> del element[1:2] + >>> serialize(ET, element) + '' + + Method iterparse should return an iterator. See bug 6472. + + >>> next(ET.iterparse(serialize(ET, element, getvalue=False))) + ... # doctest: +ELLIPSIS + ('end', ) """ def find(): Index: Lib/xml/etree/ElementTree.py =================================================================== --- Lib/xml/etree/ElementTree.py (revision 76687) +++ Lib/xml/etree/ElementTree.py (working copy) @@ -232,7 +232,11 @@ # @exception AssertionError If element is not a valid object. def __setitem__(self, index, element): - assert iselement(element) + if isinstance(index, slice): + for elt in element: + assert iselement(elt) + else: + assert iselement(element) self._children[index] = element ## @@ -245,38 +249,6 @@ del self._children[index] ## - # Returns a list containing subelements in the given range. - # - # @param start The first subelement to return. - # @param stop The first subelement that shouldn't be returned. - # @return A sequence object containing subelements. - - def __getslice__(self, start, stop): - return self._children[start:stop] - - ## - # Replaces a number of subelements with elements from a sequence. - # - # @param start The first subelement to replace. - # @param stop The first subelement that shouldn't be replaced. - # @param elements A sequence object with zero or more elements. - # @exception AssertionError If a sequence member is not a valid object. - - def __setslice__(self, start, stop, elements): - for element in elements: - assert iselement(element) - self._children[start:stop] = list(elements) - - ## - # Deletes a number of subelements. - # - # @param start The first subelement to delete. - # @param stop The first subelement to leave in there. - - def __delslice__(self, start, stop): - del self._children[start:stop] - - ## # Adds a subelement to the end of this element. # # @param element The element to add. @@ -417,14 +389,13 @@ # @defreturn list or iterator def getiterator(self, tag=None): - nodes = [] if tag == "*": tag = None if tag is None or self.tag == tag: - nodes.append(self) + yield self for node in self._children: - nodes.extend(node.getiterator(tag)) - return nodes + for element in node.getiterator(tag): + yield element # compatibility _Element = _ElementInterface Index: Lib/xml/etree/ElementPath.py =================================================================== --- Lib/xml/etree/ElementPath.py (revision 76687) +++ Lib/xml/etree/ElementPath.py (working copy) @@ -94,8 +94,6 @@ ) if self.path and isinstance(self.path[-1], xpath_descendant_or_self): raise SyntaxError("path cannot end with //") - if len(self.path) == 1 and isinstance(self.path[0], type("")): - self.tag = self.path[0] ## # Find first matching object. @@ -104,9 +102,7 @@ tag = self.tag if tag is None: nodeset = self.findall(element) - if not nodeset: - return None - return nodeset[0] + return next(nodeset, None) for elem in element: if elem.tag == tag: return elem @@ -119,9 +115,10 @@ tag = self.tag if tag is None: nodeset = self.findall(element) - if not nodeset: + try: + return next(nodeset).text or "" + except StopIteration: return default - return nodeset[0].text or "" for elem in element: if elem.tag == tag: return elem.text or "" @@ -130,39 +127,34 @@ ## # Find all matching objects. - def findall(self, element): - nodeset = [element] - index = 0 - while 1: - try: - path = self.path[index] - index = index + 1 - except IndexError: - return nodeset - set = [] + def _findall(self, element, index=0): + try: + path = self.path[index] + index += 1 + except IndexError: + yield element + else: if isinstance(path, xpath_descendant_or_self): try: tag = self.path[index] - if not isinstance(tag, type("")): - tag = None + if isinstance(tag, type("")): + index += 1 else: - index = index + 1 + tag = None except IndexError: - tag = None # invalid path - for node in nodeset: - new = list(node.getiterator(tag)) - if new and new[0] is node: - set.extend(new[1:]) - else: - set.extend(new) + tag = None # invalid path + nodes = element.getiterator(tag) + if next(nodes, element) is not element: + nodes = element.getiterator(tag) else: - for node in nodeset: - for node in node: - if path == "*" or node.tag == path: - set.append(node) - if not set: - return [] - nodeset = set + nodes = (node for node in element if path in ('*', node.tag)) + + for node in nodes: + for match in self._findall(node, index): + yield match + + def findall(self, element): + return self._findall(element) _cache = {} Index: Modules/_elementtree.c =================================================================== --- Modules/_elementtree.c (revision 76687) +++ Modules/_elementtree.c (working copy) @@ -831,18 +831,24 @@ static PyObject* element_findall(ElementObject* self, PyObject* args) { - int i; - PyObject* out; + /* int i; + PyObject* out; */ PyObject* tag; if (!PyArg_ParseTuple(args, "O:findall", &tag)) return NULL; - if (checkpath(tag)) + /* Use the Python implementation without condition. + The method will return an iterator in all cases. + See bug 6472. */ + + /* if (checkpath(tag)) */ + return PyObject_CallMethod( elementpath_obj, "findall", "OO", self, tag ); + /* out = PyList_New(0); if (!out) return NULL; @@ -862,6 +868,7 @@ } return out; + */ } static PyObject* @@ -960,39 +967,6 @@ } static PyObject* -element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end) -{ - ElementObject* self = (ElementObject*) self_; - Py_ssize_t i; - PyObject* list; - - if (!self->extra) - return PyList_New(0); - - /* standard clamping */ - if (start < 0) - start = 0; - if (end < 0) - end = 0; - if (end > self->extra->length) - end = self->extra->length; - if (start > end) - start = end; - - list = PyList_New(end - start); - if (!list) - return NULL; - - for (i = start; i < end; i++) { - PyObject* item = self->extra->children[i]; - Py_INCREF(item); - PyList_SET_ITEM(list, i - start, item); - } - - return list; -} - -static PyObject* element_insert(ElementObject* self, PyObject* args) { int i; @@ -1188,77 +1162,6 @@ } static int -element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item) -{ - ElementObject* self = (ElementObject*) self_; - Py_ssize_t i, new, old; - PyObject* recycle = NULL; - - if (!self->extra) - element_new_extra(self, NULL); - - /* standard clamping */ - if (start < 0) - start = 0; - if (end < 0) - end = 0; - if (end > self->extra->length) - end = self->extra->length; - if (start > end) - start = end; - - old = end - start; - - if (item == NULL) - new = 0; - else if (PyList_CheckExact(item)) { - new = PyList_GET_SIZE(item); - } else { - /* FIXME: support arbitrary sequences? */ - PyErr_Format( - PyExc_TypeError, - "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name - ); - return -1; - } - - if (old > 0) { - /* to avoid recursive calls to this method (via decref), move - old items to the recycle bin here, and get rid of them when - we're done modifying the element */ - recycle = PyList_New(old); - for (i = 0; i < old; i++) - PyList_SET_ITEM(recycle, i, self->extra->children[i + start]); - } - - if (new < old) { - /* delete slice */ - for (i = end; i < self->extra->length; i++) - self->extra->children[i + new - old] = self->extra->children[i]; - } else if (new > old) { - /* insert slice */ - if (element_resize(self, new - old) < 0) - return -1; - for (i = self->extra->length-1; i >= end; i--) - self->extra->children[i + new - old] = self->extra->children[i]; - } - - /* replace the slice */ - for (i = 0; i < new; i++) { - PyObject* element = PyList_GET_ITEM(item, i); - Py_INCREF(element); - self->extra->children[i + start] = element; - } - - self->extra->length += new - old; - - /* discard the recycle bin, and everything in it */ - Py_XDECREF(recycle); - - return 0; -} - -static int element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) { ElementObject* self = (ElementObject*) self_; @@ -1288,6 +1191,153 @@ return 0; } +static PyObject* +element_subscr(PyObject* self_, PyObject* item) +{ + ElementObject* self = (ElementObject*) self_; + + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i==-1 && PyErr_Occurred()) { + return NULL; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_getitem(self_, i); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, cur, i; + PyObject* list; + + if (!self->extra) + return PyList_New(0); + + if (PySlice_GetIndicesEx((PySliceObject *)item, + self->extra->length, + &start, &stop, &step, &slicelen) < 0) { + return NULL; + } + + if (slicelen <= 0) + return PyList_New(0); + else { + list = PyList_New(slicelen); + if (!list) + return NULL; + + for (cur = start, i = 0; i < slicelen; + cur += step, i++) { + PyObject* item = self->extra->children[cur]; + Py_INCREF(item); + PyList_SET_ITEM(list, i, item); + } + + return list; + } + } + else { + PyErr_SetString(PyExc_TypeError, + "element indices must be integers"); + return NULL; + } +} + +static int +element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) +{ + ElementObject* self = (ElementObject*) self_; + + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + + if (i==-1 && PyErr_Occurred()) { + return -1; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_setitem(self_, i, value); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, newlen, cur, i; + + PyObject* recycle = NULL; + + if (PySlice_GetIndicesEx((PySliceObject *)item, + self->extra->length, + &start, &stop, &step, &slicelen) < 0) { + return -1; + } + + if (!self->extra) + element_new_extra(self, NULL); + + if (value == NULL) + newlen = 0; + else if (PyList_CheckExact(value)) { + newlen = PyList_GET_SIZE(value); + } else { + /* FIXME: support arbitrary sequences? */ + PyErr_Format( + PyExc_TypeError, + "expected list, not \"%.200s\"", Py_TYPE(value)->tp_name + ); + return -1; + } + + if (step != 1 && newlen != slicelen) + { + PyErr_Format(PyExc_ValueError, + "attempt to assign sequence of size %zd " + "to extended slice of size %zd", + newlen, slicelen + ); + return -1; + } + + if (slicelen > 0) { + /* to avoid recursive calls to this method (via decref), move + old items to the recycle bin here, and get rid of them when + we're done modifying the element */ + recycle = PyList_New(slicelen); + for (cur = start, i = 0; i < slicelen; + cur += step, i++) + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + } + + if (newlen < slicelen) { + /* delete slice */ + for (i = stop; i < self->extra->length; i++) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } else if (newlen > slicelen) { + /* insert slice */ + if (element_resize(self, newlen - slicelen) < 0) + return -1; + for (i = self->extra->length-1; i >= stop; i--) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } + + /* replace the slice */ + for (cur = start, i = 0; i < newlen; + cur += step, i++) { + PyObject* element = PyList_GET_ITEM(value, i); + Py_INCREF(element); + self->extra->children[cur] = element; + } + + self->extra->length += newlen - slicelen; + + /* discard the recycle bin, and everything in it */ + Py_XDECREF(recycle); + + return 0; + } + else { + PyErr_SetString(PyExc_TypeError, + "element indices must be integers"); + return -1; + } +} + static PyMethodDef element_methods[] = { {"clear", (PyCFunction) element_clear, METH_VARARGS}, @@ -1399,14 +1449,20 @@ return 0; } +static PyMappingMethods element_as_mapping = { + (lenfunc)element_length, + (binaryfunc)element_subscr, + (objobjargproc)element_ass_subscr +}; + static PySequenceMethods element_as_sequence = { - (lenfunc) element_length, + (lenfunc)element_length, 0, /* sq_concat */ 0, /* sq_repeat */ element_getitem, - element_getslice, + 0, element_setitem, - element_setslice, + 0, }; statichere PyTypeObject Element_Type = { @@ -1421,6 +1477,7 @@ (reprfunc)element_repr, /* tp_repr */ 0, /* tp_as_number */ &element_as_sequence, /* tp_as_sequence */ + &element_as_mapping, /* tp_as_mapping */ }; /* ==================================================================== */ @@ -2724,30 +2781,40 @@ "if hasattr(ET, 'iterparse'):\n" " cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */ #else - "class iterparse(object):\n" + "class iterparse:\n" " root = None\n" " def __init__(self, file, events=None):\n" " if not hasattr(file, 'read'):\n" " file = open(file, 'rb')\n" " self._file = file\n" - " self._events = events\n" - " def __iter__(self):\n" - " events = []\n" + " self._events = []\n" + " self._index = 0\n" + " self.root = self._root = None\n" " b = cElementTree.TreeBuilder()\n" - " p = cElementTree.XMLParser(b)\n" - " p._setevents(events, self._events)\n" + " self._parser = cElementTree.XMLParser(b)\n" + " self._parser._setevents(self._events, events)\n" + " def next(self):\n" " while 1:\n" - " data = self._file.read(16384)\n" - " if not data:\n" - " break\n" - " p.feed(data)\n" - " for event in events:\n" - " yield event\n" - " del events[:]\n" - " root = p.close()\n" - " for event in events:\n" - " yield event\n" - " self.root = root\n" + " try:\n" + " item = self._events[self._index]\n" + " except IndexError:\n" + " if self._parser is None:\n" + " self.root = self._root\n" + " raise StopIteration\n" + " # load event buffer\n" + " del self._events[:]\n" + " self._index = 0\n" + " data = self._file.read(16384)\n" + " if data:\n" + " self._parser.feed(data)\n" + " else:\n" + " self._root = self._parser.close()\n" + " self._parser = None\n" + " else:\n" + " self._index = self._index + 1\n" + " return item\n" + " def __iter__(self):\n" + " return self\n" "cElementTree.iterparse = iterparse\n" #endif