Index: Lib/test/test_xml_etree_c.py
===================================================================
--- Lib/test/test_xml_etree_c.py (revision 76687)
+++ Lib/test/test_xml_etree_c.py (working copy)
@@ -38,7 +38,7 @@
if not callable(method):
print method, "not callable"
-def serialize(ET, elem, encoding=None):
+def serialize(ET, elem, encoding=None, getvalue=True):
import StringIO
file = StringIO.StringIO()
tree = ET.ElementTree(elem)
@@ -46,7 +46,11 @@
tree.write(file, encoding)
else:
tree.write(file)
- return file.getvalue()
+ if getvalue:
+ return file.getvalue()
+ else:
+ file.seek(0)
+ return file
def summarize(elem):
return elem.tag
@@ -77,6 +81,12 @@
>>> check_method(element.items)
>>> check_method(element.getiterator)
+ These methods return an iterable. See bug 6472.
+
+ >>> check_method(element.getiterator("tag").next)
+ >>> check_method(element.findall("tag").next)
+ >>> check_method(element.findall("*").next)
+
Basic method sanity checks.
>>> serialize(ET, element) # 1
@@ -99,6 +109,19 @@
ValueError: list.remove(x): x not in list
>>> serialize(ET, element) # 6
''
+ >>> element[0:0] = [subelement, subelement]
+ >>> serialize(ET, element[1])
+ ''
+ >>> assert element[:1] == [element[0]]
+ >>> del element[1:2]
+ >>> serialize(ET, element)
+ ''
+
+ Method iterparse should return an iterator. See bug 6472.
+
+ >>> next(ET.iterparse(serialize(ET, element, getvalue=False)))
+ ... # doctest: +ELLIPSIS
+ ('end', <Element ...>)
"""
def find():
Index: Lib/test/test_xml_etree.py
===================================================================
--- Lib/test/test_xml_etree.py (revision 76687)
+++ Lib/test/test_xml_etree.py (working copy)
@@ -40,7 +40,7 @@
if not callable(method):
print method, "not callable"
-def serialize(ET, elem, encoding=None):
+def serialize(ET, elem, encoding=None, getvalue=True):
import StringIO
file = StringIO.StringIO()
tree = ET.ElementTree(elem)
@@ -48,7 +48,11 @@
tree.write(file, encoding)
else:
tree.write(file)
- return file.getvalue()
+ if getvalue:
+ return file.getvalue()
+ else:
+ file.seek(0)
+ return file
def summarize(elem):
return elem.tag
@@ -81,6 +85,12 @@
>>> check_method(element.items)
>>> check_method(element.getiterator)
+ These methods return an iterable. See bug 6472.
+
+ >>> check_method(element.getiterator("tag").next)
+ >>> check_method(element.findall("tag").next)
+ >>> check_method(element.findall("*").next)
+
Basic method sanity checks.
>>> serialize(ET, element) # 1
@@ -103,6 +113,19 @@
ValueError: list.remove(x): x not in list
>>> serialize(ET, element) # 6
''
+ >>> element[0:0] = [subelement, subelement]
+ >>> serialize(ET, element[1])
+ ''
+ >>> assert element[:1] == [element[0]]
+ >>> del element[1:2]
+ >>> serialize(ET, element)
+ ''
+
+ Method iterparse should return an iterator. See bug 6472.
+
+ >>> next(ET.iterparse(serialize(ET, element, getvalue=False)))
+ ... # doctest: +ELLIPSIS
+ ('end', <Element ...>)
"""
def find():
Index: Lib/xml/etree/ElementTree.py
===================================================================
--- Lib/xml/etree/ElementTree.py (revision 76687)
+++ Lib/xml/etree/ElementTree.py (working copy)
@@ -232,7 +232,11 @@
# @exception AssertionError If element is not a valid object.
def __setitem__(self, index, element):
- assert iselement(element)
+        # Replaces one subelement (integer index) or a range of
+        # subelements (slice index, in which case "element" is an
+        # iterable of elements).
+        if isinstance(index, slice):
+            # Materialize the replacement before validating it: checking
+            # a one-shot iterable (e.g. a generator) would exhaust it, and
+            # the assignment below would then store nothing.  The former
+            # __setslice__ protected against this with list(elements).
+            element = list(element)
+            for elt in element:
+                assert iselement(elt)
+        else:
+            assert iselement(element)
self._children[index] = element
##
@@ -245,38 +249,6 @@
del self._children[index]
##
- # Returns a list containing subelements in the given range.
- #
- # @param start The first subelement to return.
- # @param stop The first subelement that shouldn't be returned.
- # @return A sequence object containing subelements.
-
- def __getslice__(self, start, stop):
- return self._children[start:stop]
-
- ##
- # Replaces a number of subelements with elements from a sequence.
- #
- # @param start The first subelement to replace.
- # @param stop The first subelement that shouldn't be replaced.
- # @param elements A sequence object with zero or more elements.
- # @exception AssertionError If a sequence member is not a valid object.
-
- def __setslice__(self, start, stop, elements):
- for element in elements:
- assert iselement(element)
- self._children[start:stop] = list(elements)
-
- ##
- # Deletes a number of subelements.
- #
- # @param start The first subelement to delete.
- # @param stop The first subelement to leave in there.
-
- def __delslice__(self, start, stop):
- del self._children[start:stop]
-
- ##
# Adds a subelement to the end of this element.
#
# @param element The element to add.
@@ -417,14 +389,13 @@
# @defreturn list or iterator
def getiterator(self, tag=None):
- nodes = []
+ # Generator: yields this element first when it matches (a tag of
+ # None or "*" matches everything), then the matches found in each
+ # child's subtree, child by child.  Nothing is collected in a list.
if tag == "*":
tag = None
if tag is None or self.tag == tag:
- nodes.append(self)
+ yield self
for node in self._children:
- nodes.extend(node.getiterator(tag))
- return nodes
+ for element in node.getiterator(tag):
+ yield element
# compatibility
_Element = _ElementInterface
Index: Lib/xml/etree/ElementPath.py
===================================================================
--- Lib/xml/etree/ElementPath.py (revision 76687)
+++ Lib/xml/etree/ElementPath.py (working copy)
@@ -94,8 +94,6 @@
)
if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
raise SyntaxError("path cannot end with //")
- if len(self.path) == 1 and isinstance(self.path[0], type("")):
- self.tag = self.path[0]
##
# Find first matching object.
@@ -104,9 +102,7 @@
tag = self.tag
if tag is None:
nodeset = self.findall(element)
- if not nodeset:
- return None
- return nodeset[0]
+ return next(nodeset, None)
for elem in element:
if elem.tag == tag:
return elem
@@ -119,9 +115,10 @@
tag = self.tag
if tag is None:
nodeset = self.findall(element)
- if not nodeset:
+ try:
+ return next(nodeset).text or ""
+ except StopIteration:
return default
- return nodeset[0].text or ""
for elem in element:
if elem.tag == tag:
return elem.text or ""
@@ -130,39 +127,34 @@
##
# Find all matching objects.
- def findall(self, element):
- nodeset = [element]
- index = 0
- while 1:
- try:
- path = self.path[index]
- index = index + 1
- except IndexError:
- return nodeset
- set = []
+ def _findall(self, element, index=0):
+ # Recursive generator: applies path step number "index" to
+ # "element", then recurses on every node that step selects.
+ # Running off the end of self.path means every step has matched,
+ # so "element" itself is a result.
+ try:
+ path = self.path[index]
+ index += 1
+ except IndexError:
+ yield element
+ else:
if isinstance(path, xpath_descendant_or_self):
try:
tag = self.path[index]
- if not isinstance(tag, type("")):
- tag = None
+ if isinstance(tag, type("")):
+ index += 1
else:
- index = index + 1
+ tag = None
except IndexError:
- tag = None # invalid path
- for node in nodeset:
- new = list(node.getiterator(tag))
- if new and new[0] is node:
- set.extend(new[1:])
- else:
- set.extend(new)
+ tag = None # invalid path
+ # The descendant step must not return "element" itself.  Pull the
+ # first item: if it is "element" (or the iterator is empty, in
+ # which case the default is returned) keep the advanced iterator;
+ # otherwise a genuine match was consumed, so start a fresh one.
+ nodes = element.getiterator(tag)
+ if next(nodes, element) is not element:
+ nodes = element.getiterator(tag)
else:
- for node in nodeset:
- for node in node:
- if path == "*" or node.tag == path:
- set.append(node)
- if not set:
- return []
- nodeset = set
+ # Plain step: direct children whose tag equals the step, or all
+ # children when the step is "*".
+ nodes = (node for node in element if path in ('*', node.tag))
+
+ for node in nodes:
+ for match in self._findall(node, index):
+ yield match
+
+ # Public entry point: starts the walk at the first path step and
+ # returns the generator rather than a list.
+ def findall(self, element):
+ return self._findall(element)
_cache = {}
Index: Modules/_elementtree.c
===================================================================
--- Modules/_elementtree.c (revision 76687)
+++ Modules/_elementtree.c (working copy)
@@ -831,18 +831,24 @@
static PyObject*
element_findall(ElementObject* self, PyObject* args)
{
- int i;
- PyObject* out;
+ /* int i;
+ PyObject* out; */
PyObject* tag;
if (!PyArg_ParseTuple(args, "O:findall", &tag))
return NULL;
- if (checkpath(tag))
+ /* Use the Python implementation without condition.
+ The method will return an iterator in all cases.
+ See bug 6472. */
+
+ /* if (checkpath(tag)) */
+
return PyObject_CallMethod(
elementpath_obj, "findall", "OO", self, tag
);
+ /*
out = PyList_New(0);
if (!out)
return NULL;
@@ -862,6 +868,7 @@
}
return out;
+ */
}
static PyObject*
@@ -960,39 +967,6 @@
}
static PyObject*
-element_getslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end)
-{
- ElementObject* self = (ElementObject*) self_;
- Py_ssize_t i;
- PyObject* list;
-
- if (!self->extra)
- return PyList_New(0);
-
- /* standard clamping */
- if (start < 0)
- start = 0;
- if (end < 0)
- end = 0;
- if (end > self->extra->length)
- end = self->extra->length;
- if (start > end)
- start = end;
-
- list = PyList_New(end - start);
- if (!list)
- return NULL;
-
- for (i = start; i < end; i++) {
- PyObject* item = self->extra->children[i];
- Py_INCREF(item);
- PyList_SET_ITEM(list, i - start, item);
- }
-
- return list;
-}
-
-static PyObject*
element_insert(ElementObject* self, PyObject* args)
{
int i;
@@ -1188,77 +1162,6 @@
}
static int
-element_setslice(PyObject* self_, Py_ssize_t start, Py_ssize_t end, PyObject* item)
-{
- ElementObject* self = (ElementObject*) self_;
- Py_ssize_t i, new, old;
- PyObject* recycle = NULL;
-
- if (!self->extra)
- element_new_extra(self, NULL);
-
- /* standard clamping */
- if (start < 0)
- start = 0;
- if (end < 0)
- end = 0;
- if (end > self->extra->length)
- end = self->extra->length;
- if (start > end)
- start = end;
-
- old = end - start;
-
- if (item == NULL)
- new = 0;
- else if (PyList_CheckExact(item)) {
- new = PyList_GET_SIZE(item);
- } else {
- /* FIXME: support arbitrary sequences? */
- PyErr_Format(
- PyExc_TypeError,
- "expected list, not \"%.200s\"", Py_TYPE(item)->tp_name
- );
- return -1;
- }
-
- if (old > 0) {
- /* to avoid recursive calls to this method (via decref), move
- old items to the recycle bin here, and get rid of them when
- we're done modifying the element */
- recycle = PyList_New(old);
- for (i = 0; i < old; i++)
- PyList_SET_ITEM(recycle, i, self->extra->children[i + start]);
- }
-
- if (new < old) {
- /* delete slice */
- for (i = end; i < self->extra->length; i++)
- self->extra->children[i + new - old] = self->extra->children[i];
- } else if (new > old) {
- /* insert slice */
- if (element_resize(self, new - old) < 0)
- return -1;
- for (i = self->extra->length-1; i >= end; i--)
- self->extra->children[i + new - old] = self->extra->children[i];
- }
-
- /* replace the slice */
- for (i = 0; i < new; i++) {
- PyObject* element = PyList_GET_ITEM(item, i);
- Py_INCREF(element);
- self->extra->children[i + start] = element;
- }
-
- self->extra->length += new - old;
-
- /* discard the recycle bin, and everything in it */
- Py_XDECREF(recycle);
-
- return 0;
-}
-
-static int
element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
{
ElementObject* self = (ElementObject*) self_;
@@ -1288,6 +1191,153 @@
return 0;
}
+static PyObject*
+element_subscr(PyObject* self_, PyObject* item)
+{
+    /* mp_subscript: element[index] and element[start:stop:step].
+       An integer-like subscript is forwarded to element_getitem; a
+       slice returns a new list holding the selected children. */
+    ElementObject* self = (ElementObject*) self_;
+    Py_ssize_t start, stop, step, count, src, dst;
+    PyObject* result;
+
+    if (PyIndex_Check(item)) {
+        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
+        if (index == -1 && PyErr_Occurred())
+            return NULL;
+        /* negative indices count from the end when there are children */
+        if (index < 0 && self->extra)
+            index += self->extra->length;
+        return element_getitem(self_, index);
+    }
+
+    if (!PySlice_Check(item)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "element indices must be integers");
+        return NULL;
+    }
+
+    /* no child storage at all: every slice is empty */
+    if (!self->extra)
+        return PyList_New(0);
+
+    if (PySlice_GetIndicesEx((PySliceObject *)item,
+                             self->extra->length,
+                             &start, &stop, &step, &count) < 0)
+        return NULL;
+
+    /* an empty slice gets a zero-length list and skips the copy loop */
+    result = PyList_New(count > 0 ? count : 0);
+    if (!result)
+        return NULL;
+
+    for (src = start, dst = 0; dst < count; src += step, dst++) {
+        PyObject* child = self->extra->children[src];
+        Py_INCREF(child);
+        PyList_SET_ITEM(result, dst, child);
+    }
+
+    return result;
+}
+
+static int
+element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
+{
+    /* mp_ass_subscript: element[index] = value, element[slice] = list,
+       and the matching del forms (value == NULL).  Integer-like
+       subscripts are forwarded to element_setitem.  Returns 0 on
+       success, -1 with an exception set on failure. */
+    ElementObject* self = (ElementObject*) self_;
+
+    if (PyIndex_Check(item)) {
+        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+
+        if (i == -1 && PyErr_Occurred())
+            return -1;
+        if (i < 0 && self->extra)
+            i += self->extra->length;
+        return element_setitem(self_, i, value);
+    }
+    else if (PySlice_Check(item)) {
+        Py_ssize_t start, stop, step, slicelen, newlen, cur, i;
+        PyObject* recycle = NULL;
+
+        /* Allocate the child storage BEFORE anything reads
+           self->extra->length; a childless element has extra == NULL. */
+        if (!self->extra) {
+            element_new_extra(self, NULL);
+            if (!self->extra) {
+                /* NOTE(review): element_new_extra's failure reporting is
+                   not visible here; a still-NULL extra is treated as an
+                   allocation failure. */
+                if (!PyErr_Occurred())
+                    PyErr_NoMemory();
+                return -1;
+            }
+        }
+
+        if (PySlice_GetIndicesEx((PySliceObject *)item,
+                                 self->extra->length,
+                                 &start, &stop, &step, &slicelen) < 0) {
+            return -1;
+        }
+
+        /* A plain slice with stop < start is an empty slice located at
+           start (same rule as list).  Without this the shifting loops
+           below would move items from stop while the new items are
+           written at start. */
+        if (step == 1 && stop < start)
+            stop = start;
+
+        if (value == NULL)
+            newlen = 0;
+        else if (PyList_CheckExact(value)) {
+            newlen = PyList_GET_SIZE(value);
+        } else {
+            /* FIXME: support arbitrary sequences? */
+            PyErr_Format(
+                PyExc_TypeError,
+                "expected list, not \"%.200s\"", Py_TYPE(value)->tp_name
+                );
+            return -1;
+        }
+
+        /* extended slices can only be replaced one-for-one; note that
+           this also rejects deleting an extended slice */
+        if (step != 1 && newlen != slicelen)
+        {
+            PyErr_Format(PyExc_ValueError,
+                "attempt to assign sequence of size %zd "
+                "to extended slice of size %zd",
+                newlen, slicelen
+                );
+            return -1;
+        }
+
+        /* Grow first: if this fails nothing has been moved yet, and no
+           recycle bin holding stolen references has to be unwound. */
+        if (newlen > slicelen) {
+            if (element_resize(self, newlen - slicelen) < 0)
+                return -1;
+        }
+
+        if (slicelen > 0) {
+            /* to avoid recursive calls to this method (via decref), move
+               old items to the recycle bin here, and get rid of them when
+               we're done modifying the element */
+            recycle = PyList_New(slicelen);
+            if (!recycle)
+                return -1;
+            for (cur = start, i = 0; i < slicelen;
+                 cur += step, i++)
+                PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
+        }
+
+        if (newlen < slicelen) {
+            /* delete slice: close the gap */
+            for (i = stop; i < self->extra->length; i++)
+                self->extra->children[i + newlen - slicelen] = self->extra->children[i];
+        } else if (newlen > slicelen) {
+            /* insert slice: open a gap, moving from the tail backwards */
+            for (i = self->extra->length-1; i >= stop; i--)
+                self->extra->children[i + newlen - slicelen] = self->extra->children[i];
+        }
+
+        /* replace the slice */
+        for (cur = start, i = 0; i < newlen;
+             cur += step, i++) {
+            PyObject* element = PyList_GET_ITEM(value, i);
+            Py_INCREF(element);
+            self->extra->children[cur] = element;
+        }
+
+        self->extra->length += newlen - slicelen;
+
+        /* discard the recycle bin, and everything in it */
+        Py_XDECREF(recycle);
+
+        return 0;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                        "element indices must be integers");
+        return -1;
+    }
+}
+
static PyMethodDef element_methods[] = {
{"clear", (PyCFunction) element_clear, METH_VARARGS},
@@ -1399,14 +1449,20 @@
return 0;
}
+/* Mapping protocol table: routes element[...] through element_subscr and
+   element_ass_subscr, which accept slice objects as well as integers. */
+static PyMappingMethods element_as_mapping = {
+ (lenfunc)element_length, /* mp_length */
+ (binaryfunc)element_subscr, /* mp_subscript */
+ (objobjargproc)element_ass_subscr /* mp_ass_subscript */
+};
+
static PySequenceMethods element_as_sequence = {
- (lenfunc) element_length,
+ (lenfunc)element_length,
0, /* sq_concat */
0, /* sq_repeat */
element_getitem,
- element_getslice,
+ 0, /* sq_slice: slicing is handled by element_as_mapping */
element_setitem,
- element_setslice,
+ 0, /* sq_ass_slice: slice assignment is handled by element_as_mapping */
};
statichere PyTypeObject Element_Type = {
@@ -1421,6 +1477,7 @@
(reprfunc)element_repr, /* tp_repr */
0, /* tp_as_number */
&element_as_sequence, /* tp_as_sequence */
+ &element_as_mapping, /* tp_as_mapping */
};
/* ==================================================================== */
@@ -2724,30 +2781,40 @@
"if hasattr(ET, 'iterparse'):\n"
" cElementTree.iterparse = ET.iterparse\n" /* delegate on 2.1 */
#else
- "class iterparse(object):\n"
+ /* iterparse is its own iterator: __iter__ returns self and next()
+    hands out buffered events by index.  When the buffer runs dry it is
+    cleared and refilled by feeding the parser another 16384-byte read;
+    at end of input the parser is closed, and self.root is published
+    only once the remaining events have been drained.
+    NOTE(review): the base class changes from object to none (an
+    old-style class on Python 2) -- confirm this is intended. */
+ "class iterparse:\n"
" root = None\n"
" def __init__(self, file, events=None):\n"
" if not hasattr(file, 'read'):\n"
" file = open(file, 'rb')\n"
" self._file = file\n"
- " self._events = events\n"
- " def __iter__(self):\n"
- " events = []\n"
+ " self._events = []\n"
+ " self._index = 0\n"
+ " self.root = self._root = None\n"
" b = cElementTree.TreeBuilder()\n"
- " p = cElementTree.XMLParser(b)\n"
- " p._setevents(events, self._events)\n"
+ " self._parser = cElementTree.XMLParser(b)\n"
+ " self._parser._setevents(self._events, events)\n"
+ " def next(self):\n"
" while 1:\n"
- " data = self._file.read(16384)\n"
- " if not data:\n"
- " break\n"
- " p.feed(data)\n"
- " for event in events:\n"
- " yield event\n"
- " del events[:]\n"
- " root = p.close()\n"
- " for event in events:\n"
- " yield event\n"
- " self.root = root\n"
+ " try:\n"
+ " item = self._events[self._index]\n"
+ " except IndexError:\n"
+ " if self._parser is None:\n"
+ " self.root = self._root\n"
+ " raise StopIteration\n"
+ " # load event buffer\n"
+ " del self._events[:]\n"
+ " self._index = 0\n"
+ " data = self._file.read(16384)\n"
+ " if data:\n"
+ " self._parser.feed(data)\n"
+ " else:\n"
+ " self._root = self._parser.close()\n"
+ " self._parser = None\n"
+ " else:\n"
+ " self._index = self._index + 1\n"
+ " return item\n"
+ " def __iter__(self):\n"
+ " return self\n"
"cElementTree.iterparse = iterparse\n"
#endif