diff -r 08c215115842 Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py	Sun Dec 30 06:29:49 2012 -0800
+++ b/Lib/test/test_xml_etree.py	Sun Dec 30 23:48:50 2012 +0000
@@ -16,11 +16,13 @@
 
 import html
 import io
+import operator
 import pickle
 import sys
 import unittest
 import weakref
 
+from itertools import product
 from test import support
 from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect
 
@@ -171,6 +173,44 @@ def check_element(element):
         for elem in element:
             check_element(elem)
 
+class ElementTestCase(unittest.TestCase):
+    """Shared helpers for tests that run against both ET implementations."""
+
+    @classmethod
+    def setUpClass(cls):
+        # A set, so the module is listed only once when pyET is ET.
+        cls.modules = {pyET, ET}
+
+    def pickleRoundTrip(self, obj, name, dumper, loader):
+        """Pickle obj while sys.modules[name] is dumper, unpickle it while
+        sys.modules[name] is loader, and return the unpickled object."""
+        save_m = sys.modules[name]
+        try:
+            sys.modules[name] = dumper
+            temp = pickle.dumps(obj)
+            sys.modules[name] = loader
+            result = pickle.loads(temp)
+        except pickle.PicklingError as pe:
+            # pyET must be second, because pyET may be (equal to) ET.
+            human = {ET: "cET", pyET: "pyET"}
+            raise support.TestFailed("Failed to round-trip %r from %r to %r"
+                                     % (obj,
+                                        human.get(dumper, dumper),
+                                        human.get(loader, loader))) from pe
+        finally:
+            sys.modules[name] = save_m
+        return result
+
+    def assertEqualElements(self, alice, bob):
+        """Recursively compare tag, tail, text, attrib and children."""
+        self.assertIsInstance(alice, (ET.Element, pyET.Element))
+        self.assertIsInstance(bob, (ET.Element, pyET.Element))
+        self.assertEqual(len(list(alice)), len(list(bob)))
+        for x, y in zip(alice, bob):
+            self.assertEqualElements(x, y)
+        properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
+        self.assertEqual(properties(alice), properties(bob))
+
 # --------------------------------------------------------------------
 # element tree tests
 
@@ -1715,7 +1755,7 @@ def check_issue10777():
 
 # --------------------------------------------------------------------
 
-class BasicElementTest(unittest.TestCase):
+class BasicElementTest(ElementTestCase):
     def test_augmentation_type_errors(self):
         e = ET.Element('joe')
         self.assertRaises(TypeError, e.append, 'b')
@@ -1770,19 +1810,22 @@ class BasicElementTest(unittest.TestCase
         self.assertEqual(wref(), None)
 
     def test_pickle(self):
-        # For now this test only works for the Python version of ET,
-        # so set sys.modules accordingly because pickle uses __import__
-        # to load the __module__ of the class.
-        if pyET:
-            sys.modules['xml.etree.ElementTree'] = pyET
-        else:
-            raise unittest.SkipTest('only for the Python version')
-        e1 = ET.Element('foo', bar=42)
-        s = pickle.dumps(e1)
-        e2 = pickle.loads(s)
-        self.assertEqual(e2.tag, 'foo')
-        self.assertEqual(e2.attrib['bar'], 42)
-
+        # issue #16076: the C implementation wasn't pickleable.
+        for dumper, loader in product(self.modules, repeat=2):
+            e = dumper.Element('foo', bar=42)
+            e.text = "text goes here"
+            e.tail = "opposite of head"
+            dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
+            e.append(dumper.Element('child'))
+            e.findall('.//grandchild')[0].set('attr', 'other value')
+
+            e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
+                                      dumper, loader)
+
+            self.assertEqual(e2.tag, 'foo')
+            self.assertEqual(e2.attrib['bar'], 42)
+            self.assertEqual(len(e2), 2)
+            self.assertEqualElements(e, e2)
 
 class ElementTreeTest(unittest.TestCase):
     def test_istype(self):
@@ -1904,7 +1947,7 @@ class ElementIterTest(unittest.TestCase)
         self.assertEqual(self._ilist(doc, '*'), all_tags)
 
 
-class TreeBuilderTest(unittest.TestCase):
+class TreeBuilderTest(ElementTestCase):
 
     sample1 = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
         '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
@@ -2017,6 +2060,32 @@ class TreeBuilderTest(unittest.TestCase)
             ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
 
+    def test_pickle(self):
+        # issue #16076: the C implementation wasn't pickleable.
+        for dumper, middler, loader in product(self.modules, repeat=3):
+            tb = dumper.TreeBuilder()
+            tb.start('tag', {})
+
+            # Fork the builder, run parallel histories, compare the results.
+            # The C TreeBuilder.__setstate__ only parses the state tuple built
+            # by its own __reduce__, so pyET -> C hops are skipped.
+            if dumper is pyET and middler is not pyET:
+                continue
+            tb2 = self.pickleRoundTrip(tb, 'xml.etree.ElementTree',
+                                       dumper, middler)
+            tb.end('tag')
+            tb2.end('tag')
+            if middler is pyET and loader is not pyET:
+                continue
+            tb3 = self.pickleRoundTrip(tb2, 'xml.etree.ElementTree',
+                                       middler, loader)
+
+            tree = tb.close()
+            tree2 = tb2.close()
+            tree3 = tb3.close()
+            self.assertEqualElements(tree, tree2)
+            self.assertEqualElements(tree, tree3)
+
 
 class XincludeTest(unittest.TestCase):
     def _my_loader(self, href, parse):
@@ -2423,7 +2492,7 @@ class KeywordArgsTest(unittest.TestCase)
 class NoAcceleratorTest(unittest.TestCase):
     def setUp(self):
         if not pyET:
-            raise SkipTest('only for the Python version')
+            raise unittest.SkipTest('only for the Python version')
 
     # Test that the C accelerator was not imported for pyET
     def test_correct_import_pyET(self):
@@ -2476,10 +2545,10 @@ class CleanContext(object):
 def test_main(module=None):
     # When invoked without a module, runs the Python ET tests by loading pyET.
     # Otherwise, uses the given module as the ET.
+    global pyET
+    pyET = import_fresh_module('xml.etree.ElementTree',
+                               blocked=['_elementtree'])
     if module is None:
-        global pyET
-        pyET = import_fresh_module('xml.etree.ElementTree',
-                                   blocked=['_elementtree'])
         module = pyET
 
     global ET
@@ -2499,7 +2568,7 @@ def test_main(module=None):
     # These tests will only run for the pure-Python version that doesn't import
     # _elementtree. We can't use skipUnless here, because pyET is filled in only
     # after the module is loaded.
-    if pyET:
+    if pyET is not ET:
         test_classes.extend([
             NoAcceleratorTest,
             ])
@@ -2508,7 +2577,7 @@ def test_main(module=None):
         support.run_unittest(*test_classes)
 
         # XXX the C module should give the same warnings as the Python module
-        with CleanContext(quiet=(module is not pyET)):
+        with CleanContext(quiet=(pyET is not ET)):
             support.run_doctest(sys.modules[__name__], verbosity=True)
     finally:
         # don't interfere with subsequent tests
diff -r 08c215115842 Modules/_elementtree.c
--- a/Modules/_elementtree.c	Sun Dec 30 06:29:49 2012 -0800
+++ b/Modules/_elementtree.c	Sun Dec 30 23:48:50 2012 +0000
@@ -814,6 +814,229 @@ element_sizeof(PyObject* _self, PyObject
     return PyLong_FromSsize_t(result);
 }
 
+/* __reduce__ for Element: return (type, (tag[, attrib]), state), where
+   'state' is the (text, tail, children) tuple consumed by
+   element_setstate_from_C().  'children' is a tuple of the subelements,
+   or None when there are none. */
+static PyObject *
+element_reduce(ElementObject *self)
+{
+    PyObject *children, *result;
+
+    if (self->extra && self->extra->length) {
+        /* Build a tuple of children. */
+        Py_ssize_t i;
+        children = PyTuple_New(self->extra->length);
+        if (!children)
+            return NULL;
+        for (i = 0; i < self->extra->length; i++) {
+            PyObject *child = self->extra->children[i];
+            Py_INCREF(child);
+            PyTuple_SET_ITEM(children, i, child);
+        }
+    }
+    else {
+        /* No children. */
+        Py_INCREF(Py_None);
+        children = Py_None;
+    }
+
+    /* NOTE(review): text and tail are passed exactly as stored; confirm
+       the raw members are always plain objects at this point. */
+    if (self->extra && self->extra->attrib && self->extra->attrib != Py_None) {
+        result = Py_BuildValue("O(OO)(OOO)", Py_TYPE(self), self->tag,
+                               self->extra->attrib,
+                               self->text, self->tail, children);
+    }
+    else {
+        result = Py_BuildValue("O(O)(OOO)", Py_TYPE(self), self->tag,
+                               self->text, self->tail, children);
+    }
+    /* The "O" format took its own reference, so drop ours. */
+    Py_DECREF(children);
+    return result;
+}
+PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
+
+/* Set the state of 'self' from individual members.  'tag' is required;
+   'text' and 'tail' default to None when NULL; 'attrib' and 'children'
+   may each be NULL.  All arguments are borrowed references.  Returns
+   None, or NULL with an exception set. */
+static PyObject *
+element_setstate_from_attributes(ElementObject *self,
+                                 PyObject *tag,
+                                 PyObject *attrib,
+                                 PyObject *text,
+                                 PyObject *tail,
+                                 PyObject *children)
+{
+    PyObject *fast_children;
+    Py_ssize_t i, nchildren;
+
+    if (!tag) {
+        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
+        return NULL;
+    }
+    /* Borrowed defaults: the stores below take the owning references. */
+    if (!text)
+        text = Py_None;
+    if (!tail)
+        tail = Py_None;
+
+    Py_CLEAR(self->tag);
+    self->tag = tag;
+    Py_INCREF(self->tag);
+
+    Py_CLEAR(self->text);
+    self->text = text;
+    Py_INCREF(self->text);
+
+    Py_CLEAR(self->tail);
+    self->tail = tail;
+    Py_INCREF(self->tail);
+
+    /* Handle ATTRIB and CHILDREN. */
+    if (!children && !attrib)
+        Py_RETURN_NONE;
+
+    /* Snapshot 'children' and compute 'nchildren'. */
+    if (children) {
+        if (!PySequence_Check(children)) {
+            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
+            return NULL;
+        }
+        fast_children = PySequence_Fast(children,
+                                        "'_children' is not a list");
+        if (!fast_children)
+            return NULL;
+        nchildren = PySequence_Fast_GET_SIZE(fast_children);
+    }
+    else {
+        /* Only 'attrib' was supplied. */
+        fast_children = NULL;
+        nchildren = 0;
+    }
+
+    /* Allocate 'extra'. */
+    if (element_resize(self, nchildren)) {
+        Py_XDECREF(fast_children);
+        return NULL;
+    }
+    assert(self->extra && self->extra->allocated >= nchildren);
+
+    /* Stash children. */
+    if (fast_children) {
+        PyObject **fast_items = PySequence_Fast_ITEMS(fast_children);
+
+        memcpy(self->extra->children, fast_items,
+               nchildren * sizeof(PyObject*));
+        for (i = 0; i < nchildren; i++)
+            Py_INCREF(self->extra->children[i]);
+
+        self->extra->length = nchildren;
+        self->extra->allocated = nchildren;
+
+        Py_DECREF(fast_children);
+    }
+
+    /* Stash attrib. */
+    if (attrib) {
+        Py_CLEAR(self->extra->attrib);
+        self->extra->attrib = attrib;
+        Py_INCREF(attrib);
+    }
+
+    Py_RETURN_NONE;
+}
+
+/* __setstate__ for Element instance from the Python implementation.
+   'state' should be the instance dict. */
+static PyObject *
+element_setstate_from_Python(ElementObject *self, PyObject *state)
+{
+    static char *kwlist[] = {"tag", "attrib", "text", "tail", "_children",
+                             0};
+    PyObject *args;
+    PyObject *tag, *attrib, *text, *tail, *children;
+    int error;
+
+    /* More instance dict members than we know to handle? */
+    tag = attrib = text = tail = children = NULL;
+    args = PyTuple_New(0);
+    if (!args)
+        return NULL;
+    error = ! PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
+                                          &attrib, &text, &tail, &children);
+    Py_DECREF(args);
+    if (error)
+        return NULL;
+    else
+        return element_setstate_from_attributes(self, tag, attrib, text,
+                                                tail, children);
+}
+
+/* __setstate__ for 'state' as created by element_reduce(). */
+static PyObject *
+element_setstate_from_C(ElementObject *self, PyObject *state)
+{
+    PyObject *text, *tail, *children;
+    if (!PyArg_ParseTuple(state, "OOO", &text, &tail, &children))
+        return NULL;
+
+    Py_CLEAR(self->text);
+    self->text = text;
+    Py_INCREF(self->text);
+
+    Py_CLEAR(self->tail);
+    self->tail = tail;
+    Py_INCREF(self->tail);
+
+    if (children == Py_None) {
+        /* No children. */
+        Py_RETURN_NONE;
+    }
+    else if (!PyTuple_Check(children)) {
+        /* Can't happen. */
+        PyErr_SetString(PyExc_SystemError,
+                        "Expected _elementtree.Element.__setstate__() "
+                        "to be passed a tuple");
+        return NULL;
+    }
+    else {
+        /* Have children. */
+        Py_ssize_t nchildren = PyTuple_Size(children);
+        Py_ssize_t i;
+
+        /* attrib, if any, will have been set by now. */
+        assert(! self->extra || ! self->extra->length);
+        if (element_resize(self, nchildren) < 0)
+            return NULL;
+
+        for (i = 0; i < nchildren; i++) {
+            if (element_add_subelement(self, PyTuple_GET_ITEM(children, i)) < 0)
+                return NULL;
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
+/* __setstate__ entry point: dispatch on the kind of pickled state. */
+static PyObject *
+element_setstate(ElementObject *self, PyObject *state)
+{
+    if (PyTuple_CheckExact(state))
+        return element_setstate_from_C(self, state);
+    else if (!PyDict_CheckExact(state)) {
+        PyErr_Format(PyExc_TypeError,
+                     "Don't know how to unpickle \"%.200R\" as an Element",
+                     state);
+        return NULL;
+    }
+    else
+        return element_setstate_from_Python(self, state);
+}
+
 LOCAL(int)
 checkpath(PyObject* tag)
 {
@@ -1582,6 +1805,8 @@ static PyMethodDef element_methods[] = {
     {"__copy__", (PyCFunction) element_copy, METH_VARARGS},
     {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
     {"__sizeof__", element_sizeof, METH_NOARGS},
+    {"__reduce__", (PyCFunction)element_reduce, METH_NOARGS, reduce_doc},
+    {"__setstate__", (PyCFunction)element_setstate, METH_O},
 
     {NULL, NULL}
 };
@@ -1686,7 +1911,7 @@ static PyMappingMethods element_as_mappi
 
 static PyTypeObject Element_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "Element", sizeof(ElementObject), 0,
+    "_elementtree.Element", sizeof(ElementObject), 0,
     /* methods */
     (destructor)element_dealloc,                    /* tp_dealloc */
     0,                                              /* tp_print */
@@ -2002,7 +2227,7 @@ typedef struct {
 
     PyObject *data; /* data collector (string or list), or NULL */
 
-    PyObject *stack; /* element stack */
+    PyObject *stack; /* element stack; members after [index] place are NULL */
     Py_ssize_t index; /* current stack size (0 means empty) */
 
     PyObject *element_factory;
@@ -2443,17 +2668,95 @@ treebuilder_start(TreeBuilderObject* sel
     return treebuilder_handle_start(self, tag, attrib);
 }
 
+/* __reduce__ for TreeBuilder: return (type, ([element_factory]),
+   (root, this, last, data, stack)), with None standing in for NULL
+   members and 'stack' trimmed to the 'index' entries in use. */
+static PyObject *
+treebuilder_reduce(TreeBuilderObject *self)
+{
+    PyObject *args, *stack, *result;
+
+    if (self->events) {
+        /* XMLParser isn't pickleable, so don't try and support pickling when
+           we are a private instance used by it. */
+        PyErr_SetString(PyExc_ValueError,
+                        "Can't pickle TreeBuilder used from XMLParser");
+        return NULL;
+    }
+
+    /* Build a 0- or 1-elements tuple. */
+    args = PyTuple_Pack(!!self->element_factory, self->element_factory);
+    if (!args)
+        return NULL;
+    stack = PyList_GetSlice(self->stack, 0, self->index);
+    if (!stack) {
+        Py_DECREF(args);
+        return NULL;
+    }
+    result = Py_BuildValue("OO(OOOOO)", Py_TYPE(self), args,
+                           self->root ? self->root : Py_None,
+                           self->this ? self->this : Py_None,
+                           self->last ? self->last : Py_None,
+                           self->data ? self->data : Py_None,
+                           stack);
+    Py_DECREF(args);
+    /* Py_BuildValue took its own reference to the slice. */
+    Py_DECREF(stack);
+    return result;
+}
+
+/* __setstate__ for 'state' as created by treebuilder_reduce(). */
+static PyObject *
+treebuilder_setstate(TreeBuilderObject *self, PyObject *state)
+{
+    PyObject *root, *this, *last, *data, *stack;
+    if (!PyArg_ParseTuple(state, "OOOOO", &root, &this, &last, &data, &stack))
+        return NULL;
+
+    /* Validate before modifying 'self': 'index' is derived from the
+       size of 'stack', so it must be a list. */
+    if (!PyList_Check(stack)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "TreeBuilder state 'stack' must be a list");
+        return NULL;
+    }
+
+#define SET_MEMBER(name) \
+    do { \
+        Py_CLEAR(self->name); \
+        if (name == Py_None) \
+            self->name = NULL; \
+        else { \
+            self->name = name; \
+            Py_INCREF(self->name); \
+        } \
+    } while (0)
+
+    SET_MEMBER(root);
+    SET_MEMBER(this);
+    SET_MEMBER(last);
+    SET_MEMBER(data);
+    SET_MEMBER(stack);
+    self->index = PyList_GET_SIZE(self->stack);
+
+#undef SET_MEMBER
+
+    Py_RETURN_NONE;
+}
+
 static PyMethodDef treebuilder_methods[] = {
     {"data", (PyCFunction) treebuilder_data, METH_VARARGS},
     {"start", (PyCFunction) treebuilder_start, METH_VARARGS},
     {"end", (PyCFunction) treebuilder_end, METH_VARARGS},
     {"close", (PyCFunction) treebuilder_close, METH_VARARGS},
+    {"__reduce__", (PyCFunction)treebuilder_reduce, METH_NOARGS, reduce_doc},
+    {"__setstate__", (PyCFunction)treebuilder_setstate, METH_O},
     {NULL, NULL}
 };
 
 static PyTypeObject TreeBuilder_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "TreeBuilder", sizeof(TreeBuilderObject), 0,
+    "_elementtree.TreeBuilder", sizeof(TreeBuilderObject), 0,
     /* methods */
     (destructor)treebuilder_dealloc,                /* tp_dealloc */
     0,                                              /* tp_print */
@@ -3415,7 +3718,7 @@ xmlparser_getattro(XMLParserObject* self
 
 static PyTypeObject XMLParser_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "XMLParser", sizeof(XMLParserObject), 0,
+    "_elementtree.XMLParser", sizeof(XMLParserObject), 0,
     /* methods */
     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
     0,                                              /* tp_print */