diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -106,6 +106,18 @@ '2008' +.. note:: + + Not all elements of the XML input will end up as elements of the + parsed tree. Currently, this module skips over any XML comments, + processing instructions, and document type declarations in the + input. Processing instructions and comments can nevertheless be + represented as tree elements, and are included when generating XML + output. A document type declaration may be accessed by passing a + custom :class:`TreeBuilder` instance to the :class:`XMLParser` + constructor. + + .. _elementtree-pull-parsing: Pull API for non-blocking parsing @@ -381,6 +393,12 @@ string containing the comment string. Returns an element instance representing a comment. + Note that the element tree XML parser skips over comments in the + input instead of creating comment objects for them. An element tree + will thus only contain comment nodes if they have been inserted + into the tree using :meth:`~Element.insert`, + :meth:`~Element.append` or :meth:`~Element.extend`. + .. function:: dump(elem) @@ -461,6 +479,13 @@ containing the PI target. *text* is a string containing the PI contents, if given. Returns an element instance, representing a processing instruction. + Note that the element tree XML parser skips over processing + instructions in the input instead of creating objects for them. An + element tree will thus only contain processing instruction nodes if + they have been inserted into the tree using + :meth:`~Element.insert`, :meth:`~Element.append` or + :meth:`~Element.extend`. + .. 
function:: register_namespace(prefix, uri) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1612,7 +1612,37 @@ ET.register_namespace('test10777', 'http://myuri/') ET.register_namespace('test10777', 'http://myuri/') - + def test_pi_treatment(self): + root = ET.Element('body', { 'text': 'some text'}) + root.append(ET.ProcessingInstruction('verb', 'extra data')) + tree = ET.ElementTree(root) + stream = io.BytesIO() + tree.write(stream) + + # PIs are serialized to XML.. + self.assertIn(b'<?verb extra data?>', bytes(stream.getbuffer())) + + # ..but skipped when parsing XML + stream.seek(0) + tree2 = ET.parse(stream) + self.assertEqual(len(tree2.getroot().getchildren()), 0) + + + def test_comment_treatment(self): + root = ET.Element('body', { 'text': 'some text'}) + root.append(ET.Comment('not to be rendered')) + tree = ET.ElementTree(root) + stream = io.BytesIO() + tree.write(stream) + + # Comments are serialized to XML.. + self.assertIn(b'<!--not to be rendered-->', bytes(stream.getbuffer())) + + # ..but skipped when parsing XML + stream.seek(0) + tree2 = ET.parse(stream) + self.assertEqual(len(tree2.getroot().getchildren()), 0) + # --------------------------------------------------------------------