diff -r a206f952668e Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py Thu Aug 08 18:28:53 2013 +0100 +++ b/Lib/test/test_xml_etree.py Sat Aug 10 09:34:03 2013 +0200 @@ -950,49 +950,51 @@ self.assertEqual(serialized, expected) -class IncrementalParserTest(unittest.TestCase): +class TreeEventBuilderTest(unittest.TestCase): def _feed(self, parser, data, chunk_size=None): if chunk_size is None: - parser.data_received(data) + parser.feed(data) else: for i in range(0, len(data), chunk_size): - parser.data_received(data[i:i+chunk_size]) - - def assert_event_tags(self, parser, expected): - events = parser.events() + parser.feed(data[i:i+chunk_size]) + + def assert_event_tags(self, builder, expected): + events = builder.events() self.assertEqual([(action, elem.tag) for action, elem in events], expected) def test_simple_xml(self): for chunk_size in (None, 1, 5): with self.subTest(chunk_size=chunk_size): - parser = ET.IncrementalParser() - self.assert_event_tags(parser, []) + builder = ET.TreeEventBuilder() + parser = ET.XMLParser(target=builder) + self.assert_event_tags(builder, []) self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) + self.assert_event_tags(builder, [('end', 'element')]) self._feed(parser, "texttail\n", chunk_size) self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ + self.assert_event_tags(builder, [ ('end', 'element'), ('end', 'empty-element'), ]) self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) + self.assert_event_tags(builder, [('end', 'root')]) # Receiving EOF sets the `root` attribute - self.assertIs(parser.root, None) - parser.eof_received() - self.assertEqual(parser.root.tag, 'root') + self.assertIs(builder.root, None) + parser.close() + self.assertEqual(builder.root.tag, 'root') def test_data_received_while_iterating(self): - parser = ET.IncrementalParser() - it = parser.events() + builder = ET.TreeEventBuilder() + parser = ET.XMLParser(target=builder) + it = builder.events() self._feed(parser, "\n text\n") action, elem = next(it) self.assertEqual((action, elem.tag), ('end', 'element')) @@ -1003,97 +1005,103 @@ next(it) def test_simple_xml_with_ns(self): - parser = ET.IncrementalParser() - self.assert_event_tags(parser, []) + builder = ET.TreeEventBuilder() + parser = ET.XMLParser(target=builder) + self.assert_event_tags(builder, []) self._feed(parser, "\n") - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "\n") - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "text\n") - self.assert_event_tags(parser, [('end', '{namespace}element')]) + self.assert_event_tags(builder, [('end', '{namespace}element')]) self._feed(parser, "texttail\n") self._feed(parser, "\n") - self.assert_event_tags(parser, [ + self.assert_event_tags(builder, [ ('end', '{namespace}element'), ('end', '{namespace}empty-element'), ]) self._feed(parser, "\n") - self.assert_event_tags(parser, [('end', '{namespace}root')]) + self.assert_event_tags(builder, [('end', '{namespace}root')]) # Receiving EOF sets the `root` attribute - self.assertIs(parser.root, None) - parser.eof_received() - self.assertEqual(parser.root.tag, '{namespace}root') + self.assertIs(builder.root, None) + parser.close() + self.assertEqual(builder.root.tag, '{namespace}root') def test_ns_events(self): - parser = ET.IncrementalParser(events=('start-ns', 'end-ns')) + builder = ET.TreeEventBuilder(events=('start-ns', 'end-ns')) + parser = ET.XMLParser(target=builder) self._feed(parser, "\n") self._feed(parser, "\n") self.assertEqual( - list(parser.events()), + list(builder.events()), [('start-ns', ('', 'namespace'))]) self._feed(parser, "text\n") self._feed(parser, "texttail\n") self._feed(parser, "\n") self._feed(parser, "\n") - self.assertEqual(list(parser.events()), [('end-ns', None)]) - parser.eof_received() + self.assertEqual(list(builder.events()), [('end-ns', None)]) + parser.close() def test_events(self): - parser = ET.IncrementalParser(events=()) + builder = ET.TreeEventBuilder(events=()) + parser = ET.XMLParser(target=builder) self._feed(parser, "\n") - self.assert_event_tags(parser, []) - - parser = ET.IncrementalParser(events=('start', 'end')) + self.assert_event_tags(builder, []) + + builder = ET.TreeEventBuilder(events=('start', 'end')) + parser = ET.XMLParser(target=builder) self._feed(parser, "\n") - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "\n") - self.assert_event_tags(parser, [('start', 'root')]) + self.assert_event_tags(builder, [('start', 'root')]) self._feed(parser, "text\n") - self.assert_event_tags(parser, [('end', 'element')]) + self.assert_event_tags(builder, [('end', 'element')]) self._feed(parser, "texttail\n") - self.assert_event_tags(parser, [ + self.assert_event_tags(builder, [ ('start', '{foo}element'), ('start', '{foo}empty-element'), ('end', '{foo}empty-element'), ('end', '{foo}element'), ]) self._feed(parser, "") - parser.eof_received() - self.assertIs(parser.root, None) - self.assert_event_tags(parser, [('end', 'root')]) - self.assertEqual(parser.root.tag, 'root') - - parser = ET.IncrementalParser(events=('start',)) + parser.close() + self.assertIs(builder.root, None) + self.assert_event_tags(builder, [('end', 'root')]) + self.assertEqual(builder.root.tag, 'root') + + builder = ET.TreeEventBuilder(events=('start',)) + parser = ET.XMLParser(target=builder) self._feed(parser, "\n") - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "\n") - self.assert_event_tags(parser, [('start', 'root')]) + self.assert_event_tags(builder, [('start', 'root')]) self._feed(parser, "text\n") - self.assert_event_tags(parser, []) + self.assert_event_tags(builder, []) self._feed(parser, "texttail\n") - self.assert_event_tags(parser, [ + self.assert_event_tags(builder, [ ('start', '{foo}element'), ('start', '{foo}empty-element'), ]) self._feed(parser, "") - parser.eof_received() - self.assertEqual(parser.root.tag, 'root') + parser.close() + self.assertEqual(builder.root.tag, 'root') def test_events_sequence(self): # Test that events can be some sequence that's not just a tuple or list eventset = {'end', 'start'} - parser = ET.IncrementalParser(events=eventset) + builder = ET.TreeEventBuilder(events=eventset) + parser = ET.XMLParser(target=builder) self._feed(parser, "bar") - self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) + self.assert_event_tags(builder, [('start', 'foo'), ('end', 'foo')]) class DummyIter: def __init__(self): @@ -1103,14 +1111,16 @@ def __next__(self): return next(self.events) - parser = ET.IncrementalParser(events=DummyIter()) + builder = ET.TreeEventBuilder(events=DummyIter()) + parser = ET.XMLParser(target=builder) self._feed(parser, "bar") self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) def test_unknown_event(self): with self.assertRaises(ValueError): - ET.IncrementalParser(events=('start', 'end', 'bogus')) + builder = ET.TreeEventBuilder(events=('start', 'end', 'bogus')) + ET.XMLParser(target=builder) # @@ -2546,7 +2556,7 @@ ElementSlicingTest, BasicElementTest, ElementTreeTest, - IncrementalParserTest, + TreeEventBuilderTest, IOTest, ParseErrorTest, XIncludeTest, diff -r a206f952668e Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py Thu Aug 08 18:28:53 2013 +0100 +++ b/Lib/xml/etree/ElementTree.py Sat Aug 10 09:34:03 2013 +0200 @@ -1210,35 +1210,19 @@ return _IterParseIterator(source, events, parser, close_source) -class IncrementalParser: +class TreeEventBuilder: - def __init__(self, events=None, parser=None): + def __init__(self, events=None, target=None): # _elementtree.c expects a list, not a deque self._events_queue = [] self._index = 0 - self.root = self._root = None - if not parser: - parser = XMLParser(target=TreeBuilder()) - self._parser = parser - # wire up the parser for event reporting + self.root = None if events is None: events = ("end",) - self._parser._setevents(self._events_queue, events) - - def data_received(self, data): - if self._parser is None: - raise ValueError("data_received() called after end of stream") - if data: - try: - self._parser.feed(data) - except SyntaxError as exc: - self._events_queue.append(exc) - - def eof_received(self): - self._root = self._parser.close() - self._parser = None - if self._index >= len(self._events_queue): - self.root = self._root + self._events = events + if target is None: + target = TreeBuilder() + self._target = target def events(self): events = self._events_queue @@ -1261,20 +1245,26 @@ raise event else: yield event - if self._parser is None: - self.root = self._root -class _IterParseIterator(IncrementalParser): +class _IterParseIterator: def __init__(self, source, events, parser, close_source=False): - IncrementalParser.__init__(self, events, parser) + self._event_builder = TreeEventBuilder(events) + if parser is None: + parser = XMLParser(target=self._event_builder) + else: + if events is None: + events = ("end",) + # FIXME: this is the same hack as before, breaking the parser + parser._setevents(self._event_builder._events_queue, events) + self._parser = parser self._file = source self._close_file = close_source def __next__(self): while 1: - for event in self.events(): + for event in self._event_builder.events(): return event if self._parser is None: if self._close_file: @@ -1283,9 +1273,11 @@ # load event buffer data = self._file.read(16384) if data: - self.data_received(data) + self._parser.feed(data) else: - self.eof_received() + self._parser.close() + self.root = self._event_builder.root + self._parser = None def __iter__(self): return self @@ -1447,6 +1439,9 @@ "No module named expat; use SimpleXMLTreeBuilder instead" ) parser = expat.ParserCreate(encoding, "}") + self._event_handler = None + if isinstance(target, TreeEventBuilder): + self._event_handler, target = target, target._target if target is None: target = TreeBuilder() # underscored names are provided for compatibility only @@ -1467,6 +1462,10 @@ parser.CommentHandler = target.comment if hasattr(target, 'pi'): parser.ProcessingInstructionHandler = target.pi + if self._event_handler is not None: + self._setevents(self._event_handler._events_queue, + self._event_handler._events) + # let expat do the buffering, if supported try: parser.buffer_text = 1 @@ -1643,11 +1642,15 @@ except AttributeError: pass else: - return close_handler() + root = close_handler() + if self._event_handler is not None: + self._event_handler.root = root + return root finally: # get rid of circular references del self.parser, self._parser del self.target, self._target + del self._event_handler # Import the C accelerators