diff -r 7b1da249ab6d Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py Sun Aug 25 14:19:29 2013 +0200 +++ b/Lib/test/test_xml_etree.py Sun Aug 25 16:08:37 2013 +0200 @@ -950,169 +950,6 @@ self.assertEqual(serialized, expected) -class IncrementalParserTest(unittest.TestCase): - - def _feed(self, parser, data, chunk_size=None): - if chunk_size is None: - parser.data_received(data) - else: - for i in range(0, len(data), chunk_size): - parser.data_received(data[i:i+chunk_size]) - - def assert_event_tags(self, parser, expected): - events = parser.events() - self.assertEqual([(action, elem.tag) for action, elem in events], - expected) - - def test_simple_xml(self): - for chunk_size in (None, 1, 5): - with self.subTest(chunk_size=chunk_size): - parser = ET.IncrementalParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, []) - self._feed(parser, - "\n text\n", chunk_size) - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [ - ('end', 'element'), - ('end', 'empty-element'), - ]) - self._feed(parser, "\n", chunk_size) - self.assert_event_tags(parser, [('end', 'root')]) - # Receiving EOF sets the `root` attribute - self.assertIs(parser.root, None) - parser.eof_received() - self.assertEqual(parser.root.tag, 'root') - - def test_data_received_while_iterating(self): - parser = ET.IncrementalParser() - it = parser.events() - self._feed(parser, "\n text\n") - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'element')) - self._feed(parser, "\n") - action, elem = next(it) - self.assertEqual((action, elem.tag), ('end', 'root')) - with self.assertRaises(StopIteration): - next(it) - - def test_simple_xml_with_ns(self): - parser = ET.IncrementalParser() - self.assert_event_tags(parser, []) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) - self._feed(parser, "text\n") - self.assert_event_tags(parser, [('end', '{namespace}element')]) - self._feed(parser, "texttail\n") - self._feed(parser, "\n") - self.assert_event_tags(parser, [ - ('end', '{namespace}element'), - ('end', '{namespace}empty-element'), - ]) - self._feed(parser, "\n") - self.assert_event_tags(parser, [('end', '{namespace}root')]) - # Receiving EOF sets the `root` attribute - self.assertIs(parser.root, None) - parser.eof_received() - self.assertEqual(parser.root.tag, '{namespace}root') - - def test_ns_events(self): - parser = ET.IncrementalParser(events=('start-ns', 'end-ns')) - self._feed(parser, "\n") - self._feed(parser, "\n") - self.assertEqual( - list(parser.events()), - [('start-ns', ('', 'namespace'))]) - self._feed(parser, "text\n") - self._feed(parser, "texttail\n") - self._feed(parser, "\n") - self._feed(parser, "\n") - self.assertEqual(list(parser.events()), [('end-ns', None)]) - parser.eof_received() - - def test_events(self): - parser = ET.IncrementalParser(events=()) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) - - parser = ET.IncrementalParser(events=('start', 'end')) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) - self._feed(parser, "\n") - self.assert_event_tags(parser, [('start', 'root')]) - self._feed(parser, "text\n") - self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, - "texttail\n") - self.assert_event_tags(parser, [ - ('start', '{foo}element'), - ('start', '{foo}empty-element'), - ('end', '{foo}empty-element'), - ('end', '{foo}element'), - ]) - self._feed(parser, "") - parser.eof_received() - self.assertIs(parser.root, None) - self.assert_event_tags(parser, [('end', 'root')]) - self.assertEqual(parser.root.tag, 'root') - - parser = ET.IncrementalParser(events=('start',)) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) - self._feed(parser, "\n") - self.assert_event_tags(parser, [('start', 'root')]) - self._feed(parser, "text\n") - self.assert_event_tags(parser, []) - self._feed(parser, - "texttail\n") - self.assert_event_tags(parser, [ - ('start', '{foo}element'), - ('start', '{foo}empty-element'), - ]) - self._feed(parser, "") - parser.eof_received() - self.assertEqual(parser.root.tag, 'root') - - def test_events_sequence(self): - # Test that events can be some sequence that's not just a tuple or list - eventset = {'end', 'start'} - parser = ET.IncrementalParser(events=eventset) - self._feed(parser, "bar") - self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) - - class DummyIter: - def __init__(self): - self.events = iter(['start', 'end', 'start-ns']) - def __iter__(self): - return self - def __next__(self): - return next(self.events) - - parser = ET.IncrementalParser(events=DummyIter()) - self._feed(parser, "bar") - self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) - - - def test_unknown_event(self): - with self.assertRaises(ValueError): - ET.IncrementalParser(events=('start', 'end', 'bogus')) - - # # xinclude tests (samples from appendix C of the xinclude specification) @@ -2546,7 +2383,6 @@ ElementSlicingTest, BasicElementTest, ElementTreeTest, - IncrementalParserTest, IOTest, ParseErrorTest, XIncludeTest, diff -r 7b1da249ab6d Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py Sun Aug 25 14:19:29 2013 +0200 +++ b/Lib/xml/etree/ElementTree.py Sun Aug 25 16:08:37 2013 +0200 @@ -1207,87 +1207,63 @@ if not hasattr(source, "read"): source = open(source, "rb") close_source = True + if parser is None: + parser = XMLParser(target=TreeBuilder()) return _IterParseIterator(source, events, parser, close_source) -class IncrementalParser: +class _IterParseIterator: - def __init__(self, events=None, parser=None): + def __init__(self, source, events, parser, close_source=False): # _elementtree.c expects a list, not a deque self._events_queue = [] self._index = 0 + self._error = None self.root = self._root = None - if not parser: - parser = XMLParser(target=TreeBuilder()) self._parser = parser # wire up the parser for event reporting if events is None: events = ("end",) - self._parser._setevents(self._events_queue, events) + parser._setevents(self._events_queue, events) - def data_received(self, data): - if self._parser is None: - raise ValueError("data_received() called after end of stream") - if data: - try: - self._parser.feed(data) - except SyntaxError as exc: - self._events_queue.append(exc) - - def eof_received(self): - self._root = self._parser.close() - self._parser = None - if self._index >= len(self._events_queue): - self.root = self._root - - def events(self): - events = self._events_queue - while True: - index = self._index - try: - event = events[self._index] - # Avoid retaining references to past events - events[self._index] = None - except IndexError: - break - index += 1 - # Compact the list in a O(1) amortized fashion - if index * 2 >= len(events): - events[:index] = [] - self._index = 0 - else: - self._index = index - if isinstance(event, Exception): - raise event - else: - yield event - if self._parser is None: - self.root = self._root - - -class _IterParseIterator: - - def __init__(self, source, events, parser, close_source=False): - self._parser = IncrementalParser(events, parser) self._file = source self._close_file = close_source - self.root = None def __next__(self): - while 1: - for event in self._parser.events(): - return event - if self._parser._parser is None: - self.root = self._parser.root + events = self._events_queue + index = self._index + + while len(events) <= index: + if self._parser is None: + self.root = self._root if self._close_file: self._file.close() raise StopIteration + if self._error: + e = self._error + self._error = None + raise e # load event buffer data = self._file.read(16384) if data: - self._parser.data_received(data) + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc else: - self._parser.eof_received() + self._root = self._parser.close() + self._parser = None + + event = events[index] + # Avoid retaining references to past events + events[index] = None + index += 1 + # Compact the list in a O(1) amortized fashion + if index * 2 >= len(events): + del events[:index] + index = 0 + self._index = index + return event def __iter__(self): return self