diff -r 7b1da249ab6d Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py Sun Aug 25 14:19:29 2013 +0200
+++ b/Lib/test/test_xml_etree.py Sun Aug 25 16:08:37 2013 +0200
@@ -950,169 +950,6 @@
self.assertEqual(serialized, expected)
-class IncrementalParserTest(unittest.TestCase):
-
- def _feed(self, parser, data, chunk_size=None):
- if chunk_size is None:
- parser.data_received(data)
- else:
- for i in range(0, len(data), chunk_size):
- parser.data_received(data[i:i+chunk_size])
-
- def assert_event_tags(self, parser, expected):
- events = parser.events()
- self.assertEqual([(action, elem.tag) for action, elem in events],
- expected)
-
- def test_simple_xml(self):
- for chunk_size in (None, 1, 5):
- with self.subTest(chunk_size=chunk_size):
- parser = ET.IncrementalParser()
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [])
- self._feed(parser,
- "\n text\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'element')])
- self._feed(parser, "texttail\n", chunk_size)
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [
- ('end', 'element'),
- ('end', 'empty-element'),
- ])
- self._feed(parser, "\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'root')])
- # Receiving EOF sets the `root` attribute
- self.assertIs(parser.root, None)
- parser.eof_received()
- self.assertEqual(parser.root.tag, 'root')
-
- def test_data_received_while_iterating(self):
- parser = ET.IncrementalParser()
- it = parser.events()
- self._feed(parser, "\n text\n")
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'element'))
- self._feed(parser, "\n")
- action, elem = next(it)
- self.assertEqual((action, elem.tag), ('end', 'root'))
- with self.assertRaises(StopIteration):
- next(it)
-
- def test_simple_xml_with_ns(self):
- parser = ET.IncrementalParser()
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [])
- self._feed(parser, "text\n")
- self.assert_event_tags(parser, [('end', '{namespace}element')])
- self._feed(parser, "texttail\n")
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [
- ('end', '{namespace}element'),
- ('end', '{namespace}empty-element'),
- ])
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [('end', '{namespace}root')])
- # Receiving EOF sets the `root` attribute
- self.assertIs(parser.root, None)
- parser.eof_received()
- self.assertEqual(parser.root.tag, '{namespace}root')
-
- def test_ns_events(self):
- parser = ET.IncrementalParser(events=('start-ns', 'end-ns'))
- self._feed(parser, "\n")
- self._feed(parser, "\n")
- self.assertEqual(
- list(parser.events()),
- [('start-ns', ('', 'namespace'))])
- self._feed(parser, "text\n")
- self._feed(parser, "texttail\n")
- self._feed(parser, "\n")
- self._feed(parser, "\n")
- self.assertEqual(list(parser.events()), [('end-ns', None)])
- parser.eof_received()
-
- def test_events(self):
- parser = ET.IncrementalParser(events=())
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [])
-
- parser = ET.IncrementalParser(events=('start', 'end'))
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [('start', 'root')])
- self._feed(parser, "text\n")
- self.assert_event_tags(parser, [('end', 'element')])
- self._feed(parser,
- "texttail\n")
- self.assert_event_tags(parser, [
- ('start', '{foo}element'),
- ('start', '{foo}empty-element'),
- ('end', '{foo}empty-element'),
- ('end', '{foo}element'),
- ])
- self._feed(parser, "")
- parser.eof_received()
- self.assertIs(parser.root, None)
- self.assert_event_tags(parser, [('end', 'root')])
- self.assertEqual(parser.root.tag, 'root')
-
- parser = ET.IncrementalParser(events=('start',))
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [])
- self._feed(parser, "\n")
- self.assert_event_tags(parser, [('start', 'root')])
- self._feed(parser, "text\n")
- self.assert_event_tags(parser, [])
- self._feed(parser,
- "texttail\n")
- self.assert_event_tags(parser, [
- ('start', '{foo}element'),
- ('start', '{foo}empty-element'),
- ])
- self._feed(parser, "")
- parser.eof_received()
- self.assertEqual(parser.root.tag, 'root')
-
- def test_events_sequence(self):
- # Test that events can be some sequence that's not just a tuple or list
- eventset = {'end', 'start'}
- parser = ET.IncrementalParser(events=eventset)
- self._feed(parser, "bar")
- self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
-
- class DummyIter:
- def __init__(self):
- self.events = iter(['start', 'end', 'start-ns'])
- def __iter__(self):
- return self
- def __next__(self):
- return next(self.events)
-
- parser = ET.IncrementalParser(events=DummyIter())
- self._feed(parser, "bar")
- self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
-
-
- def test_unknown_event(self):
- with self.assertRaises(ValueError):
- ET.IncrementalParser(events=('start', 'end', 'bogus'))
-
-
#
# xinclude tests (samples from appendix C of the xinclude specification)
@@ -2546,7 +2383,6 @@
ElementSlicingTest,
BasicElementTest,
ElementTreeTest,
- IncrementalParserTest,
IOTest,
ParseErrorTest,
XIncludeTest,
diff -r 7b1da249ab6d Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py Sun Aug 25 14:19:29 2013 +0200
+++ b/Lib/xml/etree/ElementTree.py Sun Aug 25 16:08:37 2013 +0200
@@ -1207,87 +1207,63 @@
if not hasattr(source, "read"):
source = open(source, "rb")
close_source = True
+ if parser is None:
+ parser = XMLParser(target=TreeBuilder())
return _IterParseIterator(source, events, parser, close_source)
-class IncrementalParser:
+class _IterParseIterator:
- def __init__(self, events=None, parser=None):
+ def __init__(self, source, events, parser, close_source=False):
# _elementtree.c expects a list, not a deque
self._events_queue = []
self._index = 0
+ self._error = None
self.root = self._root = None
- if not parser:
- parser = XMLParser(target=TreeBuilder())
self._parser = parser
# wire up the parser for event reporting
if events is None:
events = ("end",)
- self._parser._setevents(self._events_queue, events)
+ parser._setevents(self._events_queue, events)
- def data_received(self, data):
- if self._parser is None:
- raise ValueError("data_received() called after end of stream")
- if data:
- try:
- self._parser.feed(data)
- except SyntaxError as exc:
- self._events_queue.append(exc)
-
- def eof_received(self):
- self._root = self._parser.close()
- self._parser = None
- if self._index >= len(self._events_queue):
- self.root = self._root
-
- def events(self):
- events = self._events_queue
- while True:
- index = self._index
- try:
- event = events[self._index]
- # Avoid retaining references to past events
- events[self._index] = None
- except IndexError:
- break
- index += 1
- # Compact the list in a O(1) amortized fashion
- if index * 2 >= len(events):
- events[:index] = []
- self._index = 0
- else:
- self._index = index
- if isinstance(event, Exception):
- raise event
- else:
- yield event
- if self._parser is None:
- self.root = self._root
-
-
-class _IterParseIterator:
-
- def __init__(self, source, events, parser, close_source=False):
- self._parser = IncrementalParser(events, parser)
self._file = source
self._close_file = close_source
- self.root = None
def __next__(self):
- while 1:
- for event in self._parser.events():
- return event
- if self._parser._parser is None:
- self.root = self._parser.root
+ events = self._events_queue
+ index = self._index
+
+ while len(events) <= index:
+ if self._parser is None:
+ self.root = self._root
if self._close_file:
self._file.close()
raise StopIteration
+ if self._error:
+ e = self._error
+ self._error = None
+ raise e
# load event buffer
data = self._file.read(16384)
if data:
- self._parser.data_received(data)
+ try:
+ self._parser.feed(data)
+ except SyntaxError as exc:
+ self._error = exc
else:
- self._parser.eof_received()
+ self._root = self._parser.close()
+ self._parser = None
+
+ event = events[index]
+ # Avoid retaining references to past events
+ events[index] = None
+ index += 1
+ # Compact the list in a O(1) amortized fashion
+ if index * 2 >= len(events):
+ del events[:index]
+ index = 0
+ self._index = index
+ return event
def __iter__(self):
return self