# HG changeset patch
# Parent 7b1da249ab6d2d7c84cac5ec787d2fa6ed58a1b2
diff -r 7b1da249ab6d Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py Sun Aug 25 14:19:29 2013 +0200
+++ b/Lib/test/test_xml_etree.py Mon Aug 26 21:29:10 2013 +0200
@@ -950,24 +950,24 @@
self.assertEqual(serialized, expected)
-class IncrementalParserTest(unittest.TestCase):
+class ParserEventReadingTest(unittest.TestCase):
def _feed(self, parser, data, chunk_size=None):
if chunk_size is None:
- parser.data_received(data)
+ parser.feed(data)
else:
for i in range(0, len(data), chunk_size):
- parser.data_received(data[i:i+chunk_size])
+ parser.feed(data[i:i+chunk_size])
def assert_event_tags(self, parser, expected):
- events = parser.events()
+ events = parser.read_events()
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
def test_simple_xml(self):
for chunk_size in (None, 1, 5):
with self.subTest(chunk_size=chunk_size):
- parser = ET.IncrementalParser()
+ parser = ET.XMLParser(collect_events=('end',))
self.assert_event_tags(parser, [])
self._feed(parser, "\n", chunk_size)
self.assert_event_tags(parser, [])
@@ -985,14 +985,12 @@
])
self._feed(parser, "\n", chunk_size)
self.assert_event_tags(parser, [('end', 'root')])
- # Receiving EOF sets the `root` attribute
- self.assertIs(parser.root, None)
- parser.eof_received()
- self.assertEqual(parser.root.tag, 'root')
+ root = parser.close()
+ self.assertEqual(root.tag, 'root')
def test_data_received_while_iterating(self):
- parser = ET.IncrementalParser()
- it = parser.events()
+ parser = ET.XMLParser(collect_events=('end',))
+ it = parser.read_events()
self._feed(parser, "\n text\n")
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'element'))
@@ -1003,7 +1001,7 @@
next(it)
def test_simple_xml_with_ns(self):
- parser = ET.IncrementalParser()
+ parser = ET.XMLParser(collect_events=('end',))
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
@@ -1021,32 +1019,30 @@
])
self._feed(parser, "\n")
self.assert_event_tags(parser, [('end', '{namespace}root')])
- # Receiving EOF sets the `root` attribute
- self.assertIs(parser.root, None)
- parser.eof_received()
- self.assertEqual(parser.root.tag, '{namespace}root')
+ root = parser.close()
+ self.assertEqual(root.tag, '{namespace}root')
def test_ns_events(self):
- parser = ET.IncrementalParser(events=('start-ns', 'end-ns'))
+ parser = ET.XMLParser(collect_events=('start-ns', 'end-ns'))
self._feed(parser, "\n")
self._feed(parser, "\n")
self.assertEqual(
- list(parser.events()),
+ list(parser.read_events()),
[('start-ns', ('', 'namespace'))])
self._feed(parser, "text\n")
self._feed(parser, "texttail\n")
self._feed(parser, "\n")
self._feed(parser, "\n")
- self.assertEqual(list(parser.events()), [('end-ns', None)])
- parser.eof_received()
+ self.assertEqual(list(parser.read_events()), [('end-ns', None)])
+ parser.close()
def test_events(self):
- parser = ET.IncrementalParser(events=())
+ parser = ET.XMLParser(collect_events=())
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
- parser = ET.IncrementalParser(events=('start', 'end'))
+ parser = ET.XMLParser(collect_events=('start', 'end'))
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
@@ -1064,12 +1060,11 @@
('end', '{foo}element'),
])
self._feed(parser, "")
- parser.eof_received()
- self.assertIs(parser.root, None)
+ root = parser.close()
self.assert_event_tags(parser, [('end', 'root')])
- self.assertEqual(parser.root.tag, 'root')
-
- parser = ET.IncrementalParser(events=('start',))
+ self.assertEqual(root.tag, 'root')
+
+ parser = ET.XMLParser(collect_events=('start',))
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
@@ -1085,13 +1080,13 @@
('start', '{foo}empty-element'),
])
self._feed(parser, "")
- parser.eof_received()
- self.assertEqual(parser.root.tag, 'root')
+ root = parser.close()
+ self.assertEqual(root.tag, 'root')
def test_events_sequence(self):
# Test that events can be some sequence that's not just a tuple or list
eventset = {'end', 'start'}
- parser = ET.IncrementalParser(events=eventset)
+ parser = ET.XMLParser(collect_events=eventset)
self._feed(parser, "bar")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
@@ -1103,14 +1098,14 @@
def __next__(self):
return next(self.events)
- parser = ET.IncrementalParser(events=DummyIter())
+ parser = ET.XMLParser(collect_events=DummyIter())
self._feed(parser, "bar")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
def test_unknown_event(self):
with self.assertRaises(ValueError):
- ET.IncrementalParser(events=('start', 'end', 'bogus'))
+ ET.XMLParser(collect_events=('start', 'end', 'bogus'))
#
@@ -2546,7 +2541,7 @@
ElementSlicingTest,
BasicElementTest,
ElementTreeTest,
- IncrementalParserTest,
+ ParserEventReadingTest,
IOTest,
ParseErrorTest,
XIncludeTest,
diff -r 7b1da249ab6d Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py Sun Aug 25 14:19:29 2013 +0200
+++ b/Lib/xml/etree/ElementTree.py Mon Aug 26 21:29:10 2013 +0200
@@ -1207,87 +1207,57 @@
if not hasattr(source, "read"):
source = open(source, "rb")
close_source = True
- return _IterParseIterator(source, events, parser, close_source)
-
-
-class IncrementalParser:
-
- def __init__(self, events=None, parser=None):
- # _elementtree.c expects a list, not a deque
- self._events_queue = []
- self._index = 0
- self.root = self._root = None
- if not parser:
- parser = XMLParser(target=TreeBuilder())
- self._parser = parser
- # wire up the parser for event reporting
- if events is None:
- events = ("end",)
- self._parser._setevents(self._events_queue, events)
-
- def data_received(self, data):
- if self._parser is None:
- raise ValueError("data_received() called after end of stream")
- if data:
- try:
- self._parser.feed(data)
- except SyntaxError as exc:
- self._events_queue.append(exc)
-
- def eof_received(self):
- self._root = self._parser.close()
- self._parser = None
- if self._index >= len(self._events_queue):
- self.root = self._root
-
- def events(self):
- events = self._events_queue
- while True:
- index = self._index
- try:
- event = events[self._index]
- # Avoid retaining references to past events
- events[self._index] = None
- except IndexError:
- break
- index += 1
- # Compact the list in a O(1) amortized fashion
- if index * 2 >= len(events):
- events[:index] = []
- self._index = 0
- else:
- self._index = index
- if isinstance(event, Exception):
- raise event
- else:
- yield event
- if self._parser is None:
- self.root = self._root
+ if events is None:
+ events = ("end",)
+ if parser is None:
+ parser = XMLParser(target=TreeBuilder(), collect_events=events)
+ else:
+ # essentially, we assume isinstance(parser, XMLParser)
+ # reconfigure the parser to collect the events we want
+ parser._setevents(parser._events_queue, events)
+ return _IterParseIterator(source, parser, close_source)
class _IterParseIterator:
- def __init__(self, source, events, parser, close_source=False):
- self._parser = IncrementalParser(events, parser)
+ def __init__(self, source, parser, close_source=False):
+ self._error = None
+ # It's a very unfortunate design that we must set self.root at the
+ # end. It severely limits the way this can be implemented.
+ self.root = self._root = None
+ self._parser = parser
self._file = source
self._close_file = close_source
- self.root = None
def __next__(self):
+ if self._parser is None:
+ raise StopIteration
+
while 1:
- for event in self._parser.events():
+ # FIXME: this single-step for-loop has a huge overhead
+ for event in self._parser.read_events():
return event
- if self._parser._parser is None:
- self.root = self._parser.root
- if self._close_file:
- self._file.close()
+
+ if self._error:
+ e = self._error
+ self._error = None
+ raise e
+ if self._file is None:
+ self._parser = None
+ self.root = self._root
raise StopIteration
# load event buffer
data = self._file.read(16384)
if data:
- self._parser.data_received(data)
+ try:
+ self._parser.feed(data)
+ except SyntaxError as exc:
+ self._error = exc
else:
- self._parser.eof_received()
+ self._root = self._parser.close()
+ if self._close_file:
+ self._file.close()
+ self._file = None
def __iter__(self):
return self
@@ -1438,7 +1408,7 @@
"""
- def __init__(self, html=0, target=None, encoding=None):
+ def __init__(self, html=0, target=None, encoding=None, collect_events=None):
try:
from xml.parsers import expat
except ImportError:
@@ -1482,6 +1452,11 @@
parser.StartElementHandler = self._start_list
except AttributeError:
pass
+ # _elementtree.c expects a list, not a deque
+ self._events_queue = []
+ self._events_index = 0
+ if collect_events:
+ self._setevents(self._events_queue, collect_events)
self._doctype = None
self.entity = {}
try:
@@ -1651,6 +1626,38 @@
del self.parser, self._parser
del self.target, self._target
+ def read_events(self):
+ # we avoid using yield here to make sure we free references early
+ return _EventsListIterator(self)
+
+
+class _EventsListIterator:
+ # destructive event iterator -- for internal purposes only
+ def __init__(self, parser):
+ self._parser = parser
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ events = self._parser._events_queue
+ index = self._parser._events_index
+ try:
+ event = events[index]
+ # Avoid retaining references to past events
+ events[index] = None
+ except IndexError:
+ pass
+ else:
+ index += 1
+ # Compact the list in an O(1) amortized fashion
+ if index * 2 >= len(events):
+ del events[:index]
+ index = 0
+ self._parser._events_index = index
+ return event
+ raise StopIteration
+
# Import the C accelerators
try: