# HG changeset patch # Parent 472219ffa1d7d6fe01f2e8686d8836c728a7433f Closes #2892: preserve iterparse events in case of SyntaxError. diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -754,6 +754,7 @@ ... print(action, elem.tag) ... except ET.ParseError as v: ... print(v) + end document junk after document element: line 1, column 12 """ diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1250,6 +1250,7 @@ self._close_file = close_source self._events = [] self._index = 0 + self._error = None self.root = self._root = None self._parser = parser # wire up the parser for event reporting @@ -1291,24 +1292,31 @@ while 1: try: item = self._events[self._index] + self._index += 1 + return item except IndexError: - if self._parser is None: - self.root = self._root - if self._close_file: - self._file.close() - raise StopIteration - # load event buffer - del self._events[:] - self._index = 0 - data = self._file.read(16384) - if data: + pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + if self._close_file: + self._file.close() + raise StopIteration + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + try: self._parser.feed(data) - else: - self._root = self._parser.close() - self._parser = None + except SyntaxError as exc: + self._error = exc else: - self._index = self._index + 1 - return item + self._root = self._parser.close() + self._parser = None def __iter__(self): return self diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3016,6 +3016,7 @@ " self._file = file\n" " self._events = []\n" " self._index = 0\n" + " self._error = None\n" " self.root = self._root = None\n" " b = cElementTree.TreeBuilder()\n" " self._parser = cElementTree.XMLParser(b)\n" @@ -3024,24 +3025,31 @@ " while 1:\n" " try:\n" " item = self._events[self._index]\n" + " self._index += 1\n" + " return item\n" " except IndexError:\n" - " if self._parser is None:\n" - " self.root = self._root\n" - " if self._close_file:\n" - " self._file.close()\n" - " raise StopIteration\n" - " # load event buffer\n" - " del self._events[:]\n" - " self._index = 0\n" - " data = self._file.read(16384)\n" - " if data:\n" + " pass\n" + " if self._error:\n" + " e = self._error\n" + " self._error = None\n" + " raise e\n" + " if self._parser is None:\n" + " self.root = self._root\n" + " if self._close_file:\n" + " self._file.close()\n" + " raise StopIteration\n" + " # load event buffer\n" + " del self._events[:]\n" + " self._index = 0\n" + " data = self._file.read(16384)\n" + " if data:\n" + " try:\n" " self._parser.feed(data)\n" - " else:\n" - " self._root = self._parser.close()\n" - " self._parser = None\n" + " except SyntaxError as exc:\n" + " self._error = exc\n" " else:\n" - " self._index = self._index + 1\n" - " return item\n" + " self._root = self._parser.close()\n" + " self._parser = None\n" " def __iter__(self):\n" " return self\n" "cElementTree.iterparse = iterparse\n"