from xml.etree import ElementTree as ET
import unittest
import sys

# Python 2/3 compatibility: provide StringIO, next() and xrange on both lines.
if sys.version_info[0] < 3:
    from StringIO import StringIO

    def next(it):
        """Advance a Python 2 iterator (stand-in for the built-in ``next``)."""
        return it.next()
else:
    from io import StringIO
    xrange = range


class TestElementTree(unittest.TestCase):
    """Tests for the events and text/tail values produced by ``ET.iterparse``."""

    def _create_io(self, buf):
        """Return a ``StringIO`` holding the entries of *buf* joined by newlines.

        An empty entry is prepended first, so the stream starts with a newline.
        """
        # NOTE(review): the empty leading entry may be a placeholder for an
        # XML declaration -- confirm against the upstream test.
        buf = [''] + buf
        return StringIO('\n'.join(buf))

    def _check(self, it, event_, tag, text, tail):
        """Pull the next ``(event, elem)`` pair from *it* and verify it.

        Asserts that the event name equals *event_* and that the element's
        ``tag``, ``text`` and ``tail`` equal *tag*, *text* and *tail*.
        """
        event, elem = next(it)
        self.assertEqual(event, event_)
        self.assertEqual(elem.tag, tag)
        self.assertEqual(elem.text, text)
        self.assertEqual(elem.tail, tail)

    def test_iterparse(self):
        """Start/end events arrive in document order with the right text/tail."""
        # The markup is required: without the tags the string is not XML and
        # none of the expectations below could be met.
        buf = ['<a>text a<b>text b<c>text c</c>tail c<d></d>tail d</b>tail b</a>']
        it = ET.iterparse(self._create_io(buf), ('start', 'end'))
        # NOTE(review): text/tail are only guaranteed on 'end' events; the
        # 'start' checks presumably hold because this small document is read
        # in one piece before the first event is handed out -- confirm.
        self._check(it, 'start', 'a', 'text a', None)
        self._check(it, 'start', 'b', 'text b', 'tail b')
        self._check(it, 'start', 'c', 'text c', 'tail c')
        self._check(it, 'end', 'c', 'text c', 'tail c')
        self._check(it, 'start', 'd', None, 'tail d')
        self._check(it, 'end', 'd', None, 'tail d')
        self._check(it, 'end', 'b', 'text b', 'tail b')
        self._check(it, 'end', 'a', 'text a', None)
        # The iterator must be exhausted once the root element has ended.
        self.assertRaises(StopIteration, lambda: next(it))

    def test_cross_boundary(self):
        """Element text is complete even when it spans a read-chunk boundary."""
        text = 'This is a very long string that will cross the page bounderies of 8K'
        # This number depends on the chunk size used by iterparse when it
        # reads the stream: 214 lines of 77 characters give roughly 16 KiB.
        repeat = 214
        buf = ['<a>']
        for _ in xrange(repeat):
            buf.append(' <b>' + text + '</b>')
        buf.append('</a>')

        # Listen for 'end' events: an element's text is only guaranteed to be
        # complete once its end tag has been seen. On 'start' the parser may
        # not have read the remainder of the text yet.
        checked = 0
        for event, elem in ET.iterparse(self._create_io(buf), events=('end',)):
            if elem.tag == 'b':
                self.assertEqual(elem.text, text)
                checked += 1
        # Guard against passing vacuously when no <b> element was produced.
        self.assertEqual(checked, repeat)


if __name__ == '__main__':
    unittest.main()