Index: Lib/test/test_pulldom.py =================================================================== --- Lib/test/test_pulldom.py (revision 0) +++ Lib/test/test_pulldom.py (revision 0) @@ -0,0 +1,339 @@ +from io import StringIO +import xml.sax +from xml.sax.xmlreader import AttributesImpl +from xml.dom import pulldom + +from test.support import run_unittest, findfile +import unittest + +tstfile = findfile("test.xml", subdir="xmltestdata") + +# A handy XML snippet, containing attributes, a namespace prefix, and a +# self-closing tag: +SMALL_SAMPLE = """ + + +Introduction to XSL +
+

A. Namespace

+""" + + +class PullDOMTestCase(unittest.TestCase): + def test_parse(self): + """Minimal test of DOMEventStream.parse()""" + + # This just tests that parsing from a stream works. Actual parser + # semantics are tested using parseString with a more focused XML + # fragment. + + # Test with a filename: + items = pulldom.parse(tstfile) + for _ in items: + pass + + # Test with a file object: + with open(tstfile, 'rb') as fin: + items = pulldom.parse(fin) + for _ in items: + pass + + def test_parse_semantics(self): + """Test DOMEventStream parsing semantics.""" + + items = pulldom.parseString(SMALL_SAMPLE) + evt, node = next(items) + # Just check the node is a Document: + self.assertIn('createElement', dir(node)) + self.assertEqual(pulldom.START_DOCUMENT, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("html", node.tagName) + self.assertEqual(2, len(node.attributes)) + self.assertEqual(node.attributes.getNamedItem('xmlns:xdc').value, + "http://www.xml.com/books") + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) # Line break + evt, node = next(items) + # XXX - A comment should be reported here! + # self.assertEqual(pulldom.COMMENT, evt) + # Line break after swallowed comment: + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual("title", node.tagName) + title_node = node + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + self.assertEqual("Introduction to XSL", node.data) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("title", node.tagName) + self.assertTrue(title_node is node) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("hr", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("hr", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("p", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual("xdc:author", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual("xdc:author", node.tagName) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + evt, node = next(items) + self.assertEqual(pulldom.CHARACTERS, evt) + evt, node = next(items) + self.assertEqual(pulldom.END_ELEMENT, evt) + # XXX No END_DOCUMENT item is ever obtained: + #evt, node = next(items) + #self.assertEqual(pulldom.END_DOCUMENT, evt) + + def test_expandItem(self): + """Ensure expandItem works as expected.""" + items = pulldom.parseString(SMALL_SAMPLE) + # Loop through the nodes until we get to a 'title' start tag: + for evt, item in items: + if evt == pulldom.START_ELEMENT and item.tagName == "title": + items.expandNode(item) + self.assertEqual(1, len(item.childNodes)) + break + else: + self.fail("No 'title' element detected in SMALL_SAMPLE!") + # Loop until we get to the next start-element: + for evt, node in items: + if evt == pulldom.START_ELEMENT: break + self.assertEqual("hr", node.tagName, + "expandNode did not leave DOMEventStream in the correct state.") + # Attempt to expand a standalone element: + items.expandNode(node) + self.assertEqual(next(items)[0], pulldom.CHARACTERS) + evt, node = next(items) + self.assertEqual(node.tagName, "p") + items.expandNode(node) + next(items) # Skip character data + evt, node = next(items) + self.assertEqual(node.tagName, "html") + with self.assertRaises(StopIteration): + next(items) + items.clear() + self.assertIsNone(items.parser) + self.assertIsNone(items.stream) + + @unittest.expectedFailure + def test_comment(self): + '''PullDOM does not receive 'comment' events.''' + items = pulldom.parseString(SMALL_SAMPLE) + for evt, _ in items: + if evt == pulldom.COMMENT: + break + else: + self.fail("No comment was encountered") + + @unittest.expectedFailure + def test_end_document(self): + '''PullDOM does not receive 'end-document' events.''' + items = pulldom.parseString(SMALL_SAMPLE) + # Read all of the nodes up to and including : + for evt, node in items: + if evt == pulldom.END_ELEMENT and node.tagName == "html": + break + try: + # Assert that the next node is END_DOCUMENT: + evt, node = next(items) + self.assertEqual(pulldom.END_DOCUMENT, evt) + except StopIteration: + self.fail( + "Ran out of events, but should have received END_DOCUMENT") + + +class ThoroughTestCase(unittest.TestCase): + '''Test the hard-to-reach parts of pulldom.''' + + def test_thorough_parse(self): + '''Test some of the hard-to-reach parts of PullDOM.''' + self._test_thorough(pulldom.parse(None, parser=SAXExerciser())) + + @unittest.expectedFailure + def test_sax2dom_fail(self): + '''SAX2DOM can't handle a PI before the root element.''' + pd = SAX2DOMTestHelper(None, SAXExerciser(), 12) + self._test_thorough(pd) + + def test_thorough_sax2dom(self): + '''Test some of the hard-to-reach parts of SAX2DOM.''' + pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12) + self._test_thorough(pd, False) + + def _test_thorough(self, pd, before_root=True): + '''Test some of the hard-to-reach parts of the parser, using a mock + parser.''' + + evt, node = next(pd) + self.assertEqual(pulldom.START_DOCUMENT, evt) + # Just check the node is a Document: + self.assertIn('createElement', dir(node)) + + if before_root: + evt, node = next(pd) + self.assertEqual(pulldom.COMMENT, evt) + self.assertEqual('a comment', node.data) + evt, node = next(pd) + self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) + self.assertEqual('target', node.target) + self.assertEqual('data', node.data) + + evt, node = next(pd) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual('html', node.tagName) + + evt, node = next(pd) + self.assertEqual(pulldom.COMMENT, evt) + self.assertEqual('a comment', node.data) + evt, node = next(pd) + self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) + self.assertEqual('target', node.target) + self.assertEqual('data', node.data) + + evt, node = next(pd) + self.assertEqual(pulldom.START_ELEMENT, evt) + self.assertEqual('p', node.tagName) + + evt, node = next(pd) + self.assertEqual(pulldom.CHARACTERS, evt) + self.assertEqual('text', node.data) + evt, node = next(pd) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual('p', node.tagName) + evt, node = next(pd) + self.assertEqual(pulldom.END_ELEMENT, evt) + self.assertEqual('html', node.tagName) + evt, node = next(pd) + self.assertEqual(pulldom.END_DOCUMENT, evt) + + +class SAXExerciser(object): + ''''A fake sax parser that calls some of the harder-to-reach sax methods to + ensure it emits the correct events''' + def setContentHandler(self, handler): + self._handler = handler + + def parse(self, _): + h = self._handler + h.startDocument() + + # The next two items ensure that items preceding the first + # start_element are properly stored and emitted: + h.comment('a comment') + h.processingInstruction('target', 'data') + + h.startElement('html', AttributesImpl({})) + + h.comment('a comment') + h.processingInstruction('target', 'data') + + h.startElement('p', AttributesImpl({'class': 'paraclass'})) + h.characters('text') + h.endElement('p') + h.endElement('html') + h.endDocument() + + def stub(self, *args, **kwargs): + '''Stub method. Does nothing.''' + pass + setProperty = stub + setFeature = stub + + +class SAX2DOMExerciser(SAXExerciser): + '''The same as SAXExerciser, but without the processing instruction and + comment before the root element, because S2D can't handle it''' + + def parse(self, _): + h = self._handler + h.startDocument() + h.startElement('html', AttributesImpl({})) + h.comment('a comment') + h.processingInstruction('target', 'data') + h.startElement('p', AttributesImpl({'class': 'paraclass'})) + h.characters('text') + h.endElement('p') + h.endElement('html') + h.endDocument() + + +class SAX2DOMTestHelper(pulldom.DOMEventStream): + """Allows us to drive SAX2DOM from a DOMEventStream.""" + def reset(self): + self.pulldom = pulldom.SAX2DOM() + # This content handler relies on namespace support + self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) + self.parser.setContentHandler(self.pulldom) + + +class SAX2DOMTestCase(unittest.TestCase): + def confirm(self, test, testname = "Test"): + self.assertTrue(test, testname) + + def test_basic(self): + '''Ensure SAX2DOM can parse from a stream.''' + with StringIO(SMALL_SAMPLE) as fin: + sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), + len(SMALL_SAMPLE)) + for evt, node in sd: + if evt == pulldom.START_ELEMENT and node.tagName == 'html': + break + # Because the buffer is the same length as the XML, all the + # nodes should have been parsed and added: + self.assertGreater(len(node.childNodes), 0) + + def testSAX2DOM(self): + '''Ensure SAX2DOM expands nodes as expected.''' + sax2dom = pulldom.SAX2DOM() + sax2dom.startDocument() + sax2dom.startElement("doc", {}) + sax2dom.characters("text") + sax2dom.startElement("subelm", {}) + sax2dom.characters("text") + sax2dom.endElement("subelm") + sax2dom.characters("text") + sax2dom.endElement("doc") + sax2dom.endDocument() + + doc = sax2dom.document + root = doc.documentElement + (text1, elm1, text2) = root.childNodes + text3 = elm1.childNodes[0] + + self.confirm(text1.previousSibling is None and + text1.nextSibling is elm1 and + elm1.previousSibling is text1 and + elm1.nextSibling is text2 and + text2.previousSibling is elm1 and + text2.nextSibling is None and + text3.previousSibling is None and + text3.nextSibling is None, "testSAX2DOM - siblings") + + self.confirm(root.parentNode is doc and + text1.parentNode is root and + elm1.parentNode is root and + text2.parentNode is root and + text3.parentNode is elm1, "testSAX2DOM - parents") + doc.unlink() + +def test_main(): + run_unittest(PullDOMTestCase, SAX2DOMTestCase, ThoroughTestCase) + +if __name__ == "__main__": + test_main() \ No newline at end of file Index: Lib/test/test_minidom.py =================================================================== --- Lib/test/test_minidom.py (revision 83132) +++ Lib/test/test_minidom.py (working copy) @@ -4,9 +4,7 @@ from test.support import verbose, run_unittest, findfile import unittest -import xml.dom import xml.dom.minidom -import xml.parsers.expat from xml.dom.minidom import parse, Node, Document, parseString from xml.dom.minidom import getDOMImplementation @@ -14,7 +12,6 @@ tstfile = findfile("test.xml", subdir="xmltestdata") - # The tests of DocumentType importing use these helpers to construct # the documents to work with, since not all DOM builders actually # create the DocumentType nodes. @@ -1000,41 +997,6 @@ "test NodeList.item()") doc.unlink() - def testSAX2DOM(self): - from xml.dom import pulldom - - sax2dom = pulldom.SAX2DOM() - sax2dom.startDocument() - sax2dom.startElement("doc", {}) - sax2dom.characters("text") - sax2dom.startElement("subelm", {}) - sax2dom.characters("text") - sax2dom.endElement("subelm") - sax2dom.characters("text") - sax2dom.endElement("doc") - sax2dom.endDocument() - - doc = sax2dom.document - root = doc.documentElement - (text1, elm1, text2) = root.childNodes - text3 = elm1.childNodes[0] - - self.confirm(text1.previousSibling is None and - text1.nextSibling is elm1 and - elm1.previousSibling is text1 and - elm1.nextSibling is text2 and - text2.previousSibling is elm1 and - text2.nextSibling is None and - text3.previousSibling is None and - text3.nextSibling is None, "testSAX2DOM - siblings") - - self.confirm(root.parentNode is doc and - text1.parentNode is root and - elm1.parentNode is root and - text2.parentNode is root and - text3.parentNode is elm1, "testSAX2DOM - parents") - doc.unlink() - def testEncodings(self): doc = parseString('') self.assertEqual(doc.toxml(), @@ -1480,6 +1442,7 @@ doc = create_doc_without_doctype() doc.appendChild(doc.createComment("foo--bar")) self.assertRaises(ValueError, doc.toxml) + def test_main(): run_unittest(MinidomTest)