diff -r afc21da5935f Doc/library/xml.sax.reader.rst --- a/Doc/library/xml.sax.reader.rst Thu Mar 26 08:51:33 2015 +0200 +++ b/Doc/library/xml.sax.reader.rst Thu Mar 26 09:24:01 2015 +0200 @@ -100,8 +100,10 @@ The :class:`XMLReader` interface support system identifier (a string identifying the input source -- typically a file name or an URL), a file-like object, or an :class:`InputSource` object. When :meth:`parse` returns, the input is completely processed, and the parser object - can be discarded or reset. As a limitation, the current implementation only - accepts byte streams; processing of character streams is for further study. + can be discarded or reset. + + .. versionchanged:: 3.5 + Added support of character streams. .. method:: XMLReader.getContentHandler() @@ -288,8 +290,7 @@ InputSource Objects .. method:: InputSource.setByteStream(bytefile) - Set the byte stream (a Python file-like object which does not perform - byte-to-character conversion) for this input source. + Set the byte stream (a :term:`binary file`) for this input source. The SAX parser will ignore this if there is also a character stream specified, but it will use a byte stream in preference to opening a URI connection itself. @@ -308,8 +309,7 @@ InputSource Objects .. method:: InputSource.setCharacterStream(charfile) - Set the character stream for this input source. (The stream must be a Python 1.6 - Unicode-wrapped file-like that performs conversion to strings.) + Set the character stream (a :term:`text file`) for this input source. If there is a character stream specified, the SAX parser will ignore any byte stream and will not attempt to open a URI connection to the system identifier. diff -r afc21da5935f Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Thu Mar 26 08:51:33 2015 +0200 +++ b/Doc/whatsnew/3.5.rst Thu Mar 26 09:24:01 2015 +0200 @@ -440,6 +440,13 @@ xmlrpc * :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`. (Contributed by Claudiu Popa in :issue:`20627`.) 
+xml.sax +------- + +* SAX parsers now support a character stream of + :class:`~xml.sax.xmlreader.InputSource` object. + (Contributed by Serhiy Storchaka in :issue:`2175`.) + faulthandler ------------ diff -r afc21da5935f Lib/test/test_minidom.py --- a/Lib/test/test_minidom.py Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/test/test_minidom.py Thu Mar 26 09:24:01 2015 +0200 @@ -49,8 +49,14 @@ class MinidomTest(unittest.TestCase): t = node.wholeText self.confirm(t == s, "looking for %r, found %r" % (s, t)) - def testParseFromFile(self): - with open(tstfile) as file: + def testParseFromBinaryFile(self): + with open(tstfile, 'rb') as file: + dom = parse(file) + dom.unlink() + self.confirm(isinstance(dom, Document)) + + def testParseFromTextFile(self): + with open(tstfile, 'r', encoding='iso-8859-1') as file: dom = parse(file) dom.unlink() self.confirm(isinstance(dom, Document)) diff -r afc21da5935f Lib/test/test_sax.py --- a/Lib/test/test_sax.py Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/test/test_sax.py Thu Mar 26 09:24:01 2015 +0200 @@ -10,7 +10,7 @@ except SAXReaderNotAvailable: # don't try to test this module if we cannot create a parser raise unittest.SkipTest("no XML parsers available") from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ - XMLFilterBase + XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl @@ -172,6 +172,81 @@ class SaxutilsTest(unittest.TestCase): p = make_parser(['xml.parsers.no_such_parser']) +class PrepareInputSourceTest(unittest.TestCase): + + # Fixture methods + def setUp(self): + self.file = support.TESTFN + with open(self.file, "w") as tmp: + tmp.write("This was read from a file.") + + def tearDown(self): + support.unlink(self.file) + + def make_byte_stream(self): + return BytesIO(b"This is a byte stream.") + + def make_character_stream(self): + return StringIO("This is a 
character stream.") + + def checkContent(self, stream, content): + self.assertIsNotNone(stream) + self.assertEqual(stream.read(), content) + stream.close() + + # The tests + def test_character_stream(self): + # If the source is an InputSource with a character stream, use it. + src = InputSource(self.file) + src.setCharacterStream(self.make_character_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + "This is a character stream.") + + def test_byte_stream(self): + # If the source is an InputSource that does not have a character + # stream but does have a byte stream, use the byte stream. + src = InputSource(self.file) + src.setByteStream(self.make_byte_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_system_id(self): + # If the source is an InputSource that has neither a character + # stream nor a byte stream, open the system ID. + src = InputSource(self.file) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_string(self): + # If the source is a string, use it as a system ID and open it. + prep = prepare_input_source(self.file) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_binary_file(self): + # If the source is a binary file-like object, use it as a byte + # stream. + prep = prepare_input_source(self.make_byte_stream()) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_text_file(self): + # If the source is a text file-like object, use it as a character + # stream. 
+ prep = prepare_input_source(self.make_character_stream()) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + "This is a character stream.") + + # ===== XMLGenerator class XmlgenTest: @@ -622,7 +697,7 @@ class ExpatReaderTest(XmlTestBase): # ===== XMLReader support - def test_expat_file(self): + def test_expat_binary_file(self): parser = create_parser() result = BytesIO() xmlgen = XMLGenerator(result) @@ -633,8 +708,19 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), xml_test_out) + def test_expat_text_file(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + @requires_nonascii_filenames - def test_expat_file_nonascii(self): + def test_expat_binary_file_nonascii(self): fname = support.TESTFN_UNICODE shutil.copyfile(TEST_XMLFILE, fname) self.addCleanup(support.unlink, fname) @@ -644,7 +730,7 @@ class ExpatReaderTest(XmlTestBase): xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) - parser.parse(open(fname)) + parser.parse(open(fname, 'rb')) self.assertEqual(result.getvalue(), xml_test_out) @@ -826,7 +912,7 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), xml_test_out) - def test_expat_inpsource_stream(self): + def test_expat_inpsource_byte_stream(self): parser = create_parser() result = BytesIO() xmlgen = XMLGenerator(result) @@ -839,6 +925,19 @@ class ExpatReaderTest(XmlTestBase): self.assertEqual(result.getvalue(), xml_test_out) + def test_expat_inpsource_character_stream(self): + parser = create_parser() + result = BytesIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + inpsrc.setCharacterStream(f) + parser.parse(inpsrc) + + 
self.assertEqual(result.getvalue(), xml_test_out) + # ===== IncrementalParser support def test_expat_incremental(self): @@ -1018,6 +1117,7 @@ class XmlReaderTest(XmlTestBase): def test_main(): run_unittest(MakeParserTest, SaxutilsTest, + PrepareInputSourceTest, StringXmlgenTest, BytesXmlgenTest, WriterXmlgenTest, diff -r afc21da5935f Lib/test/xmltestdata/test.xml --- a/Lib/test/xmltestdata/test.xml Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/test/xmltestdata/test.xml Thu Mar 26 09:24:01 2015 +0200 @@ -1,4 +1,4 @@ - + Introduction to XSL

Introduction to XSL

@@ -110,6 +110,6 @@ - +µ diff -r afc21da5935f Lib/test/xmltestdata/test.xml.out --- a/Lib/test/xmltestdata/test.xml.out Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/test/xmltestdata/test.xml.out Thu Mar 26 09:24:01 2015 +0200 @@ -110,6 +110,6 @@ - +µ \ No newline at end of file diff -r afc21da5935f Lib/xml/sax/expatreader.py --- a/Lib/xml/sax/expatreader.py Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/xml/sax/expatreader.py Thu Mar 26 09:24:01 2015 +0200 @@ -219,9 +219,14 @@ class ExpatParser(xmlreader.IncrementalP self._parsing = 0 # break cycle created by expat handlers pointing to our methods self._parser = None - bs = self._source.getByteStream() - if bs is not None: - bs.close() + try: + file = self._source.getCharacterStream() + if file is not None: + file.close() + finally: + file = self._source.getByteStream() + if file is not None: + file.close() def _reset_cont_handler(self): self._parser.ProcessingInstructionHandler = \ diff -r afc21da5935f Lib/xml/sax/saxutils.py --- a/Lib/xml/sax/saxutils.py Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/xml/sax/saxutils.py Thu Mar 26 09:24:01 2015 +0200 @@ -345,11 +345,14 @@ def prepare_input_source(source, base="" elif hasattr(source, "read"): f = source source = xmlreader.InputSource() - source.setByteStream(f) + if isinstance(f.read(0), str): + source.setCharacterStream(f) + else: + source.setByteStream(f) if hasattr(f, "name") and isinstance(f.name, str): source.setSystemId(f.name) - if source.getByteStream() is None: + if source.getCharacterStream() is None and source.getByteStream() is None: sysid = source.getSystemId() basehead = os.path.dirname(os.path.normpath(base)) sysidfilename = os.path.join(basehead, sysid) diff -r afc21da5935f Lib/xml/sax/xmlreader.py --- a/Lib/xml/sax/xmlreader.py Thu Mar 26 08:51:33 2015 +0200 +++ b/Lib/xml/sax/xmlreader.py Thu Mar 26 09:24:01 2015 +0200 @@ -117,7 +117,9 @@ class IncrementalParser(XMLReader): source = saxutils.prepare_input_source(source) self.prepareParser(source) - file = 
source.getByteStream() + file = source.getCharacterStream() + if file is None: + file = source.getByteStream() buffer = file.read(self._bufsize) while buffer: self.feed(buffer) diff -r afc21da5935f Misc/NEWS --- a/Misc/NEWS Thu Mar 26 08:51:33 2015 +0200 +++ b/Misc/NEWS Thu Mar 26 09:24:01 2015 +0200 @@ -30,6 +30,8 @@ Core and Builtins Library ------- +- Issue #2175: SAX parsers now support a character stream of an InputSource object. + - Issue #23775: pprint() of OrderedDict now outputs the same representation as repr().