diff -r b1bbe519770b Lib/test/test_sax.py --- a/Lib/test/test_sax.py Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/test/test_sax.py Wed Feb 13 19:41:46 2013 +0200 @@ -9,7 +9,7 @@ # don't try to test this module if we cannot create a parser raise ImportError("no XML parsers available") from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ - XMLFilterBase + XMLFilterBase, prepare_input_source from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl @@ -18,7 +18,7 @@ import os.path import shutil import test.test_support as support -from test.test_support import findfile, run_unittest +from test.test_support import findfile, run_unittest, TESTFN import unittest TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") @@ -167,6 +167,81 @@ p = make_parser(['xml.parsers.no_such_parser']) +class PrepareInputSourceTest(unittest.TestCase): + + # Fixture methods + def setUp(self): + self.file = TESTFN + with open(self.file, "w") as tmp: + tmp.write("This was read from a file.") + + def tearDown(self): + support.unlink(self.file) + + def make_byte_stream(self): + return io.BytesIO(b"This is a byte stream.") + + def make_character_stream(self): + return io.StringIO(u"This is a character stream.") + + def checkContent(self, stream, content): + self.assertIsNotNone(stream) + self.assertEqual(stream.read(), content) + stream.close() + + # The tests + def test_character_stream(self): + '''If the source is an InputSource with a character stream, use it.''' + src = InputSource(self.file) + src.setCharacterStream(self.make_character_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + u"This is a character stream.") + + def test_byte_stream(self): + '''If the source is an InputSource that does not have a character + stream but does have a byte stream, use the byte stream.''' + 
src = InputSource(self.file) + src.setByteStream(self.make_byte_stream()) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_system_id(self): + '''If the source is an InputSource that has neither a character + stream nor a byte stream, open the system ID.''' + src = InputSource(self.file) + prep = prepare_input_source(src) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_string(self): + '''If the source is a string, use it as a system ID and open it.''' + prep = prepare_input_source(self.file) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This was read from a file.") + + def test_binary_file(self): + '''If the source is a binary file-like object, use it as a byte + stream.''' + prep = prepare_input_source(self.make_byte_stream()) + self.assertIsNone(prep.getCharacterStream()) + self.checkContent(prep.getByteStream(), + b"This is a byte stream.") + + def test_text_file(self): + '''If the source is a text file-like object, use it as a character + stream.''' + prep = prepare_input_source(self.make_character_stream()) + self.assertIsNone(prep.getByteStream()) + self.checkContent(prep.getCharacterStream(), + u"This is a character stream.") + + # ===== XMLGenerator start = '\n' @@ -446,7 +521,7 @@ # ===== XMLReader support - def test_expat_file(self): + def test_expat_binary_file(self): parser = create_parser() result = StringIO() xmlgen = XMLGenerator(result) @@ -456,6 +531,17 @@ self.assertEqual(result.getvalue(), xml_test_out) + def test_expat_text_file(self): + parser = create_parser() + result = StringIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + with io.open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + parser.parse(f) + + self.assertEqual(result.getvalue(), xml_test_out) + 
@requires_unicode_filenames def test_expat_file_unicode(self): fname = support.TESTFN_UNICODE @@ -625,7 +711,7 @@ self.assertEqual(result.getvalue(), xml_test_out) - def test_expat_inpsource_stream(self): + def test_expat_inpsource_byte_stream(self): parser = create_parser() result = StringIO() xmlgen = XMLGenerator(result) @@ -637,6 +723,19 @@ self.assertEqual(result.getvalue(), xml_test_out) + def test_expat_inpsource_character_stream(self): + parser = create_parser() + result = StringIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with io.open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: + inpsrc.setCharacterStream(f) + parser.parse(inpsrc) + + self.assertEqual(result.getvalue(), xml_test_out) + # ===== IncrementalParser support def test_expat_incremental(self): @@ -861,6 +960,7 @@ def test_main(): run_unittest(MakeParserTest, SaxutilsTest, + PrepareInputSourceTest, StringXmlgenTest, BytesIOXmlgenTest, WriterXmlgenTest, diff -r b1bbe519770b Lib/test/xmltestdata/test.xml --- a/Lib/test/xmltestdata/test.xml Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/test/xmltestdata/test.xml Wed Feb 13 19:41:46 2013 +0200 @@ -1,4 +1,4 @@ - + Introduction to XSL

Introduction to XSL

@@ -110,6 +110,6 @@ - +µ diff -r b1bbe519770b Lib/test/xmltestdata/test.xml.out --- a/Lib/test/xmltestdata/test.xml.out Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/test/xmltestdata/test.xml.out Wed Feb 13 19:41:46 2013 +0200 @@ -110,6 +110,6 @@ - +µ \ No newline at end of file diff -r b1bbe519770b Lib/xml/sax/expatreader.py --- a/Lib/xml/sax/expatreader.py Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/xml/sax/expatreader.py Wed Feb 13 19:41:46 2013 +0200 @@ -202,6 +202,8 @@ self._parsing = 1 self._cont_handler.startDocument() + if isinstance(data, unicode): + data = data.encode('utf-8') try: # The isFinal parameter is internal to the expat reader. # If it is set to true, expat will check validity of the entire @@ -222,6 +224,14 @@ self._parsing = 0 # break cycle created by expat handlers pointing to our methods self._parser = None + try: + file = self._source.getCharacterStream() + if file is not None: + file.close() + finally: + file = self._source.getByteStream() + if file is not None: + file.close() def _reset_cont_handler(self): self._parser.ProcessingInstructionHandler = \ @@ -245,14 +255,18 @@ parser.EndDoctypeDeclHandler = lex.endDTD def reset(self): + if self._source.getCharacterStream() is None: + encoding = self._source.getEncoding() + else: + encoding = 'UTF-8' if self._namespaces: - self._parser = expat.ParserCreate(self._source.getEncoding(), " ", + self._parser = expat.ParserCreate(encoding, " ", intern=self._interning) self._parser.namespace_prefixes = 1 self._parser.StartElementHandler = self.start_element_ns self._parser.EndElementHandler = self.end_element_ns else: - self._parser = expat.ParserCreate(self._source.getEncoding(), + self._parser = expat.ParserCreate(encoding, intern = self._interning) self._parser.StartElementHandler = self.start_element self._parser.EndElementHandler = self.end_element diff -r b1bbe519770b Lib/xml/sax/saxutils.py --- a/Lib/xml/sax/saxutils.py Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/xml/sax/saxutils.py Wed Feb 13 19:41:46 
2013 +0200 @@ -305,11 +305,14 @@ elif hasattr(source, "read"): f = source source = xmlreader.InputSource() - source.setByteStream(f) + if isinstance(f.read(0), unicode): + source.setCharacterStream(f) + else: + source.setByteStream(f) if hasattr(f, "name"): source.setSystemId(f.name) - if source.getByteStream() is None: + if source.getCharacterStream() is None and source.getByteStream() is None: try: sysid = source.getSystemId() basehead = os.path.dirname(os.path.normpath(base)) diff -r b1bbe519770b Lib/xml/sax/xmlreader.py --- a/Lib/xml/sax/xmlreader.py Wed Feb 13 12:05:14 2013 +0000 +++ b/Lib/xml/sax/xmlreader.py Wed Feb 13 19:41:46 2013 +0200 @@ -117,7 +117,9 @@ source = saxutils.prepare_input_source(source) self.prepareParser(source) - file = source.getByteStream() + file = source.getCharacterStream() + if file is None: + file = source.getByteStream() buffer = file.read(self._bufsize) while buffer != "": self.feed(buffer) diff -r b1bbe519770b Misc/NEWS --- a/Misc/NEWS Wed Feb 13 12:05:14 2013 +0000 +++ b/Misc/NEWS Wed Feb 13 19:41:46 2013 +0200 @@ -205,6 +205,8 @@ Library ------- +- Issue #2174: SAX parsers now support the character stream of an InputSource. + - Issue #11311: StringIO.readline(0) now returns an empty string as all other file-like objects.