diff -r 81b2a30da853 Lib/test/test_sax.py --- a/Lib/test/test_sax.py Sun Jan 20 16:35:09 2013 +0200 +++ b/Lib/test/test_sax.py Sun Jan 20 17:24:13 2013 +0200 @@ -13,7 +13,7 @@ from xml.sax.expatreader import create_parser from xml.sax.handler import feature_namespaces from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl -from io import StringIO +from io import BytesIO, StringIO from test.support import findfile, run_unittest import unittest @@ -158,31 +158,29 @@ # ===== XMLGenerator -start = '\n' - -class XmlgenTest(unittest.TestCase): +class XmlgenTest: def test_xmlgen_basic(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() gen.startElement("doc", {}) gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), self.xml("")) def test_xmlgen_basic_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() gen.startElement("doc", {}) gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), self.xml("")) def test_xmlgen_content(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -191,10 +189,10 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "huhei") + self.assertEqual(result.getvalue(), self.xml("huhei")) def test_xmlgen_content_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -203,10 +201,10 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "huhei") + self.assertEqual(result.getvalue(), self.xml("huhei")) def test_xmlgen_pi(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -215,10 +213,11 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), + self.xml("")) def test_xmlgen_content_escape(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -228,10 +227,10 @@ gen.endDocument() self.assertEqual(result.getvalue(), - start + "<huhei&") + self.xml("<huhei&")) def test_xmlgen_attr_escape(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -245,13 +244,43 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + - ("" - "" - "")) + self.assertEqual(result.getvalue(), self.xml( + "" + "" + "")) + + def test_xmlgen_encoding(self): + encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', + 'utf-16', 'utf-16be', 'utf-16le', + 'utf-32', 'utf-32be', 'utf-32le') + for encoding in encodings: + result = self.ioclass() + gen = XMLGenerator(result, encoding=encoding) + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('\u20ac', encoding=encoding)) + + def test_xmlgen_unencodable(self): + result = self.ioclass() + gen = XMLGenerator(result, encoding='ascii') + + gen.startDocument() + gen.startElement("doc", {"a": '\u20ac'}) + gen.characters("\u20ac") + gen.endElement("doc") + gen.endDocument() + + self.assertEqual(result.getvalue(), + self.xml('', encoding='ascii')) def test_xmlgen_ignorable(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -260,10 +289,10 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + " ") + self.assertEqual(result.getvalue(), self.xml(" ")) def test_xmlgen_ignorable_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -272,10 +301,10 @@ gen.endElement("doc") gen.endDocument() - self.assertEqual(result.getvalue(), start + " ") + self.assertEqual(result.getvalue(), self.xml(" ")) def test_xmlgen_ns(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -288,12 +317,12 @@ gen.endPrefixMapping("ns1") gen.endDocument() - self.assertEqual(result.getvalue(), start + \ - ('' % + self.assertEqual(result.getvalue(), self.xml( + '' % ns_uri)) def test_xmlgen_ns_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -306,12 +335,12 @@ gen.endPrefixMapping("ns1") gen.endDocument() - self.assertEqual(result.getvalue(), start + \ - ('' % + self.assertEqual(result.getvalue(), self.xml( + '' % ns_uri)) def test_1463026_1(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -319,10 +348,10 @@ gen.endElementNS((None, 'a'), 'a') gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_1_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -330,10 +359,10 @@ gen.endElementNS((None, 'a'), 'a') gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_2(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -343,10 +372,10 @@ gen.endPrefixMapping(None) gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_2_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -356,10 +385,10 @@ gen.endPrefixMapping(None) gen.endDocument() - self.assertEqual(result.getvalue(), start+'') + self.assertEqual(result.getvalue(), self.xml('')) def test_1463026_3(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -370,10 +399,10 @@ gen.endDocument() self.assertEqual(result.getvalue(), - start+'') + self.xml('')) def test_1463026_3_empty(self): - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result, short_empty_elements=True) gen.startDocument() @@ -384,7 +413,7 @@ gen.endDocument() self.assertEqual(result.getvalue(), - start+'') + self.xml('')) def test_5027_1(self): # The xml prefix (as in xml:lang below) is reserved and bound by @@ -401,13 +430,13 @@ parser = make_parser() parser.setFeature(feature_namespaces, True) - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) parser.setContentHandler(gen) parser.parse(test_xml) self.assertEqual(result.getvalue(), - start + ( + self.xml( '' 'Hello' '')) @@ -420,7 +449,7 @@ # # This test demonstrates the bug by direct manipulation of the # XMLGenerator. - result = StringIO() + result = self.ioclass() gen = XMLGenerator(result) gen.startDocument() @@ -435,15 +464,60 @@ gen.endDocument() self.assertEqual(result.getvalue(), - start + ( + self.xml( '' 'Hello' '')) + def test_no_close_file(self): + result = self.ioclass() + def func(out): + gen = XMLGenerator(out) + gen.startDocument() + gen.startElement("doc", {}) + func(result) + self.assertFalse(result.closed) + +class StringXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = StringIO + + def xml(self, doc, encoding='iso-8859-1'): + return '\n%s' % (encoding, doc) + + test_xmlgen_unencodable = None + +class BytesXmlgenTest(XmlgenTest, unittest.TestCase): + ioclass = BytesIO + + def xml(self, doc, encoding='iso-8859-1'): + return ('\n%s' % + (encoding, doc)).encode(encoding, 'xmlcharrefreplace') + +class WriterXmlgenTest(BytesXmlgenTest): + class ioclass(list): + write = list.append + closed = False + + def seekable(self): + return True + + def tell(self): + # return 0 at start and not 0 after start + return len(self) + + def getvalue(self): + return b''.join(self) + + def close(self): + self.closed = True + + +start = b'\n' + class XMLFilterBaseTest(unittest.TestCase): def test_filter_basic(self): - result = StringIO() + result = BytesIO() gen = XMLGenerator(result) filter = XMLFilterBase() filter.setContentHandler(gen) @@ -455,7 +529,7 @@ filter.endElement("doc") filter.endDocument() - self.assertEqual(result.getvalue(), start + "content ") + self.assertEqual(result.getvalue(), start + b"content ") # =========================================================================== # @@ -463,7 +537,7 @@ # # =========================================================================== -with open(TEST_XMLFILE_OUT) as f: +with open(TEST_XMLFILE_OUT, 'rb') as f: xml_test_out = f.read() class ExpatReaderTest(XmlTestBase): @@ -472,11 +546,11 @@ def test_expat_file(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) - with open(TEST_XMLFILE) as f: + with open(TEST_XMLFILE, 'rb') as f: parser.parse(f) self.assertEqual(result.getvalue(), xml_test_out) @@ -517,13 +591,13 @@ def resolveEntity(self, publicId, systemId): inpsrc = InputSource() - inpsrc.setByteStream(StringIO("")) + inpsrc.setByteStream(BytesIO(b"")) return inpsrc def test_expat_entityresolver(self): parser = create_parser() parser.setEntityResolver(self.TestEntityResolver()) - result = StringIO() + result = BytesIO() parser.setContentHandler(XMLGenerator(result)) parser.feed('") + b"") # ===== Attributes support @@ -602,7 +676,7 @@ def test_expat_inpsource_filename(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -612,7 +686,7 @@ def test_expat_inpsource_sysid(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) @@ -622,12 +696,12 @@ def test_expat_inpsource_stream(self): parser = create_parser() - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) inpsrc = InputSource() - with open(TEST_XMLFILE) as f: + with open(TEST_XMLFILE, 'rb') as f: inpsrc.setByteStream(f) parser.parse(inpsrc) @@ -636,7 +710,7 @@ # ===== IncrementalParser support def test_expat_incremental(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -645,10 +719,10 @@ parser.feed("") parser.close() - self.assertEqual(result.getvalue(), start + "") + self.assertEqual(result.getvalue(), start + b"") def test_expat_incremental_reset(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -656,7 +730,7 @@ parser.feed("") parser.feed("text") - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser.setContentHandler(xmlgen) parser.reset() @@ -666,12 +740,12 @@ parser.feed("") parser.close() - self.assertEqual(result.getvalue(), start + "text") + self.assertEqual(result.getvalue(), start + b"text") # ===== Locator support def test_expat_locator_noinfo(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -685,7 +759,7 @@ self.assertEqual(parser.getLineNumber(), 1) def test_expat_locator_withinfo(self): - result = StringIO() + result = BytesIO() xmlgen = XMLGenerator(result) parser = create_parser() parser.setContentHandler(xmlgen) @@ -706,7 +780,7 @@ parser = create_parser() parser.setContentHandler(ContentHandler()) # do nothing source = InputSource() - source.setByteStream(StringIO("")) #ill-formed + source.setByteStream(BytesIO(b"")) #ill-formed name = "a file name" source.setSystemId(name) try: @@ -797,7 +871,9 @@ def test_main(): run_unittest(MakeParserTest, SaxutilsTest, - XmlgenTest, + StringXmlgenTest, + BytesXmlgenTest, + WriterXmlgenTest, ExpatReaderTest, ErrorReportingTest, XmlReaderTest) diff -r 81b2a30da853 Lib/xml/sax/saxutils.py --- a/Lib/xml/sax/saxutils.py Sun Jan 20 16:35:09 2013 +0200 +++ b/Lib/xml/sax/saxutils.py Sun Jan 20 17:24:13 2013 +0200 @@ -4,18 +4,10 @@ """ import os, urllib.parse, urllib.request +import io from . import handler from . import xmlreader -# See whether the xmlcharrefreplace error handler is -# supported -try: - from codecs import xmlcharrefreplace_errors - _error_handling = "xmlcharrefreplace" - del xmlcharrefreplace_errors -except ImportError: - _error_handling = "strict" - def __dict_replace(s, d): """Replace substrings of a string using a dictionary.""" for key, value in d.items(): @@ -75,15 +67,55 @@ data = '"%s"' % data return data +def _wrap(obj): + class _wrapper: + __class__ = obj.__class__ + def __getattr__(self, name): + return getattr(obj, name) + return _wrapper() + class XMLGenerator(handler.ContentHandler): def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False): + handler.ContentHandler.__init__(self) if out is None: import sys out = sys.stdout - handler.ContentHandler.__init__(self) - self._out = out + elif isinstance(out, io.TextIOBase): + # use a text writer as is + pass + else: + # wrap a binary writer with TextIOWrapper + if isinstance(out, io.BufferedIOBase): + # Keep the original file open when the TextIOWrapper is + # destroyed + out = _wrap(out) + out.close = lambda: None + elif isinstance(out, io.RawIOBase): + out = io.BufferedWriter(out) + # Keep the original file open when the TextIOWrapper is + # destroyed + out.close = lambda: None + else: + # This is to handle passed objects that aren't in the + # IOBase hierarchy, but just have a write method + writer = out + out = io.BufferedIOBase() + out.writable = lambda: True + out.write = writer.write + try: + # TextIOWrapper uses this methods to determine + # if BOM (for UTF-16, etc) should be added + out.seekable = writer.seekable + out.tell = writer.tell + except AttributeError: + pass + out = io.TextIOWrapper(out, encoding=encoding, + errors='xmlcharrefreplace', + newline='\n') + self._write = out.write + self._flush = out.flush self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] self._undeclared_ns_maps = [] @@ -91,12 +123,6 @@ self._short_empty_elements = short_empty_elements self._pending_start_element = False - def _write(self, text): - if isinstance(text, str): - self._out.write(text) - else: - self._out.write(text.encode(self._encoding, _error_handling)) - def _qname(self, name): """Builds a qualified name from a (ns_url, localname) pair""" if name[0]: @@ -125,6 +151,9 @@ self._write('\n' % self._encoding) + def endDocument(self): + self._flush() + def startPrefixMapping(self, prefix, uri): self._ns_contexts.append(self._current_context.copy()) self._current_context[uri] = prefix @@ -157,9 +186,9 @@ for prefix, uri in self._undeclared_ns_maps: if prefix: - self._out.write(' xmlns:%s="%s"' % (prefix, uri)) + self._write(' xmlns:%s="%s"' % (prefix, uri)) else: - self._out.write(' xmlns="%s"' % uri) + self._write(' xmlns="%s"' % uri) self._undeclared_ns_maps = [] for (name, value) in attrs.items():