diff -r cf70f030a744 Lib/test/test_sax.py --- a/Lib/test/test_sax.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_sax.py Mon Jun 23 21:36:44 2014 +0300 @@ -239,6 +239,7 @@ "" "")) + @support.requires_unicode def test_xmlgen_encoding(self): encodings = ('iso-8859-15', 'utf-8', 'utf-16be', 'utf-16le', @@ -248,23 +249,24 @@ gen = XMLGenerator(result, encoding=encoding) gen.startDocument() - gen.startElement("doc", {"a": u'\u20ac'}) - gen.characters(u"\u20ac") + gen.startElement("doc", {"a": unichr(0x20AC)}) + gen.characters(unichr(0x20AC)) gen.endElement("doc") gen.endDocument() self.assertEqual(result.getvalue(), ( u'\n' - u'\u20ac' % encoding + u'%c' % (encoding, 0x20AC, 0x20AC) ).encode(encoding, 'xmlcharrefreplace')) + @support.requires_unicode def test_xmlgen_unencodable(self): result = self.ioclass() gen = XMLGenerator(result, encoding='ascii') gen.startDocument() - gen.startElement("doc", {"a": u'\u20ac'}) - gen.characters(u"\u20ac") + gen.startElement("doc", {"a": unichr(0x20AC)}) + gen.characters(unichr(0x20AC)) gen.endElement("doc") gen.endDocument() @@ -284,6 +286,7 @@ self.assertEqual(result.getvalue(), start + " ") + @support.requires_unicode def test_xmlgen_encoding_bytes(self): encodings = ('iso-8859-15', 'utf-8', 'utf-16be', 'utf-16le', @@ -293,15 +296,15 @@ gen = XMLGenerator(result, encoding=encoding) gen.startDocument() - gen.startElement("doc", {"a": u'\u20ac'}) - gen.characters(u"\u20ac".encode(encoding)) + gen.startElement("doc", {"a": unichr(0x20AC)}) + gen.characters(unichr(0x20AC).encode(encoding)) gen.ignorableWhitespace(" ".encode(encoding)) gen.endElement("doc") gen.endDocument() self.assertEqual(result.getvalue(), ( u'\n' - u'\u20ac ' % encoding + u'%c ' % (encoding, 0x20AC, 0x20AC) ).encode(encoding, 'xmlcharrefreplace')) def test_xmlgen_ns(self): diff -r cf70f030a744 Lib/xml/sax/expatreader.py --- a/Lib/xml/sax/expatreader.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/xml/sax/expatreader.py Mon Jun 23 21:36:44 2014 +0300 @@ -43,6 +43,14 @@ _mkproxy = weakref.proxy del weakref, _weakref +try: + _unicode = unicode +except NameError: + # If Python is built without Unicode support, the unicode type + # will not exist. Fake one. + class _unicode(object): + pass + # --- ExpatLocator class ExpatLocator(xmlreader.Locator): @@ -109,7 +117,7 @@ def prepareParser(self, source): if source.getSystemId() is not None: base = source.getSystemId() - if isinstance(base, unicode): + if isinstance(base, _unicode): base = base.encode('utf-8') self._parser.SetBase(base) diff -r cf70f030a744 Lib/xml/sax/saxutils.py --- a/Lib/xml/sax/saxutils.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/xml/sax/saxutils.py Mon Jun 23 21:36:44 2014 +0300 @@ -10,10 +10,13 @@ import xmlreader try: + _UnicodeType = types.UnicodeType _StringTypes = [types.StringType, types.UnicodeType] except AttributeError: + _UnicodeType = types.StringType _StringTypes = [types.StringType] + def __dict_replace(s, d): """Replace substrings of a string using a dictionary.""" for key, value in d.items(): @@ -97,6 +100,10 @@ buffer.tell = out.tell except AttributeError: pass + try: + unicode + except NameError: + return buffer # wrap a binary writer with TextIOWrapper class UnbufferedTextIOWrapper(io.TextIOWrapper): def write(self, s): @@ -180,12 +187,12 @@ self._write(u'' % self._qname(name)) def characters(self, content): - if not isinstance(content, unicode): + if not isinstance(content, _UnicodeType): content = unicode(content, self._encoding) self._write(escape(content)) def ignorableWhitespace(self, content): - if not isinstance(content, unicode): + if not isinstance(content, _UnicodeType): content = unicode(content, self._encoding) self._write(content) @@ -321,19 +328,23 @@ try: sysid = source.getSystemId() basehead = os.path.dirname(os.path.normpath(base)) - encoding = sys.getfilesystemencoding() - if isinstance(sysid, unicode): - if not isinstance(basehead, unicode): - try: - basehead = basehead.decode(encoding) - except UnicodeDecodeError: - sysid = sysid.encode(encoding) + try: + encoding = sys.getfilesystemencoding() + except AttributeError: + pass else: - if isinstance(basehead, unicode): - try: - sysid = sysid.decode(encoding) - except UnicodeDecodeError: - basehead = basehead.encode(encoding) + if isinstance(sysid, unicode): + if not isinstance(basehead, unicode): + try: + basehead = basehead.decode(encoding) + except UnicodeDecodeError: + sysid = sysid.encode(encoding) + else: + if isinstance(basehead, unicode): + try: + sysid = sysid.decode(encoding) + except UnicodeDecodeError: + basehead = basehead.encode(encoding) sysidfilename = os.path.join(basehead, sysid) isfile = os.path.isfile(sysidfilename) except UnicodeError: