xml.sax: accept unicode system ids and unicode file names (Python 2).

* Lib/xml/sax/expatreader.py: prepareParser() encodes a unicode system id
  as UTF-8 before handing it to the expat parser's SetBase().
* Lib/xml/sax/saxutils.py: when resolving a system id against the base
  directory, both parts are first brought to the same string type using
  the filesystem encoding (falling back to the default encoding when the
  filesystem encoding is reported as None).  Any UnicodeError while doing
  so makes the code fall through to URL resolution instead of failing.
* Lib/test/test_sax.py: new tests parse a copy of TEST_XMLFILE stored under
  support.TESTFN_UNICODE.  The cleanup is registered before the copy is
  made and uses support.unlink, so a failed or partial copy is still
  removed; the file-object test closes its handle before cleanup runs.

diff -r c5451d4a0cdd Lib/test/test_sax.py
--- a/Lib/test/test_sax.py	Sun Jan 13 21:57:14 2013 +0200
+++ b/Lib/test/test_sax.py	Mon Jan 14 12:53:56 2013 +0200
@@ -14,6 +14,10 @@
 from xml.sax.handler import feature_namespaces
 from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
 from cStringIO import StringIO
+import os
+import shutil
+import sys
+import test.test_support as support
 from test.test_support import findfile, run_unittest
 import unittest
 
@@ -384,6 +388,21 @@
 
         self.assertEqual(result.getvalue(), xml_test_out)
 
+    def test_expat_file_unicode(self):
+        fname = support.TESTFN_UNICODE
+        self.addCleanup(support.unlink, fname)
+        shutil.copyfile(TEST_XMLFILE, fname)
+
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        with open(fname) as f:
+            parser.parse(f)
+
+        self.assertEqual(result.getvalue(), xml_test_out)
+
     # ===== DTDHandler support
 
     class TestDTDHandler:
@@ -523,6 +542,20 @@
 
         self.assertEqual(result.getvalue(), xml_test_out)
 
+    def test_expat_inpsource_sysid_unicode(self):
+        fname = support.TESTFN_UNICODE
+        self.addCleanup(support.unlink, fname)
+        shutil.copyfile(TEST_XMLFILE, fname)
+
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        parser.parse(InputSource(fname))
+
+        self.assertEqual(result.getvalue(), xml_test_out)
+
     def test_expat_inpsource_stream(self):
         parser = create_parser()
         result = StringIO()
@@ -596,6 +629,20 @@
         self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
         self.assertEqual(parser.getPublicId(), None)
 
+    def test_expat_locator_withinfo_unicode(self):
+        fname = support.TESTFN_UNICODE
+        self.addCleanup(support.unlink, fname)
+        shutil.copyfile(TEST_XMLFILE, fname)
+
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+        parser = create_parser()
+        parser.setContentHandler(xmlgen)
+        parser.parse(fname)
+
+        self.assertEqual(parser.getSystemId(), fname)
+        self.assertEqual(parser.getPublicId(), None)
+
 
 # ===========================================================================
 #
diff -r c5451d4a0cdd Lib/xml/sax/expatreader.py
--- a/Lib/xml/sax/expatreader.py	Sun Jan 13 21:57:14 2013 +0200
+++ b/Lib/xml/sax/expatreader.py	Mon Jan 14 12:53:56 2013 +0200
@@ -108,7 +108,10 @@
 
     def prepareParser(self, source):
         if source.getSystemId() is not None:
-            self._parser.SetBase(source.getSystemId())
+            base = source.getSystemId()
+            if isinstance(base, unicode):
+                base = base.encode('utf-8')
+            self._parser.SetBase(base)
 
     # Redefined setContentHandler to allow changing handlers during parsing
 
diff -r c5451d4a0cdd Lib/xml/sax/saxutils.py
--- a/Lib/xml/sax/saxutils.py	Sun Jan 13 21:57:14 2013 +0200
+++ b/Lib/xml/sax/saxutils.py	Mon Jan 14 12:53:56 2013 +0200
@@ -4,6 +4,7 @@
 """
 
 import os, urlparse, urllib, types
+import sys
 import handler
 import xmlreader
 
@@ -293,14 +294,32 @@
             source.setSystemId(f.name)
 
     if source.getByteStream() is None:
-        sysid = source.getSystemId()
-        basehead = os.path.dirname(os.path.normpath(base))
-        sysidfilename = os.path.join(basehead, sysid)
-        if os.path.isfile(sysidfilename):
+        try:
+            sysid = source.getSystemId()
+            basehead = os.path.dirname(os.path.normpath(base))
+            # May be None on Python 2; fall back to the default encoding.
+            encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
+            if isinstance(sysid, unicode):
+                if not isinstance(basehead, unicode):
+                    try:
+                        basehead = basehead.decode(encoding)
+                    except UnicodeDecodeError:
+                        sysid = sysid.encode(encoding)
+            else:
+                if isinstance(basehead, unicode):
+                    try:
+                        sysid = sysid.decode(encoding)
+                    except UnicodeDecodeError:
+                        basehead = basehead.encode(encoding)
+            sysidfilename = os.path.join(basehead, sysid)
+            isfile = os.path.isfile(sysidfilename)
+        except UnicodeError:
+            isfile = False
+        if isfile:
             source.setSystemId(sysidfilename)
             f = open(sysidfilename, "rb")
         else:
-            source.setSystemId(urlparse.urljoin(base, sysid))
+            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
             f = urllib.urlopen(source.getSystemId())
 
         source.setByteStream(f)