diff -r 12ce8e0413f1 Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py Fri Sep 02 12:12:23 2016 +0200 +++ b/Lib/test/test_xml_etree.py Fri Sep 02 12:56:38 2016 +0200 @@ -17,6 +17,7 @@ import warnings import weakref from itertools import product +from unittest import mock from test import support from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr @@ -157,8 +158,8 @@ class ElementTestCase: class ElementTreeTest(unittest.TestCase): - def serialize_check(self, elem, expected): - self.assertEqual(serialize(elem), expected) + def serialize_check(self, elem, expected, **options): + self.assertEqual(serialize(elem, **options), expected) def test_interface(self): # Test element tree interface. @@ -490,7 +491,6 @@ class ElementTreeTest(unittest.TestCase) self.assertEqual(b"".join(ET.tostringlist(element)), b'text') self.assertEqual(ET.tostring(element, "ascii"), - b"\n" b"text") _, ids = ET.XMLID("text") self.assertEqual(len(ids), 0) @@ -498,6 +498,59 @@ class ElementTreeTest(unittest.TestCase) self.assertEqual(len(ids), 1) self.assertEqual(ids["body"].tag, 'body') + def test_write_xml_declaration(self): + elem = ET.XML("") + + # Encodings compatible with UTF-8: XML declaration is optional + utf8_encodings = ("ASCII", "us-ascii", "UTF-8", "utf8") + other_encodings = ("latin9", "GBK") + + # No XML declaration by default + # + # By default, the "unicode" encoding doesn't use + # locale.getpreferredencoding(). + with mock.patch('locale.getpreferredencoding', return_value='latin9'): + self.serialize_check(elem, '') + for encoding in utf8_encodings: + self.serialize_check(elem, b'', + encoding=encoding) + + # Test the "unicode" encoding with XML declaration which relies on + # locale.getpreferredencoding() + with mock.patch('locale.getpreferredencoding', return_value='UTF-8'): + self.serialize_check(elem, + "\n" + "", + xml_declaration=True) + + with mock.patch('locale.getpreferredencoding', return_value='latin9'): + self.serialize_check(elem, + "\n" + "", + xml_declaration=True) + + # Force XML declaration + for encoding in utf8_encodings: + expected = (f"\n" + f"").encode(encoding) + self.serialize_check(elem, expected, + encoding=encoding, + xml_declaration=True) + + # Non-UTF-8 encodings: XML declaration is always needed + for encoding in other_encodings: + expected = (f"\n" + f"").encode(encoding) + self.serialize_check(elem, expected, + encoding=encoding) + self.serialize_check(elem, expected, + encoding=encoding, + xml_declaration=True) + + # Invalid codec name + with self.assertRaises(LookupError): + serialize(elem, encoding="xxxxx") + def test_iterparse(self): # Test iterparse interface. @@ -1636,12 +1689,10 @@ class BugsTest(unittest.TestCase): e = ET.XML(b"" b't\xc3\xa3g') self.assertEqual(ET.tostring(e, 'ascii'), - b"\n" b'tãg') e = ET.XML(b"" b't\xe3g') self.assertEqual(ET.tostring(e, 'ascii'), - b"\n" b'tãg') def test_issue3151(self): diff -r 12ce8e0413f1 Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py Fri Sep 02 12:12:23 2016 +0200 +++ b/Lib/xml/etree/ElementTree.py Fri Sep 02 12:56:38 2016 +0200 @@ -91,12 +91,13 @@ VERSION = "1.3.0" -import sys -import re -import warnings -import io +import codecs import collections import contextlib +import io +import re +import sys +import warnings from . import ElementPath @@ -756,10 +757,13 @@ class ElementTree: else: encoding = "us-ascii" enc_lower = encoding.lower() + if enc_lower != "unicode": + # Normalize the encoding name + enc_lower = codecs.lookup(enc_lower).name with _get_writer(file_or_filename, enc_lower) as write: if method == "xml" and (xml_declaration or (xml_declaration is None and - enc_lower not in ("utf-8", "us-ascii", "unicode"))): + enc_lower not in {"utf-8", "ascii", "unicode"})): declared_encoding = encoding if enc_lower == "unicode": # Retrieve the default encoding for the xml declaration