diff -r f2c6b0485ce6 Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py Fri Dec 13 17:21:42 2013 -0500 +++ b/Lib/test/test_xml_etree.py Fri Dec 13 15:15:02 2013 -0800 @@ -151,7 +151,7 @@ # -------------------------------------------------------------------- # element tree tests -class ElementTreeTest(unittest.TestCase): +class ElementTreeTest(ElementTestCase, unittest.TestCase): def serialize_check(self, elem, expected): self.assertEqual(serialize(elem), expected) @@ -882,6 +882,75 @@ self.assertNotEqual(q1, 'ns:tag') self.assertEqual(q1, '{ns}tag') + def test_namespace_attribs(self): + # Unprefixed attributes are unqualified even if a default + # namespace is in effect. (This is a little unclear in some + # versions of the XML TR but is clarified in errata and other + # versions.) See bugs.python.org issue 17088. + # + # The reasoning behind this, alluded to in the spec, is that + # attribute meanings already depend on the element they're + # attached to; attributes have always lived in per-element + # namespaces even before explicit XML namespaces were + # introduced. For that reason qualified attribute names are + # only really needed when one XML module defines attributes + # that can be placed on elements defined in a different module + # (such as happens with XLINK or, for that matter, the XML + # namespace spec itself). + e = ET.XML('' + '' + '' + '' + '') + self.assertEqual(e.tag, '{space1}elt') + self.assertEqual(e.get('foo'), 'value') + self.assertIsNone(e.get('{space1}foo')) + self.assertIsNone(e.get('{space2}foo')) + self.assertEqual(e[0].tag, '{space1}foo') + self.assertEqual(e[0].attrib, { 'foo': 'value2', + '{space2}foo': 'value3' }) + self.assertEqual(e[1].tag, '{space2}foo') + self.assertEqual(e[1].attrib, { 'foo': 'value4', + '{space1}foo': 'value5', + '{space2}foo': 'value6' }) + self.assertEqual(e[2].tag, 'foo') + self.assertEqual(e[2].attrib, { 'foo': 'value7', + '{space1}foo': 'value8' }) + + serialized1 = ( '' + '' + '' + '' + '') + self.assertEqual(serialize(e), serialized1) + self.assertEqualElements(e, ET.XML(serialized1)) + + # Test writing with a default namespace. + with self.assertRaisesRegex(ValueError, + 'cannot use non-qualified name.* with default_namespace option'): + serialize(e, default_namespace="space1") + + # Remove the unqualified element from the tree so we can test + # further + del e[2] + + # Serialization can require a namespace prefix to be declared for + # space1 even if no elements use that prefix, in order to + # write an attribute name in that namespace. + serialized2 = ( '' + '' + '' + '' ) + self.assertEqual(serialize(e, default_namespace="space2"), serialized2) + self.assertEqualElements(e, ET.XML(serialized2)) + + serialized3 = ( '' + '' + '' + '' ) + self.assertEqual(serialize(e, default_namespace="space1"), serialized3) + self.assertEqualElements(e, ET.XML(serialized3)) + def test_doctype_public(self): # Test PUBLIC doctype. @@ -1520,10 +1589,9 @@ e = ET.Element("{default}elem") s = ET.SubElement(e, "{default}elem") s = ET.SubElement(e, "elem") # unprefixed name - with self.assertRaises(ValueError) as cm: + with self.assertRaisesRegex(ValueError, + 'cannot use non-qualified name.* with default_namespace option'): serialize(e, default_namespace="default") # 3 - self.assertEqual(str(cm.exception), - 'cannot use non-qualified names with default_namespace option') def test_bug_200709_register_namespace(self): e = ET.Element("{http://namespace.invalid/does/not/exist/}title") diff -r f2c6b0485ce6 Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py Fri Dec 13 17:21:42 2013 -0500 +++ b/Lib/xml/etree/ElementTree.py Fri Dec 13 15:15:02 2013 -0800 @@ -772,9 +772,9 @@ if method == "text": _serialize_text(write, self._root) else: - qnames, namespaces = _namespaces(self._root, default_namespace) + elt_qnames, attr_qnames, namespaces = _namespaces(self._root, default_namespace) serialize = _serialize[method] - serialize(write, self._root, qnames, namespaces, + serialize(write, self._root, elt_qnames, attr_qnames, namespaces, short_empty_elements=short_empty_elements) def write_c14n(self, file): @@ -837,39 +837,58 @@ yield file.write def _namespaces(elem, default_namespace=None): - # identify namespaces used in this tree + # identify namespaces used in this tree, assign namespace prefixes + # as needed, and create cache dicts which map element and attribute + # names to their serialized representations # maps qnames to *encoded* prefix:local names - qnames = {None: None} + # elts and attrs may need distinct mappings because default + # namespaces affect them differently + elt_qnames = {None: None} + if default_namespace is None: + attr_qnames = elt_qnames + else: + attr_qnames = {None: None} # maps uri:s to prefixes namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - def add_qname(qname): + # this offset is just here to make our generated namespace + # prefixes predictable to the unit tests + ns0 = 1 if default_namespace else 0 + + def add_qname(qname, defaultable): # calculate serialized qname representation + qnames = elt_qnames if defaultable else attr_qnames try: if qname[:1] == "{": uri, tag = qname[1:].rsplit("}", 1) + if defaultable and uri == default_namespace: + qnames[qname] = tag # Default namespace, no prefix + return + prefix = namespaces.get(uri) if prefix is None: + # Assign a namespace prefix prefix = _namespace_map.get(uri) if prefix is None: - prefix = "ns%d" % len(namespaces) + prefix = "ns%d" % (ns0 + len(namespaces)) if prefix != "xml": namespaces[uri] = prefix - if prefix: - qnames[qname] = "%s:%s" % (prefix, tag) - else: - qnames[qname] = tag # default element + qnames[qname] = "%s:%s" % (prefix, tag) else: - if default_namespace: - # FIXME: can this be handled in XML 1.0? + if defaultable and default_namespace: + # A default namespace can be undeclared + # (see http://www.w3.org/TR/REC-xml-names/#defaulting) + # but only by placing an xmlns="" attribute on the + # element and possibly re-declaring the default + # namespace for child elements. Our serializers + # can't do that. FIXME. raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" + "cannot use non-qualified names (<%s>) with " + "default_namespace option" % (qname,) ) + # Unqualified name -> unprefixed serialized name qnames[qname] = qname except TypeError: _raise_serialization_error(qname) @@ -878,26 +897,36 @@ for elem in elem.iter(): tag = elem.tag if isinstance(tag, QName): - if tag.text not in qnames: - add_qname(tag.text) + if tag.text not in elt_qnames: + add_qname(tag.text, True) elif isinstance(tag, str): - if tag not in qnames: - add_qname(tag) + if tag not in elt_qnames: + add_qname(tag, True) elif tag is not None and tag is not Comment and tag is not PI: _raise_serialization_error(tag) for key, value in elem.items(): if isinstance(key, QName): key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: - add_qname(value.text) + if key not in attr_qnames: + add_qname(key, False) + if isinstance(value, QName) and value.text not in attr_qnames: + # FIXME: Should default ns be applied to attrib *values*? + # Opting for correctness here even if it results in an + # unneeded namespace prefix sometimes. + add_qname(value.text, False) text = elem.text - if isinstance(text, QName) and text.text not in qnames: - add_qname(text.text) - return qnames, namespaces + if isinstance(text, QName) and text.text not in elt_qnames: + add_qname(text.text, True) + + if default_namespace: + prefixes_list = [ (default_namespace, "") ] + prefixes_list.extend(namespaces.items()) + else: + prefixes_list = namespaces.items() + + return elt_qnames, attr_qnames, prefixes_list -def _serialize_xml(write, elem, qnames, namespaces, +def _serialize_xml(write, elem, elt_qnames, attr_qnames, namespaces, short_empty_elements, **kwargs): tag = elem.tag text = elem.text @@ -906,19 +935,19 @@ elif tag is ProcessingInstruction: write("" % text) else: - tag = qnames[tag] + tag = elt_qnames[tag] if tag is None: if text: write(_escape_cdata(text)) for e in elem: - _serialize_xml(write, e, qnames, None, + _serialize_xml(write, e, elt_qnames, attr_qnames, None, short_empty_elements=short_empty_elements) else: write("<" + tag) items = list(elem.items()) if items or namespaces: if namespaces: - for v, k in sorted(namespaces.items(), + for v, k in sorted(namespaces, key=lambda x: x[1]): # sort on prefix if k: k = ":" + k @@ -930,16 +959,16 @@ if isinstance(k, QName): k = k.text if isinstance(v, QName): - v = qnames[v.text] + v = attr_qnames[v.text] else: v = _escape_attrib(v) - write(" %s=\"%s\"" % (qnames[k], v)) + write(" %s=\"%s\"" % (attr_qnames[k], v)) if text or len(elem) or not short_empty_elements: write(">") if text: write(_escape_cdata(text)) for e in elem: - _serialize_xml(write, e, qnames, None, + _serialize_xml(write, e, elt_qnames, attr_qnames, None, short_empty_elements=short_empty_elements) write("") else: @@ -955,7 +984,7 @@ except NameError: pass -def _serialize_html(write, elem, qnames, namespaces, **kwargs): +def _serialize_html(write, elem, elt_qnames, attr_qnames, namespaces, **kwargs): tag = elem.tag text = elem.text if tag is Comment: @@ -963,18 +992,18 @@ elif tag is ProcessingInstruction: write("" % _escape_cdata(text)) else: - tag = qnames[tag] + tag = elt_qnames[tag] if tag is None: if text: write(_escape_cdata(text)) for e in elem: - _serialize_html(write, e, qnames, None) + _serialize_html(write, e, elt_qnames, attr_qnames, None) else: write("<" + tag) items = list(elem.items()) if items or namespaces: if namespaces: - for v, k in sorted(namespaces.items(), + for v, k in sorted(namespaces, key=lambda x: x[1]): # sort on prefix if k: k = ":" + k @@ -986,11 +1015,11 @@ if isinstance(k, QName): k = k.text if isinstance(v, QName): - v = qnames[v.text] + v = attr_qnames[v.text] else: v = _escape_attrib_html(v) # FIXME: handle boolean attributes - write(" %s=\"%s\"" % (qnames[k], v)) + write(" %s=\"%s\"" % (attr_qnames[k], v)) write(">") ltag = tag.lower() if text: @@ -999,7 +1028,7 @@ else: write(_escape_cdata(text)) for e in elem: - _serialize_html(write, e, qnames, None) + _serialize_html(write, e, elt_qnames, attr_qnames, None) if ltag not in HTML_EMPTY: write("") if elem.tail: