diff -r f2c6b0485ce6 Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py Fri Dec 13 17:21:42 2013 -0500
+++ b/Lib/test/test_xml_etree.py Fri Dec 13 15:15:02 2013 -0800
@@ -151,7 +151,7 @@
# --------------------------------------------------------------------
# element tree tests
-class ElementTreeTest(unittest.TestCase):
+class ElementTreeTest(ElementTestCase, unittest.TestCase):
def serialize_check(self, elem, expected):
self.assertEqual(serialize(elem), expected)
@@ -882,6 +882,75 @@
self.assertNotEqual(q1, 'ns:tag')
self.assertEqual(q1, '{ns}tag')
+ def test_namespace_attribs(self):
+ # Unprefixed attributes are unqualified even if a default
+ # namespace is in effect. (This is a little unclear in some
+ # versions of the XML TR but is clarified in errata and other
+ # versions.) See bugs.python.org issue 17088.
+ #
+ # The reasoning behind this, alluded to in the spec, is that
+ # attribute meanings already depend on the element they're
+ # attached to; attributes have always lived in per-element
+ # namespaces even before explicit XML namespaces were
+ # introduced. For that reason qualified attribute names are
+ # only really needed when one XML module defines attributes
+ # that can be placed on elements defined in a different module
+ # (such as happens with XLINK or, for that matter, the XML
+ # namespace spec itself).
+ e = ET.XML(''
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(e.tag, '{space1}elt')
+ self.assertEqual(e.get('foo'), 'value')
+ self.assertIsNone(e.get('{space1}foo'))
+ self.assertIsNone(e.get('{space2}foo'))
+ self.assertEqual(e[0].tag, '{space1}foo')
+ self.assertEqual(e[0].attrib, { 'foo': 'value2',
+ '{space2}foo': 'value3' })
+ self.assertEqual(e[1].tag, '{space2}foo')
+ self.assertEqual(e[1].attrib, { 'foo': 'value4',
+ '{space1}foo': 'value5',
+ '{space2}foo': 'value6' })
+ self.assertEqual(e[2].tag, 'foo')
+ self.assertEqual(e[2].attrib, { 'foo': 'value7',
+ '{space1}foo': 'value8' })
+
+ serialized1 = ( ''
+ ''
+ ''
+ ''
+ '')
+ self.assertEqual(serialize(e), serialized1)
+ self.assertEqualElements(e, ET.XML(serialized1))
+
+ # Test writing with a default namespace.
+ with self.assertRaisesRegex(ValueError,
+ 'cannot use non-qualified name.* with default_namespace option'):
+ serialize(e, default_namespace="space1")
+
+ # Remove the unqualified element from the tree so we can test
+ # further
+ del e[2]
+
+ # Serialization can require a namespace prefix to be declared for
+ # space1 even if no elements use that prefix, in order to
+ # write an attribute name in that namespace.
+ serialized2 = ( ''
+ ''
+ ''
+ '' )
+ self.assertEqual(serialize(e, default_namespace="space2"), serialized2)
+ self.assertEqualElements(e, ET.XML(serialized2))
+
+ serialized3 = ( ''
+ ''
+ ''
+ '' )
+ self.assertEqual(serialize(e, default_namespace="space1"), serialized3)
+ self.assertEqualElements(e, ET.XML(serialized3))
+
def test_doctype_public(self):
# Test PUBLIC doctype.
@@ -1520,10 +1589,9 @@
e = ET.Element("{default}elem")
s = ET.SubElement(e, "{default}elem")
s = ET.SubElement(e, "elem") # unprefixed name
- with self.assertRaises(ValueError) as cm:
+ with self.assertRaisesRegex(ValueError,
+ 'cannot use non-qualified name.* with default_namespace option'):
serialize(e, default_namespace="default") # 3
- self.assertEqual(str(cm.exception),
- 'cannot use non-qualified names with default_namespace option')
def test_bug_200709_register_namespace(self):
e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
diff -r f2c6b0485ce6 Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py Fri Dec 13 17:21:42 2013 -0500
+++ b/Lib/xml/etree/ElementTree.py Fri Dec 13 15:15:02 2013 -0800
@@ -772,9 +772,9 @@
if method == "text":
_serialize_text(write, self._root)
else:
- qnames, namespaces = _namespaces(self._root, default_namespace)
+ elt_qnames, attr_qnames, namespaces = _namespaces(self._root, default_namespace)
serialize = _serialize[method]
- serialize(write, self._root, qnames, namespaces,
+ serialize(write, self._root, elt_qnames, attr_qnames, namespaces,
short_empty_elements=short_empty_elements)
def write_c14n(self, file):
@@ -837,39 +837,58 @@
yield file.write
def _namespaces(elem, default_namespace=None):
- # identify namespaces used in this tree
+ # identify namespaces used in this tree, assign namespace prefixes
+ # as needed, and create cache dicts which map element and attribute
+ # names to their serialized representations
# maps qnames to *encoded* prefix:local names
- qnames = {None: None}
+ # elts and attrs may need distinct mappings because default
+ # namespaces affect them differently
+ elt_qnames = {None: None}
+ if default_namespace is None:
+ attr_qnames = elt_qnames
+ else:
+ attr_qnames = {None: None}
# maps uri:s to prefixes
namespaces = {}
- if default_namespace:
- namespaces[default_namespace] = ""
- def add_qname(qname):
+ # the default namespace no longer occupies a slot in `namespaces`, so
+ # offset by one to keep generated nsN prefixes numbered as before
+ ns0 = 1 if default_namespace else 0
+
+ def add_qname(qname, defaultable):
# calculate serialized qname representation
+ qnames = elt_qnames if defaultable else attr_qnames
try:
if qname[:1] == "{":
uri, tag = qname[1:].rsplit("}", 1)
+ if defaultable and uri == default_namespace:
+ qnames[qname] = tag # Default namespace, no prefix
+ return
+
prefix = namespaces.get(uri)
if prefix is None:
+ # Assign a namespace prefix
prefix = _namespace_map.get(uri)
if prefix is None:
- prefix = "ns%d" % len(namespaces)
+ prefix = "ns%d" % (ns0 + len(namespaces))
if prefix != "xml":
namespaces[uri] = prefix
- if prefix:
- qnames[qname] = "%s:%s" % (prefix, tag)
- else:
- qnames[qname] = tag # default element
+ qnames[qname] = "%s:%s" % (prefix, tag)
else:
- if default_namespace:
- # FIXME: can this be handled in XML 1.0?
+ if defaultable and default_namespace:
+ # A default namespace can be undeclared
+ # (see http://www.w3.org/TR/REC-xml-names/#defaulting)
+ # but only by placing an xmlns="" attribute on the
+ # element and possibly re-declaring the default
+ # namespace for child elements. Our serializers
+ # can't do that. FIXME.
raise ValueError(
- "cannot use non-qualified names with "
- "default_namespace option"
+ "cannot use non-qualified names (<%s>) with "
+ "default_namespace option" % (qname,)
)
+ # Unqualified name -> unprefixed serialized name
qnames[qname] = qname
except TypeError:
_raise_serialization_error(qname)
@@ -878,26 +897,36 @@
for elem in elem.iter():
tag = elem.tag
if isinstance(tag, QName):
- if tag.text not in qnames:
- add_qname(tag.text)
+ if tag.text not in elt_qnames:
+ add_qname(tag.text, True)
elif isinstance(tag, str):
- if tag not in qnames:
- add_qname(tag)
+ if tag not in elt_qnames:
+ add_qname(tag, True)
elif tag is not None and tag is not Comment and tag is not PI:
_raise_serialization_error(tag)
for key, value in elem.items():
if isinstance(key, QName):
key = key.text
- if key not in qnames:
- add_qname(key)
- if isinstance(value, QName) and value.text not in qnames:
- add_qname(value.text)
+ if key not in attr_qnames:
+ add_qname(key, False)
+ if isinstance(value, QName) and value.text not in attr_qnames:
+ # FIXME: Should default ns be applied to attrib *values*?
+ # For now map the value like an attribute name (never defaulted),
+ # which is safe but may sometimes emit an unneeded namespace prefix.
+ add_qname(value.text, False)
text = elem.text
- if isinstance(text, QName) and text.text not in qnames:
- add_qname(text.text)
- return qnames, namespaces
+ if isinstance(text, QName) and text.text not in elt_qnames:
+ add_qname(text.text, True)
+
+ if default_namespace:
+ prefixes_list = [ (default_namespace, "") ]
+ prefixes_list.extend(namespaces.items())
+ else:
+ prefixes_list = namespaces.items()
+
+ return elt_qnames, attr_qnames, prefixes_list
-def _serialize_xml(write, elem, qnames, namespaces,
+def _serialize_xml(write, elem, elt_qnames, attr_qnames, namespaces,
short_empty_elements, **kwargs):
tag = elem.tag
text = elem.text
@@ -906,19 +935,19 @@
elif tag is ProcessingInstruction:
write("%s?>" % text)
else:
- tag = qnames[tag]
+ tag = elt_qnames[tag]
if tag is None:
if text:
write(_escape_cdata(text))
for e in elem:
- _serialize_xml(write, e, qnames, None,
+ _serialize_xml(write, e, elt_qnames, attr_qnames, None,
short_empty_elements=short_empty_elements)
else:
write("<" + tag)
items = list(elem.items())
if items or namespaces:
if namespaces:
- for v, k in sorted(namespaces.items(),
+ for v, k in sorted(namespaces,
key=lambda x: x[1]): # sort on prefix
if k:
k = ":" + k
@@ -930,16 +959,16 @@
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
- v = qnames[v.text]
+ v = attr_qnames[v.text]
else:
v = _escape_attrib(v)
- write(" %s=\"%s\"" % (qnames[k], v))
+ write(" %s=\"%s\"" % (attr_qnames[k], v))
if text or len(elem) or not short_empty_elements:
write(">")
if text:
write(_escape_cdata(text))
for e in elem:
- _serialize_xml(write, e, qnames, None,
+ _serialize_xml(write, e, elt_qnames, attr_qnames, None,
short_empty_elements=short_empty_elements)
write("" + tag + ">")
else:
@@ -955,7 +984,7 @@
except NameError:
pass
-def _serialize_html(write, elem, qnames, namespaces, **kwargs):
+def _serialize_html(write, elem, elt_qnames, attr_qnames, namespaces, **kwargs):
tag = elem.tag
text = elem.text
if tag is Comment:
@@ -963,18 +992,18 @@
elif tag is ProcessingInstruction:
write("%s?>" % _escape_cdata(text))
else:
- tag = qnames[tag]
+ tag = elt_qnames[tag]
if tag is None:
if text:
write(_escape_cdata(text))
for e in elem:
- _serialize_html(write, e, qnames, None)
+ _serialize_html(write, e, elt_qnames, attr_qnames, None)
else:
write("<" + tag)
items = list(elem.items())
if items or namespaces:
if namespaces:
- for v, k in sorted(namespaces.items(),
+ for v, k in sorted(namespaces,
key=lambda x: x[1]): # sort on prefix
if k:
k = ":" + k
@@ -986,11 +1015,11 @@
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
- v = qnames[v.text]
+ v = attr_qnames[v.text]
else:
v = _escape_attrib_html(v)
# FIXME: handle boolean attributes
- write(" %s=\"%s\"" % (qnames[k], v))
+ write(" %s=\"%s\"" % (attr_qnames[k], v))
write(">")
ltag = tag.lower()
if text:
@@ -999,7 +1028,7 @@
else:
write(_escape_cdata(text))
for e in elem:
- _serialize_html(write, e, qnames, None)
+ _serialize_html(write, e, elt_qnames, attr_qnames, None)
if ltag not in HTML_EMPTY:
write("" + tag + ">")
if elem.tail: