# HG changeset patch # Parent b3c1a504ebc1cf2852b6a8e9ead82a042133761a Issue #13378: use non global namespaces for the serializer. diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -151,29 +151,35 @@ arguments. Returns an element instance. -.. function:: tostring(element, encoding="us-ascii", method="xml") +.. function:: tostring(element, encoding="us-ascii", method="xml", namespaces=None) Generates a string representation of an XML element, including all subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to generate a Unicode string. *method* is either ``"xml"``, - ``"html"`` or ``"text"`` (default is ``"xml"``). Returns an (optionally) - encoded string containing the XML data. + ``"html"`` or ``"text"`` (default is ``"xml"``). *namespaces* is a + dictionary which maps URIs to prefixes in addition to the global registry. + Returns an (optionally) encoded string containing the XML data. + .. versionchanged:: 3.3 + The *namespaces* argument was added. -.. function:: tostringlist(element, encoding="us-ascii", method="xml") +.. function:: tostringlist(element, encoding="us-ascii", method="xml", namespaces=None) Generates a string representation of an XML element, including all subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to generate a Unicode string. *method* is either ``"xml"``, - ``"html"`` or ``"text"`` (default is ``"xml"``). Returns a list of - (optionally) encoded strings containing the XML data. It does not guarantee - any specific sequence, except that ``"".join(tostringlist(element)) == - tostring(element)``. + ``"html"`` or ``"text"`` (default is ``"xml"``). 
*namespaces* is a + dictionary which maps URIs to prefixes in addition to the global registry. + Returns a list of (optionally) encoded strings containing the XML data. It + does not guarantee any specific sequence, except that + ``"".join(tostringlist(element)) == tostring(element)``. .. versionadded:: 3.2 + .. versionchanged:: 3.3 + The *namespaces* argument was added. .. function:: XML(text, parser=None) @@ -465,7 +471,7 @@ root element. - .. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml") + .. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml", namespaces=None) Writes the element tree to a file, as XML. *file* is a file name, or a :term:`file object` opened for writing. *encoding* [1]_ is the output encoding @@ -474,7 +480,11 @@ should be added to the file. Use False for never, True for always, None for only if not US-ASCII or UTF-8 or Unicode (default is None). *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``). - Returns an (optionally) encoded string. + *namespaces* is a dictionary which maps URIs to prefixes in addition to the + global registry. Returns an (optionally) encoded string. + + .. versionchanged:: 3.3 + The *namespaces* argument was added. 
This is the XML file that is going to be manipulated:: diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1717,6 +1717,8 @@ >>> s = ET.SubElement(e, "{not-default}elem") >>> serialize(e, default_namespace="default") # 2 '' + >>> serialize(e, namespaces={"default": ""}) + '' >>> e = ET.Element("{default}elem") >>> s = ET.SubElement(e, "{default}elem") @@ -1724,6 +1726,9 @@ >>> serialize(e, default_namespace="default") # 3 Traceback (most recent call last): ValueError: cannot use non-qualified names with default_namespace option + >>> serialize(e, namespaces={"default": ""}) + Traceback (most recent call last): + ValueError: cannot use non-qualified names with default_namespace option """ @@ -1851,6 +1856,48 @@ >>> ET.register_namespace('test10777', 'http://myuri/') """ +def check_issue13378(): + """ + Pass specific, non-global namespaces to the serializer. + + >>> elem = ET.XML('') + >>> serialize(elem, namespaces={'http://localhost/house': 'house'}) + '' + >>> serialize(elem) + '' + >>> serialize(elem, namespaces={'http://localhost/house': 'home'}) + '' + + Avoid prefix collisions. + + >>> elem2 = ET.XML('' + ... '' + ... '' + ... '') + >>> namespaces = { + ... 'http://localhost/house': 'house', + ... 'http://localhost/home': 'house', + ... 'http://localhost/geo': 'geo', + ... 
} + >>> serialize(elem2, namespaces=namespaces) # doctest: +NORMALIZE_WHITESPACE + '' + >>> namespaces['http://localhost/house'] = 'geo' + >>> serialize(elem2, namespaces=namespaces) + Traceback (most recent call last): + ValueError: cannot share the same prefix between two namespaces + >>> namespaces['http://localhost/house'] = 'xml' + >>> serialize(elem2, namespaces=namespaces) + Traceback (most recent call last): + ValueError: cannot share the same prefix between two namespaces + + >>> serialize(elem, namespaces=namespaces) + '' + >>> namespaces['http://localhost/house'] = '' + >>> serialize(elem, namespaces=namespaces) + '' + """ + # -------------------------------------------------------------------- diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -813,7 +813,8 @@ encoding=None, xml_declaration=None, default_namespace=None, - method=None): + method=None, + namespaces=None): # assert self._root is not None if not method: method = "xml" @@ -857,7 +858,11 @@ if method == "text": _serialize_text(write, self._root) else: - qnames, namespaces = _namespaces(self._root, default_namespace) + # if custom namespaces... + namespaces = dict(namespaces or {}) + if default_namespace: + namespaces[default_namespace] = "" + qnames = _qnames(self._root, namespaces) serialize = _serialize[method] serialize(write, self._root, qnames, namespaces) if file_or_filename is not file: @@ -870,17 +875,26 @@ # -------------------------------------------------------------------- # serialization support -def _namespaces(elem, default_namespace=None): +def _qnames(elem, namespaces): # identify namespaces used in this tree # maps qnames to *encoded* prefix:local names qnames = {None: None} + # any uri is mapped to ""? 
+ has_default_namespace = ("" in namespaces.values()) + + if namespaces: + namespace_map = _namespace_map.copy() + namespace_map.update(namespaces) + namespaces.clear() + else: + namespace_map = _namespace_map + + # Prevent collisions + prefixes = set() + # maps uri:s to prefixes - namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - def add_qname(qname): # calculate serialized qname representation try: @@ -888,9 +902,15 @@ uri, tag = qname[1:].rsplit("}", 1) prefix = namespaces.get(uri) if prefix is None: - prefix = _namespace_map.get(uri) + prefix = namespace_map.get(uri) if prefix is None: prefix = "ns%d" % len(namespaces) + if prefix in prefixes: + raise ValueError( + "cannot share the same prefix " + "between two namespaces" + ) + prefixes.add(prefix) if prefix != "xml": namespaces[uri] = prefix if prefix: @@ -898,7 +918,7 @@ else: qnames[qname] = tag # default element else: - if default_namespace: + if has_default_namespace: # FIXME: can this be handled in XML 1.0? raise ValueError( "cannot use non-qualified names with " @@ -909,11 +929,7 @@ _raise_serialization_error(qname) # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): + for elem in elem.iter(): tag = elem.tag if isinstance(tag, QName): if tag.text not in qnames: @@ -933,7 +949,7 @@ text = elem.text if isinstance(text, QName) and text.text not in qnames: add_qname(text.text) - return qnames, namespaces + return qnames def _serialize_xml(write, elem, qnames, namespaces): tag = elem.tag @@ -1152,13 +1168,14 @@ # @return An (optionally) encoded string containing the XML data. 
# @defreturn string -def tostring(element, encoding=None, method=None): +def tostring(element, encoding=None, method=None, namespaces=None): class dummy: pass data = [] file = dummy() file.write = data.append - ElementTree(element).write(file, encoding, method=method) + ElementTree(element).write(file, encoding, method=method, + namespaces=namespaces) if encoding in (str, "unicode"): return "".join(data) else: @@ -1179,13 +1196,14 @@ # @defreturn sequence # @since 1.3 -def tostringlist(element, encoding=None, method=None): +def tostringlist(element, encoding=None, method=None, namespaces=None): class dummy: pass data = [] file = dummy() file.write = data.append - ElementTree(element).write(file, encoding, method=method) + ElementTree(element).write(file, encoding, method=method, + namespaces=namespaces) # FIXME: merge small fragments into larger parts return data