Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(10654)

Delta Between Two Patch Sets: Lib/xml/etree/ElementTree.py

Issue 17088: ElementTree incorrectly refuses to write attributes without namespaces when default_namespace is used
Left Patch Set: Created 6 years, 7 months ago
Right Patch Set: Created 6 years, 6 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Lib/test/test_xml_etree.py ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 """Lightweight XML support for Python. 1 """Lightweight XML support for Python.
2 2
3 XML is an inherently hierarchical data format, and the most natural way to 3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree. This module has two classes for this purpose: 4 represent it is with a tree. This module has two classes for this purpose:
5 5
6 1. ElementTree represents the whole XML document as a tree and 6 1. ElementTree represents the whole XML document as a tree and
7 7
8 2. Element represents a single node in this tree. 8 2. Element represents a single node in this tree.
9 9
10 Interactions with the whole document (reading and writing to/from files) are 10 Interactions with the whole document (reading and writing to/from files) are
(...skipping 754 matching lines...) Expand 10 before | Expand all | Expand 10 after
765 declared_encoding = encoding 765 declared_encoding = encoding
766 if encoding == "unicode": 766 if encoding == "unicode":
767 # Retrieve the default encoding for the xml declaration 767 # Retrieve the default encoding for the xml declaration
768 import locale 768 import locale
769 declared_encoding = locale.getpreferredencoding() 769 declared_encoding = locale.getpreferredencoding()
770 write("<?xml version='1.0' encoding='%s'?>\n" % ( 770 write("<?xml version='1.0' encoding='%s'?>\n" % (
771 declared_encoding,)) 771 declared_encoding,))
772 if method == "text": 772 if method == "text":
773 _serialize_text(write, self._root) 773 _serialize_text(write, self._root)
774 else: 774 else:
775 qnames, namespaces = _namespaces(self._root, default_namespace) 775 elt_qnames, attr_qnames, namespaces = _namespaces(self._root, default_namespace)
776 serialize = _serialize[method] 776 serialize = _serialize[method]
777 serialize(write, self._root, qnames, namespaces, 777 serialize(write, self._root, elt_qnames, attr_qnames, namespaces,
778 short_empty_elements=short_empty_elements) 778 short_empty_elements=short_empty_elements)
779 779
780 def write_c14n(self, file): 780 def write_c14n(self, file):
781 # lxml.etree compatibility. use output method instead 781 # lxml.etree compatibility. use output method instead
782 return self.write(file, method="c14n") 782 return self.write(file, method="c14n")
783 783
784 # -------------------------------------------------------------------- 784 # --------------------------------------------------------------------
785 # serialization support 785 # serialization support
786 786
787 @contextlib.contextmanager 787 @contextlib.contextmanager
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
830 file = io.TextIOWrapper(file, 830 file = io.TextIOWrapper(file,
831 encoding=encoding, 831 encoding=encoding,
832 errors="xmlcharrefreplace", 832 errors="xmlcharrefreplace",
833 newline="\n") 833 newline="\n")
834 # Keep the original file open when the TextIOWrapper is 834 # Keep the original file open when the TextIOWrapper is
835 # destroyed 835 # destroyed
836 stack.callback(file.detach) 836 stack.callback(file.detach)
837 yield file.write 837 yield file.write
838 838
839 def _namespaces(elem, default_namespace=None): 839 def _namespaces(elem, default_namespace=None):
840 # identify namespaces used in this tree 840 # identify namespaces used in this tree, assign namespace prefixes
841 # as needed, and create cache dicts which map element and attribute
842 # names to their serialized representations
841 843
842 # maps qnames to *encoded* prefix:local names 844 # maps qnames to *encoded* prefix:local names
843 qnames = {None: None} 845 # elts and attrs may need distinct mappings because default
846 # namespaces affect them differently
847 elt_qnames = {None: None}
848 if default_namespace is None:
849 attr_qnames = elt_qnames
850 else:
851 attr_qnames = {None: None}
844 852
845 # maps uri:s to prefixes 853 # maps uri:s to prefixes
846 namespaces = {} 854 namespaces = {}
847 if default_namespace: 855
848 namespaces[default_namespace] = "" 856 # this offset is just here to make our generated namespace
849 857 # prefixes predictable to the unit tests
850 def add_qname(qname, is_attrname): 858 ns0 = 1 if default_namespace else 0
859
860 def add_qname(qname, defaultable):
851 # calculate serialized qname representation 861 # calculate serialized qname representation
862 qnames = elt_qnames if defaultable else attr_qnames
852 try: 863 try:
853 if qname[:1] == "{": 864 if qname[:1] == "{":
854 uri, tag = qname[1:].rsplit("}", 1) 865 uri, tag = qname[1:].rsplit("}", 1)
866 if defaultable and uri == default_namespace:
867 qnames[qname] = tag # Default namespace, no prefix
868 return
869
855 prefix = namespaces.get(uri) 870 prefix = namespaces.get(uri)
856 if prefix is None: 871 if prefix is None:
872 # Assign a namespace prefix
857 prefix = _namespace_map.get(uri) 873 prefix = _namespace_map.get(uri)
858 if prefix is None: 874 if prefix is None:
859 prefix = "ns%d" % len(namespaces) 875 prefix = "ns%d" % (ns0 + len(namespaces))
860 if prefix != "xml": 876 if prefix != "xml":
861 namespaces[uri] = prefix 877 namespaces[uri] = prefix
862 if prefix: 878 qnames[qname] = "%s:%s" % (prefix, tag)
863 qnames[qname] = "%s:%s" % (prefix, tag)
864 else:
865 # FIXME: (if is_attrname and prefix == ''): If an
866 # attribute name is in the default namespace, we
867 # will write it out without a prefix, which is
868 # incorrect (an unprefixed attribute is always
869 # unqualified, even if a default namespace is in
870 # effect). For completely correct behavior, we
871 # would need to emit both a default namespace
872 # declaration *and* a prefix declaration for the
873 # use of attributes.
874 qnames[qname] = tag # default element
875 else: 879 else:
876 if default_namespace and not is_attrname: 880 if defaultable and default_namespace:
877 # A default namespace can be undeclared 881 # A default namespace can be undeclared
878 # (see http://www.w3.org/TR/REC-xml-names/#defaulting) 882 # (see http://www.w3.org/TR/REC-xml-names/#defaulting)
879 # but only by placing an xmlns="" attribute on the 883 # but only by placing an xmlns="" attribute on the
880 # element and possibly re-declaring the default 884 # element and possibly re-declaring the default
881 # namespace for child elements. Our serializers 885 # namespace for child elements. Our serializers
882 # can't do that. FIXME. 886 # can't do that. FIXME.
883 raise ValueError( 887 raise ValueError(
884 "cannot use non-qualified names with " 888 "cannot use non-qualified names (<%s>) with "
885 "default_namespace option" 889 "default_namespace option" % (qname,)
886 ) 890 )
891 # Unqualified name -> unprefixed serialized name
887 qnames[qname] = qname 892 qnames[qname] = qname
888 except TypeError: 893 except TypeError:
889 _raise_serialization_error(qname) 894 _raise_serialization_error(qname)
890 895
891 # populate qname and namespaces table 896 # populate qname and namespaces table
892 for elem in elem.iter(): 897 for elem in elem.iter():
893 tag = elem.tag 898 tag = elem.tag
894 if isinstance(tag, QName): 899 if isinstance(tag, QName):
895 if tag.text not in qnames: 900 if tag.text not in elt_qnames:
896 add_qname(tag.text, False) 901 add_qname(tag.text, True)
897 elif isinstance(tag, str): 902 elif isinstance(tag, str):
898 if tag not in qnames: 903 if tag not in elt_qnames:
899 add_qname(tag, False) 904 add_qname(tag, True)
900 elif tag is not None and tag is not Comment and tag is not PI: 905 elif tag is not None and tag is not Comment and tag is not PI:
901 _raise_serialization_error(tag) 906 _raise_serialization_error(tag)
902 for key, value in elem.items(): 907 for key, value in elem.items():
903 if isinstance(key, QName): 908 if isinstance(key, QName):
904 key = key.text 909 key = key.text
905 if key not in qnames: 910 if key not in attr_qnames:
906 add_qname(key, True) 911 add_qname(key, False)
907 if isinstance(value, QName) and value.text not in qnames: 912 if isinstance(value, QName) and value.text not in attr_qnames:
913 # FIXME: Should default ns be applied to attrib *values*?
914 # Opting for correctness here even if it results in an
915 # unneeded namespace prefix sometimes.
908 add_qname(value.text, False) 916 add_qname(value.text, False)
909 text = elem.text 917 text = elem.text
910 if isinstance(text, QName) and text.text not in qnames: 918 if isinstance(text, QName) and text.text not in elt_qnames:
911 add_qname(text.text, False) 919 add_qname(text.text, True)
912 return qnames, namespaces 920
913 921 if default_namespace:
914 def _serialize_xml(write, elem, qnames, namespaces, 922 prefixes_list = [ (default_namespace, "") ]
923 prefixes_list.extend(namespaces.items())
924 else:
925 prefixes_list = namespaces.items()
926
927 return elt_qnames, attr_qnames, prefixes_list
928
929 def _serialize_xml(write, elem, elt_qnames, attr_qnames, namespaces,
915 short_empty_elements, **kwargs): 930 short_empty_elements, **kwargs):
916 tag = elem.tag 931 tag = elem.tag
917 text = elem.text 932 text = elem.text
918 if tag is Comment: 933 if tag is Comment:
919 write("<!--%s-->" % text) 934 write("<!--%s-->" % text)
920 elif tag is ProcessingInstruction: 935 elif tag is ProcessingInstruction:
921 write("<?%s?>" % text) 936 write("<?%s?>" % text)
922 else: 937 else:
923 tag = qnames[tag] 938 tag = elt_qnames[tag]
924 if tag is None: 939 if tag is None:
925 if text: 940 if text:
926 write(_escape_cdata(text)) 941 write(_escape_cdata(text))
927 for e in elem: 942 for e in elem:
928 _serialize_xml(write, e, qnames, None, 943 _serialize_xml(write, e, elt_qnames, attr_qnames, None,
929 short_empty_elements=short_empty_elements) 944 short_empty_elements=short_empty_elements)
930 else: 945 else:
931 write("<" + tag) 946 write("<" + tag)
932 items = list(elem.items()) 947 items = list(elem.items())
933 if items or namespaces: 948 if items or namespaces:
934 if namespaces: 949 if namespaces:
935 for v, k in sorted(namespaces.items(), 950 for v, k in sorted(namespaces,
936 key=lambda x: x[1]): # sort on prefix 951 key=lambda x: x[1]): # sort on prefix
937 if k: 952 if k:
938 k = ":" + k 953 k = ":" + k
939 write(" xmlns%s=\"%s\"" % ( 954 write(" xmlns%s=\"%s\"" % (
940 k, 955 k,
941 _escape_attrib(v) 956 _escape_attrib(v)
942 )) 957 ))
943 for k, v in sorted(items): # lexical order 958 for k, v in sorted(items): # lexical order
944 if isinstance(k, QName): 959 if isinstance(k, QName):
945 k = k.text 960 k = k.text
946 if isinstance(v, QName): 961 if isinstance(v, QName):
947 v = qnames[v.text] 962 v = attr_qnames[v.text]
948 else: 963 else:
949 v = _escape_attrib(v) 964 v = _escape_attrib(v)
950 write(" %s=\"%s\"" % (qnames[k], v)) 965 write(" %s=\"%s\"" % (attr_qnames[k], v))
951 if text or len(elem) or not short_empty_elements: 966 if text or len(elem) or not short_empty_elements:
952 write(">") 967 write(">")
953 if text: 968 if text:
954 write(_escape_cdata(text)) 969 write(_escape_cdata(text))
955 for e in elem: 970 for e in elem:
956 _serialize_xml(write, e, qnames, None, 971 _serialize_xml(write, e, elt_qnames, attr_qnames, None,
957 short_empty_elements=short_empty_elements) 972 short_empty_elements=short_empty_elements)
958 write("</" + tag + ">") 973 write("</" + tag + ">")
959 else: 974 else:
960 write(" />") 975 write(" />")
961 if elem.tail: 976 if elem.tail:
962 write(_escape_cdata(elem.tail)) 977 write(_escape_cdata(elem.tail))
963 978
964 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", 979 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
965 "img", "input", "isindex", "link", "meta", "param") 980 "img", "input", "isindex", "link", "meta", "param")
966 981
967 try: 982 try:
968 HTML_EMPTY = set(HTML_EMPTY) 983 HTML_EMPTY = set(HTML_EMPTY)
969 except NameError: 984 except NameError:
970 pass 985 pass
971 986
972 def _serialize_html(write, elem, qnames, namespaces, **kwargs): 987 def _serialize_html(write, elem, elt_qnames, attr_qnames, namespaces, **kwargs):
973 tag = elem.tag 988 tag = elem.tag
974 text = elem.text 989 text = elem.text
975 if tag is Comment: 990 if tag is Comment:
976 write("<!--%s-->" % _escape_cdata(text)) 991 write("<!--%s-->" % _escape_cdata(text))
977 elif tag is ProcessingInstruction: 992 elif tag is ProcessingInstruction:
978 write("<?%s?>" % _escape_cdata(text)) 993 write("<?%s?>" % _escape_cdata(text))
979 else: 994 else:
980 tag = qnames[tag] 995 tag = elt_qnames[tag]
981 if tag is None: 996 if tag is None:
982 if text: 997 if text:
983 write(_escape_cdata(text)) 998 write(_escape_cdata(text))
984 for e in elem: 999 for e in elem:
985 _serialize_html(write, e, qnames, None) 1000 _serialize_html(write, e, elt_qnames, attr_qnames, None)
986 else: 1001 else:
987 write("<" + tag) 1002 write("<" + tag)
988 items = list(elem.items()) 1003 items = list(elem.items())
989 if items or namespaces: 1004 if items or namespaces:
990 if namespaces: 1005 if namespaces:
991 for v, k in sorted(namespaces.items(), 1006 for v, k in sorted(namespaces,
992 key=lambda x: x[1]): # sort on prefix 1007 key=lambda x: x[1]): # sort on prefix
993 if k: 1008 if k:
994 k = ":" + k 1009 k = ":" + k
995 write(" xmlns%s=\"%s\"" % ( 1010 write(" xmlns%s=\"%s\"" % (
996 k, 1011 k,
997 _escape_attrib(v) 1012 _escape_attrib(v)
998 )) 1013 ))
999 for k, v in sorted(items): # lexical order 1014 for k, v in sorted(items): # lexical order
1000 if isinstance(k, QName): 1015 if isinstance(k, QName):
1001 k = k.text 1016 k = k.text
1002 if isinstance(v, QName): 1017 if isinstance(v, QName):
1003 v = qnames[v.text] 1018 v = attr_qnames[v.text]
1004 else: 1019 else:
1005 v = _escape_attrib_html(v) 1020 v = _escape_attrib_html(v)
1006 # FIXME: handle boolean attributes 1021 # FIXME: handle boolean attributes
1007 write(" %s=\"%s\"" % (qnames[k], v)) 1022 write(" %s=\"%s\"" % (attr_qnames[k], v))
1008 write(">") 1023 write(">")
1009 ltag = tag.lower() 1024 ltag = tag.lower()
1010 if text: 1025 if text:
1011 if ltag == "script" or ltag == "style": 1026 if ltag == "script" or ltag == "style":
1012 write(text) 1027 write(text)
1013 else: 1028 else:
1014 write(_escape_cdata(text)) 1029 write(_escape_cdata(text))
1015 for e in elem: 1030 for e in elem:
1016 _serialize_html(write, e, qnames, None) 1031 _serialize_html(write, e, elt_qnames, attr_qnames, None)
1017 if ltag not in HTML_EMPTY: 1032 if ltag not in HTML_EMPTY:
1018 write("</" + tag + ">") 1033 write("</" + tag + ">")
1019 if elem.tail: 1034 if elem.tail:
1020 write(_escape_cdata(elem.tail)) 1035 write(_escape_cdata(elem.tail))
1021 1036
1022 def _serialize_text(write, elem): 1037 def _serialize_text(write, elem):
1023 for part in elem.itertext(): 1038 for part in elem.itertext():
1024 write(part) 1039 write(part)
1025 if elem.tail: 1040 if elem.tail:
1026 write(elem.tail) 1041 write(elem.tail)
(...skipping 645 matching lines...) Expand 10 before | Expand all | Expand 10 after
1672 try: 1687 try:
1673 # Element is going to be shadowed by the C implementation. We need to keep 1688 # Element is going to be shadowed by the C implementation. We need to keep
1674 # the Python version of it accessible for some "creative" by external code 1689 # the Python version of it accessible for some "creative" by external code
1675 # (see tests) 1690 # (see tests)
1676 _Element_Py = Element 1691 _Element_Py = Element
1677 1692
1678 # Element, SubElement, ParseError, TreeBuilder, XMLParser 1693 # Element, SubElement, ParseError, TreeBuilder, XMLParser
1679 from _elementtree import * 1694 from _elementtree import *
1680 except ImportError: 1695 except ImportError:
1681 pass 1696 pass
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+