Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(9)

Side by Side Diff: Lib/xml/etree/ElementTree.py

Issue 16954: Add docstrings for ElementTree module
Patch Set: Created 7 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 """Lightweight XML support for Python 2.3 and later.
ezio.melotti 2013/02/20 19:55:42 The "2.3 and later" part should be removed.
eric.araujo 2013/02/20 23:47:06 Also “lightweight” is disputed, there was a python
ezio.melotti 2013/02/21 11:23:27 Wasn't that minidom?
eric.araujo 2013/02/21 17:09:08 Duh yes it was minidom. The fact that I don’t lik
2
3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree. This module has two classes for this purpose:
5
6 1. ElementTree represents the whole XML document as a tree and
7
8 2. Element represents a single node in this tree.
9
10 Interactions with the whole document (reading and writing to/from files) are
11 usually done on the ElementTree level. Interactions with a single XML element
12 and its sub-elements are done on the Element level.
13
14 Element is a flexible container object designed to store hierarchical data
15 structures in memory. It can be described as a cross between a list and a
16 dictionary. Each Element has a number of properties associated with it:
17
18 'tag' - A string identifying what kind of data this element represents
19 (the element type, in other words).
20
21 'attrib' - a Python dictionary storing the element's attributes.
22
23 'text' - a string containing the element's text content.
24
25 'tail' - an optional string containing text after the element's end tag.
26
27 And a number of child elements stored in a Python sequence.
ezio.melotti 2013/02/20 19:55:42 Is this 'children'?
eric.araujo 2013/02/20 23:47:06 When a noun is used as adjective, it’s used in the
ezio.melotti 2013/02/21 11:23:27 I was referring to the name of the attribute. (I'm
eric.araujo 2013/02/21 17:09:08 Well if there is an attribute named “children” it
28
29 To create an element instance, use the Element constructor,
30 or the SubElement factory function.
31
32 You can also use the ElementTree class to wrap an element structure
33 and convert it to and from XML.
34
35 """
36
1 # 37 #
2 # ElementTree 38 # ElementTree
3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 39 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4 # 40 #
5 # light-weight XML support for Python 2.3 and later. 41 # light-weight XML support for Python 2.3 and later.
6 # 42 #
7 # history (since 1.2.6): 43 # history (since 1.2.6):
8 # 2005-11-12 fl added tostringlist/fromstringlist helpers 44 # 2005-11-12 fl added tostringlist/fromstringlist helpers
9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox 45 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox
10 # 2006-07-05 fl removed support for 2.1 and earlier 46 # 2006-07-05 fl removed support for 2.1 and earlier
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
67 "SubElement", 103 "SubElement",
68 "tostring", "tostringlist", 104 "tostring", "tostringlist",
69 "TreeBuilder", 105 "TreeBuilder",
70 "VERSION", 106 "VERSION",
71 "XML", "XMLID", 107 "XML", "XMLID",
72 "XMLParser", "XMLTreeBuilder", 108 "XMLParser", "XMLTreeBuilder",
73 "register_namespace", 109 "register_namespace",
74 ] 110 ]
75 111
76 VERSION = "1.3.0" 112 VERSION = "1.3.0"
77
78 ##
79 # The <b>Element</b> type is a flexible container object, designed to
80 # store hierarchical data structures in memory. The type can be
81 # described as a cross between a list and a dictionary.
82 # <p>
83 # Each element has a number of properties associated with it:
84 # <ul>
85 # <li>a <i>tag</i>. This is a string identifying what kind of data
86 # this element represents (the element type, in other words).</li>
87 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
88 # <li>a <i>text</i> string.</li>
89 # <li>an optional <i>tail</i> string.</li>
90 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
91 # </ul>
92 #
93 # To create an element instance, use the {@link #Element} constructor
94 # or the {@link #SubElement} factory function.
95 # <p>
96 # The {@link #ElementTree} class can be used to wrap an element
97 # structure, and convert it from and to XML.
98 ##
99 113
100 import sys 114 import sys
101 import re 115 import re
102 import warnings 116 import warnings
103 import io 117 import io
104 import contextlib 118 import contextlib
105 119
106 from . import ElementPath 120 from . import ElementPath
107 121
108 122
109 ##
110 # Parser error. This is a subclass of <b>SyntaxError</b>.
111 # <p>
112 # In addition to the exception value, an exception instance contains a
113 # specific exception code in the <b>code</b> attribute, and the line and
114 # column of the error in the <b>position</b> attribute.
115 123
116 class ParseError(SyntaxError): 124 class ParseError(SyntaxError):
125 """An error when parsing an XML document.
126
127 In addition to its exception value, a ParseError contains
128 two extra attributes:
129 'code' - the specific exception code
130 'position' - the line and column of the error
131
132 """
117 pass 133 pass
118 134
119 # -------------------------------------------------------------------- 135 # --------------------------------------------------------------------
120 136
121 ##
122 # Checks if an object appears to be a valid element object.
123 #
124 # @param An element instance.
125 # @return A true value if this is an element object.
126 # @defreturn flag
127 137
128 def iselement(element): 138 def iselement(element):
139 """Check if object appears to be a valid element.
140
141 'element' is an element instance.
142
143 Return True if 'element' is an element object.
144
145 """
129 # FIXME: not sure about this; 146 # FIXME: not sure about this;
130 # isinstance(element, Element) or look for tag/attrib/text attributes 147 # isinstance(element, Element) or look for tag/attrib/text attributes
131 return hasattr(element, 'tag') 148 return hasattr(element, 'tag')
132 149
133 ##
134 # Element class. This class defines the Element interface, and
135 # provides a reference implementation of this interface.
136 # <p>
137 # The element name, attribute names, and attribute values can be
138 # either ASCII strings (ordinary Python strings containing only 7-bit
139 # ASCII characters) or Unicode strings.
140 #
141 # @param tag The element name.
142 # @param attrib An optional dictionary, containing element attributes.
143 # @param **extra Additional attributes, given as keyword arguments.
144 # @see Element
145 # @see SubElement
146 # @see Comment
147 # @see ProcessingInstruction
148 150
149 class Element: 151 class Element:
150 # <tag attrib>text<child/>...</tag>tail 152 """An XML element.
151 153
152 ## 154 This class is the reference implementation of the Element interface.
153 # (Attribute) Element tag. 155
156 The element name, attribute names, and attribute values can be either
157 bytestrings or Unicode strings. 'tag' is the element name. 'attrib' is
ezio.melotti 2013/02/20 19:55:42 I thinks this should be 'ASCII-only byte strings',
158 an optional dictionary, containing element attributes. 'extra'
159 contains additional element attributes given as keyword arguments.
ezio.melotti 2013/02/20 19:55:42 This paragraph could be moved to the constructor d
160
161 Example form:
162 <tag attrib>text<child/>...</tag>tail
163
164 """
154 165
155 tag = None 166 tag = None
156 167 """The element's tag name."""
157 ##
158 # (Attribute) Element attribute dictionary. Where possible, use
159 # {@link #Element.get},
160 # {@link #Element.set},
161 # {@link #Element.keys}, and
162 # {@link #Element.items} to access
163 # element attributes.
164 168
165 attrib = None 169 attrib = None
166 170 """Dictionary of the tag's attributes."""
eric.araujo 2013/02/20 23:47:06 s/tag/element/ A tag is the concrete bytes used t
ezio.melotti 2013/02/21 11:23:27 While this is true and I agree it should be fixed,
167 ##
168 # (Attribute) Text before first subelement. This is either a
169 # string or the value None. Note that if there was no text, this
170 # attribute may be either None or an empty string, depending on
171 # the parser.
172 171
173 text = None 172 text = None
173 """
174 Text before first subelement. This is either a string or the value None.
175 Note that if there was no text, this attribute may be either None or an
176 empty string, depending on the parser.
174 177
175 ## 178 """
176 # (Attribute) Text after this element's end tag, but before the
177 # next sibling element's start tag. This is either a string or
178 # the value None. Note that if there was no text, this attribute
179 # may be either None or an empty string, depending on the parser.
180 179
181 tail = None # text after end tag, if any 180 tail = None
181 """
182 Text after this element's end tag, but before the next sibling element's
183 start tag. This is either a string or the value None. Note that if there
184 was no text, this attribute may be either None or an empty string,
185 depending on the parser.
182 186
183 # constructor 187 """
184 188
185 def __init__(self, tag, attrib={}, **extra): 189 def __init__(self, tag, attrib={}, **extra):
ezio.melotti 2013/02/20 19:55:42 This should probably be documented.
eric.araujo 2013/02/20 23:47:06 Note that __init__ methods are typically documente
186 if not isinstance(attrib, dict): 190 if not isinstance(attrib, dict):
187 raise TypeError("attrib must be dict, not %s" % ( 191 raise TypeError("attrib must be dict, not %s" % (
188 attrib.__class__.__name__,)) 192 attrib.__class__.__name__,))
189 attrib = attrib.copy() 193 attrib = attrib.copy()
190 attrib.update(extra) 194 attrib.update(extra)
191 self.tag = tag 195 self.tag = tag
192 self.attrib = attrib 196 self.attrib = attrib
193 self._children = [] 197 self._children = []
194 198
195 def __repr__(self): 199 def __repr__(self):
196 return "<Element %s at 0x%x>" % (repr(self.tag), id(self)) 200 return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
197 201
198 ##
199 # Creates a new element object of the same type as this element.
200 #
201 # @param tag Element tag.
202 # @param attrib Element attributes, given as a dictionary.
203 # @return A new element instance.
204 202
205 def makeelement(self, tag, attrib): 203 def makeelement(self, tag, attrib):
204 """Create a new element object of the same type as this element.
205
206 'tag' is a string containing the Element tag name. 'attrib' is a
207 dictionary containing the Element attributes.
208
209 Do not call this method, use the SubElement factory function instead.
210
211 """
206 return self.__class__(tag, attrib) 212 return self.__class__(tag, attrib)
207 213
208 ##
209 # (Experimental) Copies the current element. This creates a
210 # shallow copy; subelements will be shared with the original tree.
211 #
212 # @return A new element instance.
213 214
215 # (Experimental)
ezio.melotti 2013/02/20 19:55:42 I would keep this in the docstring, or remove it a
214 def copy(self): 216 def copy(self):
217 """Return copy of current element.
218
219 This creates a shallow copy. Subelements will be shared with the
220 original tree.
221
222 """
215 elem = self.makeelement(self.tag, self.attrib) 223 elem = self.makeelement(self.tag, self.attrib)
216 elem.text = self.text 224 elem.text = self.text
217 elem.tail = self.tail 225 elem.tail = self.tail
218 elem[:] = self 226 elem[:] = self
219 return elem 227 return elem
220 228
221 ##
222 # Returns the number of subelements. Note that this only counts
223 # full elements; to check if there's any content in an element, you
224 # have to check both the length and the <b>text</b> attribute.
225 #
226 # @return The number of subelements.
227 229
228 def __len__(self): 230 def __len__(self):
231 """Return number of subelements.
232
233 Note that this only counts full elements. That means if you want to
234 check if there's any content in an element, you have to check both its
235 length and its text attribute.
236
237 """
eric.araujo 2013/02/20 23:47:06 Magic methods don’t usually have docstrings, becau
229 return len(self._children) 238 return len(self._children)
230 239
231 def __bool__(self): 240 def __bool__(self):
232 warnings.warn( 241 warnings.warn(
233 "The behavior of this method will change in future versions. " 242 "The behavior of this method will change in future versions. "
234 "Use specific 'len(elem)' or 'elem is not None' test instead.", 243 "Use specific 'len(elem)' or 'elem is not None' test instead.",
235 FutureWarning, stacklevel=2 244 FutureWarning, stacklevel=2
236 ) 245 )
237 return len(self._children) != 0 # emulate old behaviour, for now 246 return len(self._children) != 0 # emulate old behaviour, for now
238 247
239 ##
240 # Returns the given subelement, by index.
241 #
242 # @param index What subelement to return.
243 # @return The given subelement.
244 # @exception IndexError If the given element does not exist.
245 248
246 def __getitem__(self, index): 249 def __getitem__(self, index):
250 """Return subelement at index.
251
252 IndexError is raised if the given element does not exist.
253
254 """
eric.araujo 2013/02/20 23:47:06 This docstring is valid for all sequence __getitem
247 return self._children[index] 255 return self._children[index]
248 256
249 ##
250 # Replaces the given subelement, by index.
251 #
252 # @param index What subelement to replace.
253 # @param element The new element value.
254 # @exception IndexError If the given element does not exist.
255 257
256 def __setitem__(self, index, element): 258 def __setitem__(self, index, element):
259 """Replace subelement at index.
260
261 IndexError is raised if the given element does not exist.
262
263 """
257 # if isinstance(index, slice): 264 # if isinstance(index, slice):
258 # for elt in element: 265 # for elt in element:
259 # assert iselement(elt) 266 # assert iselement(elt)
260 # else: 267 # else:
261 # assert iselement(element) 268 # assert iselement(element)
262 self._children[index] = element 269 self._children[index] = element
263 270
264 ##
265 # Deletes the given subelement, by index.
266 #
267 # @param index What subelement to delete.
268 # @exception IndexError If the given element does not exist.
269 271
270 def __delitem__(self, index): 272 def __delitem__(self, index):
273 """Delete subelement at index.
274
275 IndexError is raised if the given element does not exist.
276
277 """
271 del self._children[index] 278 del self._children[index]
272 279
273 ##
274 # Adds a subelement to the end of this element. In document order,
275 # the new element will appear after the last existing subelement (or
276 # directly after the text, if it's the first subelement), but before
277 # the end tag for this element.
278 #
279 # @param element The element to add.
280 280
281 def append(self, element): 281 def append(self, subelement):
282 self._assert_is_element(element) 282 """Add subelement to the end of this element.
283 self._children.append(element)
284 283
285 ## 284 The new element will appear in document order after the last existing
286 # Appends subelements from a sequence. 285 subelement (or directly after the text, if it's the first subelement),
287 # 286 but before the end tag for this element.
288 # @param elements A sequence object with zero or more elements. 287
289 # @since 1.3 288 """
289 self._assert_is_element(subelement)
290 self._children.append(subelement)
290 291
291 def extend(self, elements): 292 def extend(self, elements):
293 """Append subelements from a sequence.
294
295 'elements' is a sequence object with zero or more elements.
eric.araujo 2013/02/20 23:47:06 s/sequence object/sequence/
296
297 """
292 for element in elements: 298 for element in elements:
293 self._assert_is_element(element) 299 self._assert_is_element(element)
294 self._children.extend(elements) 300 self._children.extend(elements)
295 301
296 ##
297 # Inserts a subelement at the given position in this element.
298 #
299 # @param index Where to insert the new subelement.
300 302
301 def insert(self, index, element): 303 def insert(self, index, subelement):
302 self._assert_is_element(element) 304 """Insert subelement at given position.
303 self._children.insert(index, element) 305
306 'index' is an int representing where to insert the new subelement.
307 'subelement' is the Element to be inserted.
308 """
309 self._assert_is_element(subelement)
310 self._children.insert(index, subelement)
304 311
305 def _assert_is_element(self, e): 312 def _assert_is_element(self, e):
306 # Need to refer to the actual Python implementation, not the 313 # Need to refer to the actual Python implementation, not the
307 # shadowing C implementation. 314 # shadowing C implementation.
308 if not isinstance(e, _Element): 315 if not isinstance(e, _Element):
309 raise TypeError('expected an Element, not %s' % type(e).__name__) 316 raise TypeError('expected an Element, not %s' % type(e).__name__)
310 317
311 ##
312 # Removes a matching subelement. Unlike the <b>find</b> methods,
313 # this method compares elements based on identity, not on tag
314 # value or contents. To remove subelements by other means, the
315 # easiest way is often to use a list comprehension to select what
316 # elements to keep, and use slice assignment to update the parent
317 # element.
318 #
319 # @param element What element to remove.
320 # @exception ValueError If a matching element could not be found.
321 318
322 def remove(self, element): 319 def remove(self, subelement):
320 """Remove matching subelement.
321
322 Unlike the find methods, this method compares elements based on
323 identity, not on tag value or contents. To remove subelements by
324 other means, the easiest way is often to use a list comprehension to
325 select what elements to keep, and use slice assignment to update the
326 parent element.
327
328 'subelement' is what element to remove.
329
330 ValueError is raised if a matching element could not be found.
331
332 """
323 # assert iselement(element) 333 # assert iselement(element)
324 self._children.remove(element) 334 self._children.remove(subelement)
325
326 ##
327 # (Deprecated) Returns all subelements. The elements are returned
328 # in document order.
329 #
330 # @return A list of subelements.
331 # @defreturn list of Element instances
332 335
333 def getchildren(self): 336 def getchildren(self):
337 """(Deprecated) Return all subelements.
338
339 Elements are returned in document order.
340
341 """
334 warnings.warn( 342 warnings.warn(
335 "This method will be removed in future versions. " 343 "This method will be removed in future versions. "
336 "Use 'list(elem)' or iteration over elem instead.", 344 "Use 'list(elem)' or iteration over elem instead.",
337 DeprecationWarning, stacklevel=2 345 DeprecationWarning, stacklevel=2
338 ) 346 )
339 return self._children 347 return self._children
340 348
341 ##
342 # Finds the first matching subelement, by tag name or path.
343 #
344 # @param path What element to look for.
345 # @keyparam namespaces Optional namespace prefix map.
346 # @return The first matching element, or None if no element was found.
347 # @defreturn Element or None
348 349
349 def find(self, path, namespaces=None): 350 def find(self, path, namespaces=None):
351 """Find first matching element by tag name or path.
352
353 'path' is a string having either an element tag name or an XPath,
354 'namespaces' is an optional mapping from namespace prefix to full name.
355
356 Return the first matching element, or None if no element was found.
357
358 """
350 return ElementPath.find(self, path, namespaces) 359 return ElementPath.find(self, path, namespaces)
351 360
352 ##
353 # Finds text for the first matching subelement, by tag name or path.
354 #
355 # @param path What element to look for.
356 # @param default What to return if the element was not found.
357 # @keyparam namespaces Optional namespace prefix map.
358 # @return The text content of the first matching element, or the
359 # default value no element was found. Note that if the element
360 # is found, but has no text content, this method returns an
361 # empty string.
362 # @defreturn string
363 361
364 def findtext(self, path, default=None, namespaces=None): 362 def findtext(self, path, default=None, namespaces=None):
363 """Find text for the first matching element by tag name or path.
364
365 'path' is what element to look for, 'default' is the value to return
366 if the element was not found, 'namespaces' is an optional mapping from
367 namespace prefix to full name.
368
369 Return text content of first matching element, or default value if no
370 element was found. Note that if an element is found having no text
371 content, the empty string is returned.
372
373 """
365 return ElementPath.findtext(self, path, default, namespaces) 374 return ElementPath.findtext(self, path, default, namespaces)
366 375
367 ##
368 # Finds all matching subelements, by tag name or path.
369 #
370 # @param path What element to look for.
371 # @keyparam namespaces Optional namespace prefix map.
372 # @return A list or other sequence containing all matching elements,
373 # in document order.
374 # @defreturn list of Element instances
375 376
376 def findall(self, path, namespaces=None): 377 def findall(self, path, namespaces=None):
378 """Find all matching subelements by tag name or path.
379
380 'path' is a string having either an element tag name or an XPath,
381 'namespaces' is an optional mapping from namespace prefix to full name.
382
383 Return a list containing all matching elements in document order.
384
385 """
377 return ElementPath.findall(self, path, namespaces) 386 return ElementPath.findall(self, path, namespaces)
378 387
379 ##
380 # Finds all matching subelements, by tag name or path.
381 #
382 # @param path What element to look for.
383 # @keyparam namespaces Optional namespace prefix map.
384 # @return An iterator or sequence containing all matching elements,
385 # in document order.
386 # @defreturn a generated sequence of Element instances
387 388
388 def iterfind(self, path, namespaces=None): 389 def iterfind(self, path, namespaces=None):
390 """Find all matching subelements by tag name or path.
391
392 'path' is a string having either an element tag name or an XPath,
393 'namespaces' is an optional mapping from namespace prefix to full name.
394
395 Return an iterable yielding all matching elements in document order.
396
397 """
389 return ElementPath.iterfind(self, path, namespaces) 398 return ElementPath.iterfind(self, path, namespaces)
390 399
391 ##
392 # Resets an element. This function removes all subelements, clears
393 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
394 # to None.
395 400
396 def clear(self): 401 def clear(self):
402 """Reset element.
403
404 This function removes all subelements, clears all attributes, and sets
405 the text and tail attributes to None.
406
407 """
397 self.attrib.clear() 408 self.attrib.clear()
398 self._children = [] 409 self._children = []
399 self.text = self.tail = None 410 self.text = self.tail = None
400 411
401 ##
402 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but
403 # some implementations may handle this a bit more efficiently.
404 #
405 # @param key What attribute to look for.
406 # @param default What to return if the attribute was not found.
407 # @return The attribute value, or the default value, if the
408 # attribute was not found.
409 # @defreturn string or None
410 412
411 def get(self, key, default=None): 413 def get(self, key, default=None):
414 """Get element attribute.
415
416 Equivalent to attrib.get, but some implementations may handle this a
417 bit more efficiently. 'key' is what attribute to look for, and
418 'default' is what to return if the attribute was not found.
419
420 Returns a string containing the attribute value, or the default if
421 attribute was not found.
422
423 """
412 return self.attrib.get(key, default) 424 return self.attrib.get(key, default)
413 425
414 ##
415 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
416 # but some implementations may handle this a bit more efficiently.
417 #
418 # @param key What attribute to set.
419 # @param value The attribute value.
420 426
421 def set(self, key, value): 427 def set(self, key, value):
428 """Set element attribute.
429
430 Equivalent to attrib[key] = value, but some implementations may handle
431 this a bit more efficiently. 'key' is what attribute to set, and
432 'value' is the attribute value to set it to.
433
434 """
422 self.attrib[key] = value 435 self.attrib[key] = value
423 436
424 ##
425 # Gets a list of attribute names. The names are returned in an
426 # arbitrary order (just like for an ordinary Python dictionary).
427 # Equivalent to <b>attrib.keys()</b>.
428 #
429 # @return A list of element attribute names.
430 # @defreturn list of strings
431 437
432 def keys(self): 438 def keys(self):
439 """Get list of attribute names.
440
441 Names are returned in an arbitrary order, just like an ordinary
442 Python dict. Equivalent to attrib.keys().
443
444 Return list of element attribute names.
445
446 """
433 return self.attrib.keys() 447 return self.attrib.keys()
434 448
435 ##
436 # Gets element attributes, as a sequence. The attributes are
437 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
438 #
439 # @return A list of (name, value) tuples for all attributes.
440 # @defreturn list of (string, string) tuples
441 449
442 def items(self): 450 def items(self):
451 """Get element attributes as a sequence.
452
453 The attributes are returned in arbitrary order. Equivalent to
454 attrib.items().
455
456 Return a list of (name, value) tuples.
457
458 """
443 return self.attrib.items() 459 return self.attrib.items()
444 460
445 ##
446 # Creates a tree iterator. The iterator loops over this element
447 # and all subelements, in document order, and returns all elements
448 # with a matching tag.
449 # <p>
450 # If the tree structure is modified during iteration, new or removed
451 # elements may or may not be included. To get a stable set, use the
452 # list() function on the iterator, and loop over the resulting list.
453 #
454 # @param tag What tags to look for (default is to return all elements).
455 # @return An iterator containing all the matching elements.
456 # @defreturn iterator
457 461
458 def iter(self, tag=None): 462 def iter(self, tag=None):
463 """Create tree iterator.
464
465 The iterator loops over the element and all subelements in document
466 order, returning all elements with a matching tag.
467
468 If the tree structure is modified during iteration, new or removed
469 elements may or may not be included. To get a stable set, use the
470 list() function on the iterator, and loop over the resulting list.
471
472 'tag' is what tags to look for (default is to return all elements).
473
474 Return an iterator containing all the matching elements.
475
476 """
459 if tag == "*": 477 if tag == "*":
460 tag = None 478 tag = None
461 if tag is None or self.tag == tag: 479 if tag is None or self.tag == tag:
462 yield self 480 yield self
463 for e in self._children: 481 for e in self._children:
464 yield from e.iter(tag) 482 yield from e.iter(tag)
465 483
466 # compatibility 484 # compatibility
467 def getiterator(self, tag=None): 485 def getiterator(self, tag=None):
468 # Change for a DeprecationWarning in 1.4 486 # Change for a DeprecationWarning in 1.4
469 warnings.warn( 487 warnings.warn(
470 "This method will be removed in future versions. " 488 "This method will be removed in future versions. "
471 "Use 'elem.iter()' or 'list(elem.iter())' instead.", 489 "Use 'elem.iter()' or 'list(elem.iter())' instead.",
472 PendingDeprecationWarning, stacklevel=2 490 PendingDeprecationWarning, stacklevel=2
473 ) 491 )
474 return list(self.iter(tag)) 492 return list(self.iter(tag))
475 493
476 ##
477 # Creates a text iterator. The iterator loops over this element
478 # and all subelements, in document order, and returns all inner
479 # text.
480 #
481 # @return An iterator containing all inner text.
482 # @defreturn iterator
483 494
484 def itertext(self): 495 def itertext(self):
496 """Create text iterator.
497
498 The iterator loops over the element and all subelements in document
499 order, returning all inner text.
500
501 """
485 tag = self.tag 502 tag = self.tag
486 if not isinstance(tag, str) and tag is not None: 503 if not isinstance(tag, str) and tag is not None:
487 return 504 return
488 if self.text: 505 if self.text:
489 yield self.text 506 yield self.text
490 for e in self: 507 for e in self:
491 yield from e.itertext() 508 yield from e.itertext()
492 if e.tail: 509 if e.tail:
493 yield e.tail 510 yield e.tail
494 511
495 # compatibility 512 # compatibility
496 _Element = _ElementInterface = Element 513 _Element = _ElementInterface = Element
497 514
498 ##
499 # Subelement factory. This function creates an element instance, and
500 # appends it to an existing element.
501 # <p>
502 # The element name, attribute names, and attribute values can be
503 # either 8-bit ASCII strings or Unicode strings.
504 #
505 # @param parent The parent element.
506 # @param tag The subelement name.
507 # @param attrib An optional dictionary, containing element attributes.
508 # @param **extra Additional attributes, given as keyword arguments.
509 # @return An element instance.
510 # @defreturn Element
511 515
512 def SubElement(parent, tag, attrib={}, **extra): 516 def SubElement(parent, tag, attrib={}, **extra):
517 """Subelement factory which creates an element instance, and appends it
518 to an existing parent.
519
520 The element name, attribute names, and attribute values can be either
521 8-bit ASCII strings or Unicode strings.
522
523 'parent' is the parent element, 'tag' is the Subelement name, 'attrib' is
524 an optional dictionary containing element attributes, 'extra' are
525 additional attributes given as keyword arguments.
526
527 """
513 attrib = attrib.copy() 528 attrib = attrib.copy()
514 attrib.update(extra) 529 attrib.update(extra)
515 element = parent.makeelement(tag, attrib) 530 element = parent.makeelement(tag, attrib)
516 parent.append(element) 531 parent.append(element)
517 return element 532 return element
518 533
519 ##
520 # Comment element factory. This factory function creates a special
521 # element that will be serialized as an XML comment by the standard
522 # serializer.
523 # <p>
524 # The comment string can be either an 8-bit ASCII string or a Unicode
525 # string.
526 #
527 # @param text A string containing the comment string.
528 # @return An element instance, representing a comment.
529 # @defreturn Element
530 534
531 def Comment(text=None): 535 def Comment(text=None):
536 """Comment element factory.
537
538 This function creates a special element which the standard serializer
539 serializes as an XML comment.
540
541 'text' is a string containing the comment string. Return an element
542 instance representing a comment.
543
544 """
532 element = Element(Comment) 545 element = Element(Comment)
533 element.text = text 546 element.text = text
534 return element 547 return element
535 548
536 ##
537 # PI element factory. This factory function creates a special element
538 # that will be serialized as an XML processing instruction by the standard
539 # serializer.
540 #
541 # @param target A string containing the PI target.
542 # @param text A string containing the PI contents, if any.
543 # @return An element instance, representing a PI.
544 # @defreturn Element
545 549
546 def ProcessingInstruction(target, text=None): 550 def ProcessingInstruction(target, text=None):
551 """Processing Instruction element factory.
552
553 This function creates a special element which the standard serializer
554 serializes as an XML processing instruction.
555
556 'target' is a string containing the processing instruction target, 'text' is a
557 string containing the processing instruction contents, if any. Return
558 an Element instance representing the processing instruction.
559
560 """
547 element = Element(ProcessingInstruction) 561 element = Element(ProcessingInstruction)
548 element.text = target 562 element.text = target
549 if text: 563 if text:
550 element.text = element.text + " " + text 564 element.text = element.text + " " + text
551 return element 565 return element
552 566
553 PI = ProcessingInstruction 567 PI = ProcessingInstruction
554 568
555 ##
556 # QName wrapper. This can be used to wrap a QName attribute value, in
557 # order to get proper namespace handling on output.
558 #
559 # @param text A string containing the QName value, in the form {uri}local,
560 # or, if the tag argument is given, the URI part of a QName.
561 # @param tag Optional tag. If given, the first argument is interpreted as
562 # an URI, and this argument is interpreted as a local name.
563 # @return An opaque object, representing the QName.
564 569
565 class QName: 570 class QName:
571 """Qualified name wrapper.
572
573 This class can be used to wrap a QName attribute value in order to get
574 proper namespace handling on output.
575
576 'text_or_uri' is a string containing the QName value either in the form
577 {uri}local, or if the tag argument is given, the URI part of a QName.
578
579 'tag' is an optional argument which if given, will make the first
580 argument (text_or_uri) be interpreted as a URI, and this argument (tag)
581 be interpreted as a local name.
582
583 """
566 def __init__(self, text_or_uri, tag=None): 584 def __init__(self, text_or_uri, tag=None):
567 if tag: 585 if tag:
568 text_or_uri = "{%s}%s" % (text_or_uri, tag) 586 text_or_uri = "{%s}%s" % (text_or_uri, tag)
569 self.text = text_or_uri 587 self.text = text_or_uri
570 def __str__(self): 588 def __str__(self):
571 return self.text 589 return self.text
572 def __repr__(self): 590 def __repr__(self):
573 return '<QName %r>' % (self.text,) 591 return '<QName %r>' % (self.text,)
574 def __hash__(self): 592 def __hash__(self):
575 return hash(self.text) 593 return hash(self.text)
(...skipping 17 matching lines...) Expand all
593 if isinstance(other, QName): 611 if isinstance(other, QName):
594 return self.text == other.text 612 return self.text == other.text
595 return self.text == other 613 return self.text == other
596 def __ne__(self, other): 614 def __ne__(self, other):
597 if isinstance(other, QName): 615 if isinstance(other, QName):
598 return self.text != other.text 616 return self.text != other.text
599 return self.text != other 617 return self.text != other
600 618
601 # -------------------------------------------------------------------- 619 # --------------------------------------------------------------------
602 620
603 ##
604 # ElementTree wrapper class. This class represents an entire element
605 # hierarchy, and adds some extra support for serialization to and from
606 # standard XML.
607 #
608 # @param element Optional root element.
609 # @keyparam file Optional file handle or file name. If given, the
610 # tree is initialized with the contents of this XML file.
611 621
612 class ElementTree: 622 class ElementTree:
623 """An XML element hierarchy.
613 624
625 This class also provides support for serialization to and from
626 standard XML.
627
628 'element' is an optional root element node,
629 'file' is an optional file handle or file name of an XML file whose
630 contents will be used to initialize the tree.
631
632 """
614 def __init__(self, element=None, file=None): 633 def __init__(self, element=None, file=None):
615 # assert element is None or iselement(element) 634 # assert element is None or iselement(element)
616 self._root = element # first node 635 self._root = element # first node
617 if file: 636 if file:
618 self.parse(file) 637 self.parse(file)
619 638
620 ##
621 # Gets the root element for this tree.
622 #
623 # @return An element instance.
624 # @defreturn Element
625
626 def getroot(self): 639 def getroot(self):
640 """Return root element of this tree."""
627 return self._root 641 return self._root
628 642
629 ##
630 # Replaces the root element for this tree. This discards the
631 # current contents of the tree, and replaces it with the given
632 # element. Use with care.
633 #
634 # @param element An element instance.
635 643
636 def _setroot(self, element): 644 def _setroot(self, element):
645 """Replace root element of this tree.
646
647 This will discard the current contents of the tree and replace it
648 with the given element. Use with care!
649
650 """
637 # assert iselement(element) 651 # assert iselement(element)
638 self._root = element 652 self._root = element
639 653
640 ##
641 # Loads an external XML document into this element tree.
642 #
643 # @param source A file name or file object. If a file object is
644 # given, it only has to implement a <b>read(n)</b> method.
645 # @keyparam parser An optional parser instance. If not given, the
646 # standard {@link XMLParser} parser is used.
647 # @return The document root element.
648 # @defreturn Element
649 # @exception ParseError If the parser fails to parse the document.
650 654
651 def parse(self, source, parser=None): 655 def parse(self, source, parser=None):
656 """Load external XML document into element tree.
657
658 'source' is a file name or file object, 'parser' is an optional parser
659 instance that defaults to XMLParser.
660
661 ParseError is raised if the parser fails to parse the document.
662
663 Returns the root element of the given source document.
664
665 """
652 close_source = False 666 close_source = False
653 if not hasattr(source, "read"): 667 if not hasattr(source, "read"):
654 source = open(source, "rb") 668 source = open(source, "rb")
655 close_source = True 669 close_source = True
656 try: 670 try:
657 if not parser: 671 if not parser:
658 parser = XMLParser(target=TreeBuilder()) 672 parser = XMLParser(target=TreeBuilder())
659 while 1: 673 while 1:
660 data = source.read(65536) 674 data = source.read(65536)
661 if not data: 675 if not data:
662 break 676 break
663 parser.feed(data) 677 parser.feed(data)
664 self._root = parser.close() 678 self._root = parser.close()
665 return self._root 679 return self._root
666 finally: 680 finally:
667 if close_source: 681 if close_source:
668 source.close() 682 source.close()
669 683
670 ##
671 # Creates a tree iterator for the root element. The iterator loops
672 # over all elements in this tree, in document order.
673 #
674 # @param tag What tags to look for (default is to return all elements)
675 # @return An iterator.
676 # @defreturn iterator
677 684
678 def iter(self, tag=None): 685 def iter(self, tag=None):
686 """Create and return tree iterator for the root element.
687
688 The iterator loops over all elements in this tree, in document order.
689
690 'tag' is a string with the tag name to iterate over (default is
691 to return all elements).
692
693 """
679 # assert self._root is not None 694 # assert self._root is not None
680 return self._root.iter(tag) 695 return self._root.iter(tag)
681 696
682 # compatibility 697 # compatibility
683 def getiterator(self, tag=None): 698 def getiterator(self, tag=None):
684 # Change for a DeprecationWarning in 1.4 699 # Change for a DeprecationWarning in 1.4
685 warnings.warn( 700 warnings.warn(
686 "This method will be removed in future versions. " 701 "This method will be removed in future versions. "
687 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 702 "Use 'tree.iter()' or 'list(tree.iter())' instead.",
688 PendingDeprecationWarning, stacklevel=2 703 PendingDeprecationWarning, stacklevel=2
689 ) 704 )
690 return list(self.iter(tag)) 705 return list(self.iter(tag))
691 706
692 ##
693 # Finds the first toplevel element with given tag.
694 # Same as getroot().find(path).
695 #
696 # @param path What element to look for.
697 # @keyparam namespaces Optional namespace prefix map.
698 # @return The first matching element, or None if no element was found.
699 # @defreturn Element or None
700 707
701 def find(self, path, namespaces=None): 708 def find(self, path, namespaces=None):
709 """Find first matching element by tag name or path.
710
711 Same as getroot().find(path), which is Element.find()
712
713 'path' is a string having either an element tag name or an XPath,
714 'namespaces' is an optional mapping from namespace prefix to full name.
715
716 Return the first matching element, or None if no element was found.
717
718 """
702 # assert self._root is not None 719 # assert self._root is not None
703 if path[:1] == "/": 720 if path[:1] == "/":
704 path = "." + path 721 path = "." + path
705 warnings.warn( 722 warnings.warn(
706 "This search is broken in 1.3 and earlier, and will be " 723 "This search is broken in 1.3 and earlier, and will be "
707 "fixed in a future version. If you rely on the current " 724 "fixed in a future version. If you rely on the current "
708 "behaviour, change it to %r" % path, 725 "behaviour, change it to %r" % path,
709 FutureWarning, stacklevel=2 726 FutureWarning, stacklevel=2
710 ) 727 )
711 return self._root.find(path, namespaces) 728 return self._root.find(path, namespaces)
712 729
713 ##
714 # Finds the element text for the first toplevel element with given
715 # tag. Same as getroot().findtext(path).
716 #
717 # @param path What toplevel element to look for.
718 # @param default What to return if the element was not found.
719 # @keyparam namespaces Optional namespace prefix map.
720 # @return The text content of the first matching element, or the
721 # default value no element was found. Note that if the element
722 # is found, but has no text content, this method returns an
723 # empty string.
724 # @defreturn string
725 730
726 def findtext(self, path, default=None, namespaces=None): 731 def findtext(self, path, default=None, namespaces=None):
732 """Find text for first matching element by tag name or path.
733
734 Same as getroot().findtext(path), which is Element.findtext()
735
736 'path' is a string having either an element tag name or an XPath,
737 'namespaces' is an optional mapping from namespace prefix to full name.
738
739 Return the text content of the first matching element (an empty string if it has none), or 'default' if no element was found.
740
741 """
727 # assert self._root is not None 742 # assert self._root is not None
728 if path[:1] == "/": 743 if path[:1] == "/":
729 path = "." + path 744 path = "." + path
730 warnings.warn( 745 warnings.warn(
731 "This search is broken in 1.3 and earlier, and will be " 746 "This search is broken in 1.3 and earlier, and will be "
732 "fixed in a future version. If you rely on the current " 747 "fixed in a future version. If you rely on the current "
733 "behaviour, change it to %r" % path, 748 "behaviour, change it to %r" % path,
734 FutureWarning, stacklevel=2 749 FutureWarning, stacklevel=2
735 ) 750 )
736 return self._root.findtext(path, default, namespaces) 751 return self._root.findtext(path, default, namespaces)
737 752
738 ##
739 # Finds all toplevel elements with the given tag.
740 # Same as getroot().findall(path).
741 #
742 # @param path What element to look for.
743 # @keyparam namespaces Optional namespace prefix map.
744 # @return A list or iterator containing all matching elements,
745 # in document order.
746 # @defreturn list of Element instances
747 753
748 def findall(self, path, namespaces=None): 754 def findall(self, path, namespaces=None):
755 """Find all matching subelements by tag name or path.
756
757 Same as getroot().findall(path), which is Element.findall().
758
759 'path' is a string having either an element tag name or an XPath,
760 'namespaces' is an optional mapping from namespace prefix to full name.
761
762 Return list containing all matching elements in document order.
763
764 """
749 # assert self._root is not None 765 # assert self._root is not None
750 if path[:1] == "/": 766 if path[:1] == "/":
751 path = "." + path 767 path = "." + path
752 warnings.warn( 768 warnings.warn(
753 "This search is broken in 1.3 and earlier, and will be " 769 "This search is broken in 1.3 and earlier, and will be "
754 "fixed in a future version. If you rely on the current " 770 "fixed in a future version. If you rely on the current "
755 "behaviour, change it to %r" % path, 771 "behaviour, change it to %r" % path,
756 FutureWarning, stacklevel=2 772 FutureWarning, stacklevel=2
757 ) 773 )
758 return self._root.findall(path, namespaces) 774 return self._root.findall(path, namespaces)
759 775
760 ##
761 # Finds all matching subelements, by tag name or path.
762 # Same as getroot().iterfind(path).
763 #
764 # @param path What element to look for.
765 # @keyparam namespaces Optional namespace prefix map.
766 # @return An iterator or sequence containing all matching elements,
767 # in document order.
768 # @defreturn a generated sequence of Element instances
769 776
770 def iterfind(self, path, namespaces=None): 777 def iterfind(self, path, namespaces=None):
778 """Find all matching subelements by tag name or path.
779
780 Same as getroot().iterfind(path), which is Element.iterfind()
781
782 'path' is a string having either an element tag name or an XPath,
783 'namespaces' is an optional mapping from namespace prefix to full name.
784
785 Return an iterable yielding all matching elements in document order.
786
787 """
771 # assert self._root is not None 788 # assert self._root is not None
772 if path[:1] == "/": 789 if path[:1] == "/":
773 path = "." + path 790 path = "." + path
774 warnings.warn( 791 warnings.warn(
775 "This search is broken in 1.3 and earlier, and will be " 792 "This search is broken in 1.3 and earlier, and will be "
776 "fixed in a future version. If you rely on the current " 793 "fixed in a future version. If you rely on the current "
777 "behaviour, change it to %r" % path, 794 "behaviour, change it to %r" % path,
778 FutureWarning, stacklevel=2 795 FutureWarning, stacklevel=2
779 ) 796 )
780 return self._root.iterfind(path, namespaces) 797 return self._root.iterfind(path, namespaces)
781 798
782 def write(self, file_or_filename, 799 def write(self, file_or_filename,
783 encoding=None, 800 encoding=None,
784 xml_declaration=None, 801 xml_declaration=None,
785 default_namespace=None, 802 default_namespace=None,
786 method=None, *, 803 method=None, *,
787 short_empty_elements=True): 804 short_empty_elements=True):
788 """Write the element tree to a file, as XML. 'file_or_filename' is a 805 """Write element tree to a file as XML.
789 file name or a file object opened for writing. 806
790 'encoding' is the output encoding (default is US-ASCII). 807 Arguments:
791 'xml_declaration' controls if an XML declaration should be added 808 'file_or_filename' -- file name or a file object opened for writing
792 to the output. Use False for never, True for always, None for only 809
793 if not US-ASCII or UTF-8 or Unicode (default is None). 810 'encoding' -- the output encoding (default: US-ASCII)
794 'default_namespace' sets the default XML namespace (for "xmlns"). 811
795 'method' is either "xml" (default), "html", "text" or "c14n". 812 'xml_declaration' -- bool indicating if an XML declaration should be
796 The keyword-only 'short_empty_elements' parameter controls the 813 added to the output. If None, an XML declaration
797 formatting of elements that contain no content. If True (default), 814 is added if encoding IS NOT either of:
798 they are emitted as a single self-closed tag, otherwise they are 815 US-ASCII, UTF-8, or Unicode
799 emitted as a pair of start/end tags. 816
817 'default_namespace' -- sets the default XML namespace (for "xmlns")
818
819 'method' -- either "xml" (default), "html", "text", or "c14n"
820
821 'short_empty_elements' -- controls the formatting of elements
822 that contain no content. If True (default)
823 they are emitted as a single self-closed
824 tag, otherwise they are emitted as a pair
825 of start/end tags
800 826
801 """ 827 """
802 if not method: 828 if not method:
803 method = "xml" 829 method = "xml"
804 elif method not in _serialize: 830 elif method not in _serialize:
805 raise ValueError("unknown method %r" % method) 831 raise ValueError("unknown method %r" % method)
806 if not encoding: 832 if not encoding:
807 if method == "c14n": 833 if method == "c14n":
808 encoding = "utf-8" 834 encoding = "utf-8"
809 else: 835 else:
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after
1064 write(elem.tail) 1090 write(elem.tail)
1065 1091
1066 _serialize = { 1092 _serialize = {
1067 "xml": _serialize_xml, 1093 "xml": _serialize_xml,
1068 "html": _serialize_html, 1094 "html": _serialize_html,
1069 "text": _serialize_text, 1095 "text": _serialize_text,
1070 # this optional method is imported at the end of the module 1096 # this optional method is imported at the end of the module
1071 # "c14n": _serialize_c14n, 1097 # "c14n": _serialize_c14n,
1072 } 1098 }
1073 1099
1074 ##
1075 # Registers a namespace prefix. The registry is global, and any
1076 # existing mapping for either the given prefix or the namespace URI
1077 # will be removed.
1078 #
1079 # @param prefix Namespace prefix.
1080 # @param uri Namespace uri. Tags and attributes in this namespace
1081 # will be serialized with the given prefix, if at all possible.
1082 # @exception ValueError If the prefix is reserved, or is otherwise
1083 # invalid.
1084 1100
1085 def register_namespace(prefix, uri): 1101 def register_namespace(prefix, uri):
1102 """Register namespace prefix.
1103
1104 The registry is global, and any existing mapping for either the
1105 given prefix or the namespace URI will be removed.
1106
1107 'prefix' is the namespace prefix, 'uri' is a namespace uri. Tags and
1108 attributes in this namespace will be serialized with prefix if possible.
1109
1110 ValueError is raised if prefix is reserved or is invalid.
1111
1112 """
1086 if re.match("ns\d+$", prefix): 1113 if re.match("ns\d+$", prefix):
1087 raise ValueError("Prefix format reserved for internal use") 1114 raise ValueError("Prefix format reserved for internal use")
1088 for k, v in list(_namespace_map.items()): 1115 for k, v in list(_namespace_map.items()):
1089 if k == uri or v == prefix: 1116 if k == uri or v == prefix:
1090 del _namespace_map[k] 1117 del _namespace_map[k]
1091 _namespace_map[uri] = prefix 1118 _namespace_map[uri] = prefix
1092 1119
1093 _namespace_map = { 1120 _namespace_map = {
1094 # "well-known" namespace prefixes 1121 # "well-known" namespace prefixes
1095 "http://www.w3.org/XML/1998/namespace": "xml", 1122 "http://www.w3.org/XML/1998/namespace": "xml",
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1151 if ">" in text: 1178 if ">" in text:
1152 text = text.replace(">", "&gt;") 1179 text = text.replace(">", "&gt;")
1153 if "\"" in text: 1180 if "\"" in text:
1154 text = text.replace("\"", "&quot;") 1181 text = text.replace("\"", "&quot;")
1155 return text 1182 return text
1156 except (TypeError, AttributeError): 1183 except (TypeError, AttributeError):
1157 _raise_serialization_error(text) 1184 _raise_serialization_error(text)
1158 1185
1159 # -------------------------------------------------------------------- 1186 # --------------------------------------------------------------------
1160 1187
1161 ##
1162 # Generates a string representation of an XML element, including all
1163 # subelements. If encoding is "unicode", the return type is a string;
1164 # otherwise it is a bytes array.
1165 #
1166 # @param element An Element instance.
1167 # @keyparam encoding Optional output encoding (default is US-ASCII).
1168 # Use "unicode" to return a Unicode string.
1169 # @keyparam method Optional output method ("xml", "html", "text" or
1170 # "c14n"; default is "xml").
1171 # @return An (optionally) encoded string containing the XML data.
1172 # @defreturn string
1173
1174 def tostring(element, encoding=None, method=None, *, 1188 def tostring(element, encoding=None, method=None, *,
1175 short_empty_elements=True): 1189 short_empty_elements=True):
1190 """Generate string representation of XML element.
1191
1192 All subelements are included. If encoding is "unicode", a string
1193 is returned. Otherwise a bytes array is returned.
1194
1195 'element' is an Element instance, 'encoding' is an optional output
1196 encoding defaulting to US-ASCII, 'method' is an optional output method which can
1197 be one of "xml" (default), "html", "text" or "c14n".
1198
1199 Returns an (optionally) encoded string containing the XML data.
1200
1201 """
1176 stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() 1202 stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
1177 ElementTree(element).write(stream, encoding, method=method, 1203 ElementTree(element).write(stream, encoding, method=method,
1178 short_empty_elements=short_empty_elements) 1204 short_empty_elements=short_empty_elements)
1179 return stream.getvalue() 1205 return stream.getvalue()
1180 1206
1181 ##
1182 # Generates a string representation of an XML element, including all
1183 # subelements.
1184 #
1185 # @param element An Element instance.
1186 # @keyparam encoding Optional output encoding (default is US-ASCII).
1187 # Use "unicode" to return a Unicode string.
1188 # @keyparam method Optional output method ("xml", "html", "text" or
1189 # "c14n"; default is "xml").
1190 # @return A sequence object containing the XML data.
1191 # @defreturn sequence
1192 # @since 1.3
1193
1194 class _ListDataStream(io.BufferedIOBase): 1207 class _ListDataStream(io.BufferedIOBase):
1195 """ An auxiliary stream accumulating into a list reference 1208 """An auxiliary stream accumulating into a list reference."""
1196 """
1197 def __init__(self, lst): 1209 def __init__(self, lst):
1198 self.lst = lst 1210 self.lst = lst
1199 1211
1200 def writable(self): 1212 def writable(self):
1201 return True 1213 return True
1202 1214
1203 def seekable(self): 1215 def seekable(self):
1204 return True 1216 return True
1205 1217
1206 def write(self, b): 1218 def write(self, b):
1207 self.lst.append(b) 1219 self.lst.append(b)
1208 1220
1209 def tell(self): 1221 def tell(self):
1210 return len(self.lst) 1222 return len(self.lst)
1211 1223
1212 def tostringlist(element, encoding=None, method=None, *, 1224 def tostringlist(element, encoding=None, method=None, *,
1213 short_empty_elements=True): 1225 short_empty_elements=True):
1214 lst = [] 1226 lst = []
1215 stream = _ListDataStream(lst) 1227 stream = _ListDataStream(lst)
1216 ElementTree(element).write(stream, encoding, method=method, 1228 ElementTree(element).write(stream, encoding, method=method,
1217 short_empty_elements=short_empty_elements) 1229 short_empty_elements=short_empty_elements)
1218 return lst 1230 return lst
1219 1231
1220 ##
1221 # Writes an element tree or element structure to sys.stdout. This
1222 # function should be used for debugging only.
1223 # <p>
1224 # The exact output format is implementation dependent. In this
1225 # version, it's written as an ordinary XML file.
1226 #
1227 # @param elem An element tree or an individual element.
1228 1232
1229 def dump(elem): 1233 def dump(elem):
1234 """Write element tree or element structure to sys.stdout.
1235
1236 This function should be used for debugging only.
1237
1238 'elem' is either an ElementTree, or a single Element. The exact output
1239 format is implementation dependent. In this version, it's written as an
1240 ordinary XML file.
1241
1242 """
1230 # debugging 1243 # debugging
1231 if not isinstance(elem, ElementTree): 1244 if not isinstance(elem, ElementTree):
1232 elem = ElementTree(elem) 1245 elem = ElementTree(elem)
1233 elem.write(sys.stdout, encoding="unicode") 1246 elem.write(sys.stdout, encoding="unicode")
1234 tail = elem.getroot().tail 1247 tail = elem.getroot().tail
1235 if not tail or tail[-1] != "\n": 1248 if not tail or tail[-1] != "\n":
1236 sys.stdout.write("\n") 1249 sys.stdout.write("\n")
1237 1250
1238 # -------------------------------------------------------------------- 1251 # --------------------------------------------------------------------
1239 # parsing 1252 # parsing
1240 1253
1241 ##
1242 # Parses an XML document into an element tree.
1243 #
1244 # @param source A filename or file object containing XML data.
1245 # @param parser An optional parser instance. If not given, the
1246 # standard {@link XMLParser} parser is used.
1247 # @return An ElementTree instance
1248 1254
1249 def parse(source, parser=None): 1255 def parse(source, parser=None):
1256 """Parse XML document into element tree.
1257
1258 'source' is a filename or file object containing XML data,
1259 'parser' is an optional parser instance defaulting to XMLParser.
1260
1261 Return an ElementTree instance.
1262
1263 """
1250 tree = ElementTree() 1264 tree = ElementTree()
1251 tree.parse(source, parser) 1265 tree.parse(source, parser)
1252 return tree 1266 return tree
1253 1267
1254 ##
1255 # Parses an XML document into an element tree incrementally, and reports
1256 # what's going on to the user.
1257 #
1258 # @param source A filename or file object containing XML data.
1259 # @param events A list of events to report back. If omitted, only "end"
1260 # events are reported.
1261 # @param parser An optional parser instance. If not given, the
1262 # standard {@link XMLParser} parser is used.
1263 # @return A (event, elem) iterator.
1264 1268
1265 def iterparse(source, events=None, parser=None): 1269 def iterparse(source, events=None, parser=None):
1270 """Incrementally parse XML document into ElementTree.
1271
1272 This function also reports what's going on to the user based on the
1273 'events' it is initialized with.
1274
1275 'source' is a filename or file object containing XML data, 'events' is
1276 a list of events to report back (the default is to report only "end"
1277 events), 'parser' is an optional parser instance.
1278
1279 Returns an iterator providing (event, elem) pairs.
1280
1281 """
1266 close_source = False 1282 close_source = False
1267 if not hasattr(source, "read"): 1283 if not hasattr(source, "read"):
1268 source = open(source, "rb") 1284 source = open(source, "rb")
1269 close_source = True 1285 close_source = True
1270 if not parser: 1286 if not parser:
1271 parser = XMLParser(target=TreeBuilder()) 1287 parser = XMLParser(target=TreeBuilder())
1272 return _IterParseIterator(source, events, parser, close_source) 1288 return _IterParseIterator(source, events, parser, close_source)
1273 1289
1274 class _IterParseIterator: 1290 class _IterParseIterator:
1275 1291
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
1342 self._parser.feed(data) 1358 self._parser.feed(data)
1343 except SyntaxError as exc: 1359 except SyntaxError as exc:
1344 self._error = exc 1360 self._error = exc
1345 else: 1361 else:
1346 self._root = self._parser.close() 1362 self._root = self._parser.close()
1347 self._parser = None 1363 self._parser = None
1348 1364
1349 def __iter__(self): 1365 def __iter__(self):
1350 return self 1366 return self
1351 1367
1352 ##
1353 # Parses an XML document from a string constant. This function can
1354 # be used to embed "XML literals" in Python code.
1355 #
1356 # @param source A string containing XML data.
1357 # @param parser An optional parser instance. If not given, the
1358 # standard {@link XMLParser} parser is used.
1359 # @return An Element instance.
1360 # @defreturn Element
1361 1368
1362 def XML(text, parser=None): 1369 def XML(text, parser=None):
1370 """Parse XML document from string constant.
1371
1372 This function can be used to embed "XML Literals" in Python code.
1373
1374 'text' is a string containing XML data, 'parser' is an
1375 optional parser instance, defaulting to the standard XMLParser.
1376
1377 Returns an Element instance.
1378
1379 """
1363 if not parser: 1380 if not parser:
1364 parser = XMLParser(target=TreeBuilder()) 1381 parser = XMLParser(target=TreeBuilder())
1365 parser.feed(text) 1382 parser.feed(text)
1366 return parser.close() 1383 return parser.close()
1367 1384
1368 ##
1369 # Parses an XML document from a string constant, and also returns
1370 # a dictionary which maps from element id:s to elements.
1371 #
1372 # @param source A string containing XML data.
1373 # @param parser An optional parser instance. If not given, the
1374 # standard {@link XMLParser} parser is used.
1375 # @return A tuple containing an Element instance and a dictionary.
1376 # @defreturn (Element, dictionary)
1377 1385
1378 def XMLID(text, parser=None): 1386 def XMLID(text, parser=None):
1387 """Parse XML document from string constant for its IDs.
1388
1389 'text' is a string containing XML data, 'parser' is an
1390 optional parser instance, defaulting to the standard XMLParser.
1391
1392 Returns an (Element, dict) tuple, in which the
1393 dict maps element IDs to elements.
1394
1395 """
1379 if not parser: 1396 if not parser:
1380 parser = XMLParser(target=TreeBuilder()) 1397 parser = XMLParser(target=TreeBuilder())
1381 parser.feed(text) 1398 parser.feed(text)
1382 tree = parser.close() 1399 tree = parser.close()
1383 ids = {} 1400 ids = {}
1384 for elem in tree.iter(): 1401 for elem in tree.iter():
1385 id = elem.get("id") 1402 id = elem.get("id")
1386 if id: 1403 if id:
1387 ids[id] = elem 1404 ids[id] = elem
1388 return tree, ids 1405 return tree, ids
1389 1406
1390 ##
1391 # Parses an XML document from a string constant. Same as {@link #XML}.
1392 #
1393 # @def fromstring(text)
1394 # @param source A string containing XML data.
1395 # @return An Element instance.
1396 # @defreturn Element
1397
1398 fromstring = XML 1407 fromstring = XML
1399 1408 """Parse XML document from string constant. Alias for XML()."""
1400 ##
1401 # Parses an XML document from a sequence of string fragments.
1402 #
1403 # @param sequence A list or other sequence containing XML data fragments.
1404 # @param parser An optional parser instance. If not given, the
1405 # standard {@link XMLParser} parser is used.
1406 # @return An Element instance.
1407 # @defreturn Element
1408 # @since 1.3
1409 1409
1410 def fromstringlist(sequence, parser=None): 1410 def fromstringlist(sequence, parser=None):
1411 """Parse XML document from sequence of string fragments.
1412
1413 'sequence' is a list or other sequence, 'parser' is an optional parser
1414 instance, defaulting to the standard XMLParser.
1415
1416 Returns an Element instance.
1417
1418 """
1411 if not parser: 1419 if not parser:
1412 parser = XMLParser(target=TreeBuilder()) 1420 parser = XMLParser(target=TreeBuilder())
1413 for text in sequence: 1421 for text in sequence:
1414 parser.feed(text) 1422 parser.feed(text)
1415 return parser.close() 1423 return parser.close()
1416 1424
1417 # -------------------------------------------------------------------- 1425 # --------------------------------------------------------------------
1418 1426
1419 ##
1420 # Generic element structure builder. This builder converts a sequence
1421 # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1422 # #TreeBuilder.end} method calls to a well-formed element structure.
1423 # <p>
1424 # You can use this class to build an element structure using a custom XML
1425 # parser, or a parser for some other XML-like format.
1426 #
1427 # @param element_factory Optional element factory. This factory
1428 # is called to create new Element instances, as necessary.
1429 1427
1430 class TreeBuilder: 1428 class TreeBuilder:
1429 """Generic element structure builder.
1431 1430
1431 This builder converts a sequence of start, data, and end method
1432 calls to a well-formed element structure.
1433
1434 You can use this class to build an element structure using a custom XML
1435 parser, or a parser for some other XML-like format.
1436
1437 'element_factory' is an optional element factory which is called
1438 to create new Element instances, as necessary.
1439
1440 """
1432 def __init__(self, element_factory=None): 1441 def __init__(self, element_factory=None):
1433 self._data = [] # data collector 1442 self._data = [] # data collector
1434 self._elem = [] # element stack 1443 self._elem = [] # element stack
1435 self._last = None # last element 1444 self._last = None # last element
1436 self._tail = None # true if we're after an end tag 1445 self._tail = None # true if we're after an end tag
1437 if element_factory is None: 1446 if element_factory is None:
1438 element_factory = Element 1447 element_factory = Element
1439 self._factory = element_factory 1448 self._factory = element_factory
1440 1449
1441 ##
1442 # Flushes the builder buffers, and returns the toplevel document
1443 # element.
1444 #
1445 # @return An Element instance.
1446 # @defreturn Element
1447
1448 def close(self): 1450 def close(self):
1451 """Flush builder buffers and return toplevel document Element."""
1449 assert len(self._elem) == 0, "missing end tags" 1452 assert len(self._elem) == 0, "missing end tags"
1450 assert self._last is not None, "missing toplevel element" 1453 assert self._last is not None, "missing toplevel element"
1451 return self._last 1454 return self._last
1452 1455
1453 def _flush(self): 1456 def _flush(self):
1454 if self._data: 1457 if self._data:
1455 if self._last is not None: 1458 if self._last is not None:
1456 text = "".join(self._data) 1459 text = "".join(self._data)
1457 if self._tail: 1460 if self._tail:
1458 assert self._last.tail is None, "internal error (tail)" 1461 assert self._last.tail is None, "internal error (tail)"
1459 self._last.tail = text 1462 self._last.tail = text
1460 else: 1463 else:
1461 assert self._last.text is None, "internal error (text)" 1464 assert self._last.text is None, "internal error (text)"
1462 self._last.text = text 1465 self._last.text = text
1463 self._data = [] 1466 self._data = []
1464 1467
1465 ##
1466 # Adds text to the current element.
1467 #
1468 # @param data A string. This should be either an 8-bit string
1469 # containing ASCII text, or a Unicode string.
1470 1468
1471 def data(self, data): 1469 def data(self, data):
1470 """Add text to current element.
1471
1472 'data' is a string that should be either an 8-bit string
1473 containing ASCII text, or a Unicode string.
1474
1475 """
1472 self._data.append(data) 1476 self._data.append(data)
1473 1477
1474 ##
1475 # Opens a new element.
1476 #
1477 # @param tag The element name.
1478 # @param attrib A dictionary containing element attributes.
1479 # @return The opened element.
1480 # @defreturn Element
1481 1478
1482 def start(self, tag, attrs): 1479 def start(self, tag, attrs):
1480 """Open new element and return it.
1481
1482 'tag' is the element name, 'attrs' is a dict containing
1483 element attributes.
1484
1485 """
1483 self._flush() 1486 self._flush()
1484 self._last = elem = self._factory(tag, attrs) 1487 self._last = elem = self._factory(tag, attrs)
1485 if self._elem: 1488 if self._elem:
1486 self._elem[-1].append(elem) 1489 self._elem[-1].append(elem)
1487 self._elem.append(elem) 1490 self._elem.append(elem)
1488 self._tail = 0 1491 self._tail = 0
1489 return elem 1492 return elem
1490 1493
1491 ##
1492 # Closes the current element.
1493 #
1494 # @param tag The element name.
1495 # @return The closed element.
1496 # @defreturn Element
1497 1494
1498 def end(self, tag): 1495 def end(self, tag):
1496 """Close and return current Element.
1497
1498 'tag' is the element name.
1499
1500 """
1499 self._flush() 1501 self._flush()
1500 self._last = self._elem.pop() 1502 self._last = self._elem.pop()
1501 assert self._last.tag == tag,\ 1503 assert self._last.tag == tag,\
1502 "end tag mismatch (expected %s, got %s)" % ( 1504 "end tag mismatch (expected %s, got %s)" % (
1503 self._last.tag, tag) 1505 self._last.tag, tag)
1504 self._tail = 1 1506 self._tail = 1
1505 return self._last 1507 return self._last
1506 1508
1507 ##
1508 # Element structure builder for XML source data, based on the
1509 # <b>expat</b> parser.
1510 #
1511 # @keyparam target Target object. If omitted, the builder uses an
1512 # instance of the standard {@link #TreeBuilder} class.
1513 # @keyparam html Predefine HTML entities. This flag is not supported
1514 # by the current implementation.
1515 # @keyparam encoding Optional encoding. If given, the value overrides
1516 # the encoding specified in the XML file.
1517 # @see #ElementTree
1518 # @see #TreeBuilder
1519 1509
1510 # also see ElementTree and TreeBuilder
1520 class XMLParser: 1511 class XMLParser:
1512 """Element structure builder for XML source data based on the expat parser.
1513
1514 'html' is a flag to predefine HTML entities (not supported currently),
1515 'target' is an optional target object which defaults to an instance of the
1516 standard TreeBuilder class, 'encoding' is an optional encoding string
1517 which if given, overrides the encoding specified in the XML file:
1518 http://www.iana.org/assignments/character-sets
1519
1520 """
1521 1521
1522 def __init__(self, html=0, target=None, encoding=None): 1522 def __init__(self, html=0, target=None, encoding=None):
1523 try: 1523 try:
1524 from xml.parsers import expat 1524 from xml.parsers import expat
1525 except ImportError: 1525 except ImportError:
1526 try: 1526 try:
1527 import pyexpat as expat 1527 import pyexpat as expat
1528 except ImportError: 1528 except ImportError:
1529 raise ImportError( 1529 raise ImportError(
1530 "No module named expat; use SimpleXMLTreeBuilder instead" 1530 "No module named expat; use SimpleXMLTreeBuilder instead"
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
1652 else: 1652 else:
1653 return 1653 return
1654 if hasattr(self.target, "doctype"): 1654 if hasattr(self.target, "doctype"):
1655 self.target.doctype(name, pubid, system[1:-1]) 1655 self.target.doctype(name, pubid, system[1:-1])
1656 elif self.doctype != self._XMLParser__doctype: 1656 elif self.doctype != self._XMLParser__doctype:
1657 # warn about deprecated call 1657 # warn about deprecated call
1658 self._XMLParser__doctype(name, pubid, system[1:-1]) 1658 self._XMLParser__doctype(name, pubid, system[1:-1])
1659 self.doctype(name, pubid, system[1:-1]) 1659 self.doctype(name, pubid, system[1:-1])
1660 self._doctype = None 1660 self._doctype = None
1661 1661
1662 ##
1663 # (Deprecated) Handles a doctype declaration.
1664 #
1665 # @param name Doctype name.
1666 # @param pubid Public identifier.
1667 # @param system System identifier.
1668
1669 def doctype(self, name, pubid, system): 1662 def doctype(self, name, pubid, system):
1670 """This method of XMLParser is deprecated.""" 1663 """(Deprecated) Handle doctype declaration.
1664
1665 'name' is the Doctype name, 'pubid' is the public identifier, and
1666 'system' is the system identifier.
1667
1668 """
1671 warnings.warn( 1669 warnings.warn(
1672 "This method of XMLParser is deprecated. Define doctype() " 1670 "This method of XMLParser is deprecated. Define doctype() "
1673 "method on the TreeBuilder target.", 1671 "method on the TreeBuilder target.",
1674 DeprecationWarning, 1672 DeprecationWarning,
1675 ) 1673 )
1676 1674
1677 # sentinel, if doctype is redefined in a subclass 1675 # sentinel, if doctype is redefined in a subclass
1678 __doctype = doctype 1676 __doctype = doctype
1679 1677
1680 ##
1681 # Feeds data to the parser.
1682 #
1683 # @param data Encoded data.
1684
1685 def feed(self, data): 1678 def feed(self, data):
1679 """Feed encoded data to parser."""
1686 try: 1680 try:
1687 self.parser.Parse(data, 0) 1681 self.parser.Parse(data, 0)
1688 except self._error as v: 1682 except self._error as v:
1689 self._raiseerror(v) 1683 self._raiseerror(v)
1690 1684
1691 ##
1692 # Finishes feeding data to the parser.
1693 #
1694 # @return An element structure.
1695 # @defreturn Element
1696
1697 def close(self): 1685 def close(self):
1686 """Finish feeding data to parser and return element structure."""
1698 try: 1687 try:
1699 self.parser.Parse("", 1) # end of data 1688 self.parser.Parse("", 1) # end of data
1700 except self._error as v: 1689 except self._error as v:
1701 self._raiseerror(v) 1690 self._raiseerror(v)
1702 try: 1691 try:
1703 close_handler = self.target.close 1692 close_handler = self.target.close
1704 except AttributeError: 1693 except AttributeError:
1705 pass 1694 pass
1706 else: 1695 else:
1707 return close_handler() 1696 return close_handler()
1708 finally: 1697 finally:
1709 # get rid of circular references 1698 # get rid of circular references
1710 del self.parser, self._parser 1699 del self.parser, self._parser
1711 del self.target, self._target 1700 del self.target, self._target
1712 1701
1713 1702
1714 # Import the C accelerators 1703 # Import the C accelerators
1715 try: 1704 try:
1716 # Element, SubElement, ParseError, TreeBuilder, XMLParser 1705 # Element, SubElement, ParseError, TreeBuilder, XMLParser
1717 from _elementtree import * 1706 from _elementtree import *
1718 except ImportError: 1707 except ImportError:
1719 pass 1708 pass
1720 else: 1709 else:
1721 # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser 1710 # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser
1722 1711
1723 class ElementTree(ElementTree): 1712 class ElementTree(ElementTree):
1713 __doc__ = ElementTree.__doc__
1724 def parse(self, source, parser=None): 1714 def parse(self, source, parser=None):
1715 """Load external XML document into element tree.
1716
1717 'source' is a file name or file object, 'parser' is
1718 an optional parser instance that defaults to XMLParser.
1719
1720 ParseError is raised if the parser fails to parse the document.
1721
1722 Returns the root element of the given source document.
1723
1724 """
1725 close_source = False 1725 close_source = False
1726 if not hasattr(source, 'read'): 1726 if not hasattr(source, 'read'):
1727 source = open(source, 'rb') 1727 source = open(source, 'rb')
1728 close_source = True 1728 close_source = True
1729 try: 1729 try:
1730 if parser is not None: 1730 if parser is not None:
1731 while True: 1731 while True:
1732 data = source.read(65536) 1732 data = source.read(65536)
1733 if not data: 1733 if not data:
1734 break 1734 break
1735 parser.feed(data) 1735 parser.feed(data)
1736 self._root = parser.close() 1736 self._root = parser.close()
1737 else: 1737 else:
1738 parser = XMLParser() 1738 parser = XMLParser()
1739 self._root = parser._parse(source) 1739 self._root = parser._parse(source)
1740 return self._root 1740 return self._root
1741 finally: 1741 finally:
1742 if close_source: 1742 if close_source:
1743 source.close() 1743 source.close()
1744 1744
1745 class iterparse: 1745 class iterparse:
1746 """Parses an XML section into an element tree incrementally. 1746 """Incrementally parse XML document into ElementTree.
1747 1747
1748 Reports what’s going on to the user. 'source' is a filename or file 1748 This class also reports what's going on to the user based on the
1749 object containing XML data. 'events' is a list of events to report back. 1749 'events' it is initialized with.
1750 The supported events are the strings "start", "end", "start-ns" and 1750
1751 "end-ns" (the "ns" events are used to get detailed namespace 1751 'source' is a filename or file object containing XML data, 'events' is
1752 information). If 'events' is omitted, only "end" events are reported. 1752 a list of events to report back (the default is to report only "end"
1753 'parser' is an optional parser instance. If not given, the standard 1753 events), 'parser' is an optional parser instance.
1754 XMLParser parser is used. Returns an iterator providing 1754
1755 (event, elem) pairs. 1755 Returns an iterator providing (event, elem) pairs.
1756
1756 """ 1757 """
1757
1758 root = None 1758 root = None
1759 def __init__(self, file, events=None, parser=None): 1759 def __init__(self, file, events=None, parser=None):
1760 self._close_file = False 1760 self._close_file = False
1761 if not hasattr(file, 'read'): 1761 if not hasattr(file, 'read'):
1762 file = open(file, 'rb') 1762 file = open(file, 'rb')
1763 self._close_file = True 1763 self._close_file = True
1764 self._file = file 1764 self._file = file
1765 self._events = [] 1765 self._events = []
1766 self._index = 0 1766 self._index = 0
1767 self._error = None 1767 self._error = None
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1806 1806
1807 # compatibility 1807 # compatibility
1808 XMLTreeBuilder = XMLParser 1808 XMLTreeBuilder = XMLParser
1809 1809
1810 # workaround circular import. 1810 # workaround circular import.
1811 try: 1811 try:
1812 from ElementC14N import _serialize_c14n 1812 from ElementC14N import _serialize_c14n
1813 _serialize["c14n"] = _serialize_c14n 1813 _serialize["c14n"] = _serialize_c14n
1814 except ImportError: 1814 except ImportError:
1815 pass 1815 pass
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+