diff -r 5470dc81caf9 Doc/library/xml.etree.elementtree.rst
--- a/Doc/library/xml.etree.elementtree.rst	Tue Jul 17 14:28:44 2012 +0200
+++ b/Doc/library/xml.etree.elementtree.rst	Wed Jul 18 09:35:42 2012 +0300
@@ -477,7 +477,6 @@
       should be added to the file.  Use False for never, True for always, None
       for only if not US-ASCII or UTF-8 or Unicode (default is None).  *method* is
       either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
-      Returns an (optionally) encoded string.
 
 
 This is the XML file that is going to be manipulated::
diff -r 5470dc81caf9 Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py	Tue Jul 17 14:28:44 2012 +0200
+++ b/Lib/xml/etree/ElementTree.py	Wed Jul 18 09:35:42 2012 +0300
@@ -99,6 +99,8 @@
 import sys
 import re
 import warnings
+import io
+import contextlib
 
 
 class _SimpleElementPath:
@@ -809,59 +811,38 @@
     #     None for only if not US-ASCII or UTF-8 or Unicode.  None is default.
 
     def write(self, file_or_filename,
-              # keyword arguments
               encoding=None,
               xml_declaration=None,
               default_namespace=None,
               method=None):
-        # assert self._root is not None
         if not method:
             method = "xml"
         elif method not in _serialize:
-            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
             raise ValueError("unknown method %r" % method)
         if not encoding:
             if method == "c14n":
                 encoding = "utf-8"
             else:
                 encoding = "us-ascii"
-        elif encoding == str:  # lxml.etree compatibility.
-            encoding = "unicode"
         else:
             encoding = encoding.lower()
-        if hasattr(file_or_filename, "write"):
-            file = file_or_filename
-        else:
-            if encoding != "unicode":
-                file = open(file_or_filename, "wb")
+        with _get_writer(file_or_filename, encoding) as write:
+            if method == "xml" and (xml_declaration or
+                    (xml_declaration is None and
+                     encoding not in ("utf-8", "us-ascii", "unicode"))):
+                declared_encoding = encoding
+                if encoding == "unicode":
+                    # Retrieve the default encoding for the xml declaration
+                    import locale
+                    declared_encoding = locale.getpreferredencoding()
+                write("<?xml version='1.0' encoding='%s'?>\n" % (
+                    declared_encoding,))
+            if method == "text":
+                _serialize_text(write, self._root)
             else:
-                file = open(file_or_filename, "w")
-        if encoding != "unicode":
-            def write(text):
-                try:
-                    return file.write(text.encode(encoding,
-                                                  "xmlcharrefreplace"))
-                except (TypeError, AttributeError):
-                    _raise_serialization_error(text)
-        else:
-            write = file.write
-        if method == "xml" and (xml_declaration or
-                (xml_declaration is None and
-                 encoding not in ("utf-8", "us-ascii", "unicode"))):
-            declared_encoding = encoding
-            if encoding == "unicode":
-                # Retrieve the default encoding for the xml declaration
-                import locale
-                declared_encoding = locale.getpreferredencoding()
-            write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding)
-        if method == "text":
-            _serialize_text(write, self._root)
-        else:
-            qnames, namespaces = _namespaces(self._root, default_namespace)
-            serialize = _serialize[method]
-            serialize(write, self._root, qnames, namespaces)
-        if file_or_filename is not file:
-            file.close()
+                qnames, namespaces = _namespaces(self._root, default_namespace)
+                serialize = _serialize[method]
+                serialize(write, self._root, qnames, namespaces)
 
     def write_c14n(self, file):
         # lxml.etree compatibility.  use output method instead
@@ -870,6 +851,62 @@
 # --------------------------------------------------------------------
 # serialization support
 
+@contextlib.contextmanager
+def _get_writer(file_or_filename, encoding):
+    # returns text write method and releases all resources after using
+    try:
+        write = file_or_filename.write
+    except AttributeError:
+        # file_or_filename is a file name
+        if encoding == "unicode":
+            file = open(file_or_filename, "w")
+        else:
+            file = open(file_or_filename, "w", encoding=encoding,
+                        errors="xmlcharrefreplace")
+        with file:
+            yield file.write
+    else:
+        # file_or_filename is a file-like object
+        # encoding determines if it is a text or binary writer
+        if encoding == "unicode":
+            # use a text writer as is
+            yield write
+        else:
+            # wrap a binary writer with TextIOWrapper
+            stack = []
+            try:
+                if isinstance(file_or_filename, io.BufferedIOBase):
+                    file = file_or_filename
+                elif isinstance(file_or_filename, io.RawIOBase):
+                    file = io.BufferedWriter(file_or_filename)
+                    # Keep the original file open when the BufferedWriter is
+                    # destroyed
+                    stack.append(file.detach)
+                else:
+                    # This is to handle passed objects that aren't in the
+                    # IOBase hierarchy, but just have a write method
+                    file = io.BufferedIOBase()
+                    file.writable = lambda: True
+                    file.write = write
+                    try:
+                        # TextIOWrapper uses these methods to determine
+                        # if BOM (for UTF-16, etc) should be added
+                        file.seekable = file_or_filename.seekable
+                        file.tell = file_or_filename.tell
+                    except AttributeError:
+                        pass
+                file = io.TextIOWrapper(file,
+                                        encoding=encoding,
+                                        errors="xmlcharrefreplace",
+                                        newline="\n")
+                # Keep the original file open when the TextIOWrapper is
+                # destroyed
+                stack.append(file.detach)
+                yield file.write
+            finally:
+                while stack:
+                    stack.pop()()
+
 def _namespaces(elem, default_namespace=None):
     # identify namespaces used in this tree
 
@@ -1153,22 +1190,13 @@
 # @defreturn string
 
 def tostring(element, encoding=None, method=None):
-    class dummy:
-        pass
-    data = []
-    file = dummy()
-    file.write = data.append
-    ElementTree(element).write(file, encoding, method=method)
-    if encoding in (str, "unicode"):
-        return "".join(data)
-    else:
-        return b"".join(data)
+    stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
+    ElementTree(element).write(stream, encoding, method=method)
+    return stream.getvalue()
 
 ##
 # Generates a string representation of an XML element, including all
-# subelements.  If encoding is False, the string is returned as a
-# sequence of string fragments; otherwise it is a sequence of
-# bytestrings.
+# subelements.
 #
 # @param element An Element instance.
 # @keyparam encoding Optional output encoding (default is US-ASCII).
@@ -1179,15 +1207,29 @@
 # @defreturn sequence
 # @since 1.3
 
+class _ListDataStream(io.BufferedIOBase):
+    """ An auxiliary stream accumulating into a list reference
+    """
+    def __init__(self, lst):
+        self.lst = lst
+
+    def writable(self):
+        return True
+
+    def seekable(self):
+        return True
+
+    def write(self, b):
+        self.lst.append(b)
+
+    def tell(self):
+        return len(self.lst)
+
 def tostringlist(element, encoding=None, method=None):
-    class dummy:
-        pass
-    data = []
-    file = dummy()
-    file.write = data.append
-    ElementTree(element).write(file, encoding, method=method)
-    # FIXME: merge small fragments into larger parts
-    return data
+    lst = []
+    stream = _ListDataStream(lst)
+    ElementTree(element).write(stream, encoding, method=method)
+    return lst
 
 ##
 # Writes an element tree or element structure to sys.stdout.  This