Index: setup.py =================================================================== --- setup.py (revision 64102) +++ setup.py (working copy) @@ -422,6 +422,7 @@ exts.append( Extension("_functools", ["_functoolsmodule.c"]) ) # Memory-based IO accelerator modules exts.append( Extension("_bytesio", ["_bytesio.c"]) ) + exts.append( Extension("_stringio", ["_stringio.c"]) ) # atexit exts.append( Extension("atexit", ["atexitmodule.c"]) ) # _json speedups Index: Lib/io.py =================================================================== --- Lib/io.py (revision 64102) +++ Lib/io.py (working copy) @@ -1769,20 +1769,20 @@ def newlines(self): return self._decoder.newlines if self._decoder else None -class StringIO(TextIOWrapper): - """An in-memory stream for text. The initial_value argument sets the - value of object. The other arguments are like those of TextIOWrapper's - constructor. +class _StringIO(TextIOWrapper): + """Text I/O implementation using an in-memory buffer. + + The initial_value argument sets the value of object. The newline + argument is like the one of TextIOWrapper's constructor. """ # XXX This is really slow, but fully functional - def __init__(self, initial_value="", encoding="utf-8", - errors="strict", newline="\n"): - super(StringIO, self).__init__(BytesIO(), - encoding=encoding, - errors=errors, - newline=newline) + def __init__(self, initial_value="", newline="\n"): + super(_StringIO, self).__init__(BytesIO(), + encoding="utf-8", + errors="strict", + newline=newline) if initial_value: if not isinstance(initial_value, str): initial_value = str(initial_value) @@ -1792,3 +1792,271 @@ def getvalue(self): self.flush() return self.buffer.getvalue().decode(self._encoding, self._errors) + +try: + import _stringio + + # This subclass is a reimplementation of the TextIOWrapper + # interface without any of its text decoding facilities. All the + # stored data is manipulated with the efficient + # _stringio._StringIO extension type. Also, the newline decoding + # mechanism of IncrementalNewlineDecoder is reimplemented here for + # efficiency. Doing otherwise, would require us to implement a + # fake decoder which would add an additional and unnecessary layer + # on top of the _StringIO methods. + + class StringIO(_stringio._StringIO, TextIOBase): + """Text I/O implementation using an in-memory buffer. + + The initial_value argument sets the value of object. The newline + argument is like the one of TextIOWrapper's constructor. + """ + + _CHUNK_SIZE = 4096 + + def __init__(self, initial_value="", newline="\n"): + if newline not in (None, "", "\n", "\r", "\r\n"): + raise ValueError("illegal newline value: %r" % (newline,)) + + self._readuniversal = not newline + self._readtranslate = newline is None + self._readnl = newline + self._writetranslate = newline != "" + self._writenl = newline or os.linesep + self._pending = "" + self._seennl = 0 + + # Reset the buffer first, in case __init__ is called + # multiple times. + self.truncate(0) + if initial_value is None: + initial_value = "" + self.write(initial_value) + self.seek(0) + + @property + def buffer(self): + raise UnsupportedOperation("%s.buffer attribute is unsupported" % + self.__class__.__name__) + + def _decode_newlines(self, input, final=False): + # decode input (with the eventual \r from a previous pass) + if self._pending: + input = self._pending + input + + # retain last \r even when not translating data: + # then readline() is sure to get \r\n in one pass + if input.endswith("\r") and not final: + input = input[:-1] + self._pending = "\r" + else: + self._pending = "" + + # Record which newlines are read + crlf = input.count('\r\n') + cr = input.count('\r') - crlf + lf = input.count('\n') - crlf + self._seennl |= (lf and self._LF) | (cr and self._CR) \ + | (crlf and self._CRLF) + + if self._readtranslate: + if crlf: + output = input.replace("\r\n", "\n") + if cr: + output = input.replace("\r", "\n") + else: + output = input + + return output + + def writable(self): + return True + + def readable(self): + return True + + def seekable(self): + return True + + _read = _stringio._StringIO.read + _write = _stringio._StringIO.write + _tell = _stringio._StringIO.tell + _seek = _stringio._StringIO.seek + _truncate = _stringio._StringIO.truncate + _getvalue = _stringio._StringIO.getvalue + + def getvalue(self) -> str: + """Retrieve the entire contents of the object.""" + if self.closed: + raise ValueError("read on closed file") + return self._getvalue() + + def write(self, s: str) -> int: + """Write string s to file. + + Returns the number of characters written. + """ + if self.closed: + raise ValueError("write to closed file") + if not isinstance(s, str): + raise TypeError("can't write %s to text stream" % + s.__class__.__name__) + length = len(s) + if self._writetranslate and self._writenl != "\n": + s = s.replace("\n", self._writenl) + self._pending = "" + self._write(s) + return length + + def read(self, n: int = None) -> str: + """Read at most n characters, returned as a string. + + If the argument is negative or omitted, read until EOF + is reached. Return an empty string at EOF. + """ + if self.closed: + raise ValueError("read to closed file") + if n is None: + n = -1 + res = self._pending + if n < 0: + res += self._decode_newlines(self._read(), True) + self._pending = "" + return res + else: + res = self._decode_newlines(self._read(n), True) + self._pending = res[n:] + return res[:n] + + def tell(self) -> int: + """Tell the current file position.""" + if self.closed: + raise ValueError("tell from closed file") + if self._pending: + return self._tell() - len(self._pending) + else: + return self._tell() + + def seek(self, pos: int = None, whence: int = 0) -> int: + """Change stream position. + + Seek to character offset pos relative to position indicated by whence: + 0 Start of stream (the default). pos should be >= 0; + 1 Current position - pos must be 0; + 2 End of stream - pos must be 0. + Returns the new absolute position. + """ + if self.closed: + raise ValueError("seek from closed file") + self._pending = "" + return self._seek(pos, whence) + + def truncate(self, pos: int = None) -> int: + """Truncate size to pos. + + The pos argument defaults to the current file position, as + returned by tell(). Imply an absolute seek to pos. + Returns the new absolute position. + """ + if self.closed: + raise ValueError("truncate from closed file") + self._pending = "" + return self._truncate(pos) + + def readline(self, limit: int = None) -> str: + if self.closed: + raise ValueError("read from closed file") + if limit is None: + limit = -1 + if limit >= 0: + # XXX: Hack to support limit argument, for backwards + # XXX compatibility + line = self.readline() + if len(line) <= limit: + return line + line, self._pending = line[:limit], line[limit:] + self._pending + return line + + line = self._pending + self._pending = "" + + start = 0 + pos = endpos = None + while True: + if self._readtranslate: + # Newlines are already translated, only search for \n + pos = line.find('\n', start) + if pos >= 0: + endpos = pos + 1 + break + else: + start = len(line) + + elif self._readuniversal: + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + + # In C we'd look for these in parallel of course. + nlpos = line.find("\n", start) + crpos = line.find("\r", start) + if crpos == -1: + if nlpos == -1: + # Nothing found + start = len(line) + else: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == -1: + # Found lone \r + endpos = crpos + 1 + break + elif nlpos < crpos: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == crpos + 1: + # Found \r\n + endpos = crpos + 2 + break + else: + # Found \r + endpos = crpos + 1 + break + else: + # non-universal + pos = line.find(self._readnl) + if pos >= 0: + endpos = pos + len(self._readnl) + break + + # No line ending seen yet - get more data + more_line = self.read(self._CHUNK_SIZE) + if more_line: + line += more_line + else: + # end of file + return line + + self._pending = line[endpos:] + return line[:endpos] + + _LF = 1 + _CR = 2 + _CRLF = 4 + + @property + def newlines(self): + return (None, + "\n", + "\r", + ("\r", "\n"), + "\r\n", + ("\n", "\r\n"), + ("\r", "\r\n"), + ("\r", "\n", "\r\n") + )[self._seennl] + + +except ImportError: + StringIO = _StringIO Index: Lib/test/test_uu.py =================================================================== --- Lib/test/test_uu.py (revision 64102) +++ Lib/test/test_uu.py (working copy) @@ -17,6 +17,32 @@ M5&AE('-M;V]T:\"US8V%L960@<'ET:&]N(&-R97!T(&]V97(@=&AE('-L965P (:6YG(&1O9PH """ +# Stolen from io.py +class FakeIO(io.TextIOWrapper): + """Text I/O implementation using an in-memory buffer. + + Can be a used as a drop-in replacement for sys.stdin and sys.stdout. + """ + + # XXX This is really slow, but fully functional + + def __init__(self, initial_value="", encoding="utf-8", + errors="strict", newline="\n"): + super(FakeIO, self).__init__(io.BytesIO(), + encoding=encoding, + errors=errors, + newline=newline) + if initial_value: + if not isinstance(initial_value, str): + initial_value = str(initial_value) + self.write(initial_value) + self.seek(0) + + def getvalue(self): + self.flush() + return self.buffer.getvalue().decode(self._encoding, self._errors) + + def encodedtextwrapped(mode, filename): return (bytes("begin %03o %s\n" % (mode, filename), "ascii") + encodedtext + b"\n \nend\n") @@ -76,15 +102,15 @@ sys.stdout = self.stdout def test_encode(self): - sys.stdin = io.StringIO(plaintext.decode("ascii")) - sys.stdout = io.StringIO() + sys.stdin = FakeIO(plaintext.decode("ascii")) + sys.stdout = FakeIO() uu.encode("-", "-", "t1", 0o666) self.assertEqual(sys.stdout.getvalue(), encodedtextwrapped(0o666, "t1").decode("ascii")) def test_decode(self): - sys.stdin = io.StringIO(encodedtextwrapped(0o666, "t1").decode("ascii")) - sys.stdout = io.StringIO() + sys.stdin = FakeIO(encodedtextwrapped(0o666, "t1").decode("ascii")) + sys.stdout = FakeIO() uu.decode("-", "-") stdout = sys.stdout sys.stdout = self.stdout Index: Lib/test/test_minidom.py =================================================================== --- Lib/test/test_minidom.py (revision 64102) +++ Lib/test/test_minidom.py (working copy) @@ -3,7 +3,6 @@ import os import sys import pickle -from io import StringIO from test.support import verbose, run_unittest, TestSkipped import unittest @@ -80,7 +79,7 @@ self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t))) def testParseFromFile(self): - dom = parse(StringIO(open(tstfile).read())) + dom = parse(open(tstfile)) dom.unlink() self.confirm(isinstance(dom, Document)) Index: Lib/test/test_memoryio.py =================================================================== --- Lib/test/test_memoryio.py (revision 64102) +++ Lib/test/test_memoryio.py (working copy) @@ -373,7 +373,7 @@ class PyStringIOTest(MemoryTestMixin, unittest.TestCase): buftype = str - ioclass = io.StringIO + ioclass = io._StringIO EOF = "" def test_relative_seek(self): @@ -404,10 +404,14 @@ class CBytesIOTest(PyBytesIOTest): ioclass = io.BytesIO + class CStringIOTest(PyStringIOTest): + ioclass = io.StringIO + + def test_main(): tests = [PyBytesIOTest, PyStringIOTest] if has_c_implementation: - tests.extend([CBytesIOTest]) + tests.extend([CBytesIOTest, CStringIOTest]) support.run_unittest(*tests) if __name__ == '__main__': Index: Lib/xml/dom/minidom.py =================================================================== --- Lib/xml/dom/minidom.py (revision 64102) +++ Lib/xml/dom/minidom.py (working copy) @@ -14,6 +14,7 @@ * SAX 2 namespaces """ +import codecs import io import xml.dom @@ -49,16 +50,16 @@ # indent = the indentation string to prepend, per level # newl = the newline string to append use_encoding = "utf-8" if encoding is None else encoding - writer = io.StringIO(encoding=use_encoding) + writer = codecs.getwriter(use_encoding)(io.BytesIO()) if self.nodeType == Node.DOCUMENT_NODE: # Can pass encoding only to document, to put it into XML header self.writexml(writer, "", indent, newl, encoding) else: self.writexml(writer, "", indent, newl) if encoding is None: - return writer.getvalue() + return writer.stream.getvalue().decode(use_encoding) else: - return writer.buffer.getvalue() + return writer.stream.getvalue() def hasChildNodes(self): if self.childNodes: