# Reconstructed from a patch ("Index: Lib/io.py", revision 54258 -> working
# copy) whose line structure had been lost.  The patch adds the text-layer
# definitions below; they are reproduced here as plain Python.
#
# The same patch also touches existing code that is only partially visible in
# its context lines, so it is summarised here rather than reproduced:
#   * ``import sys`` is added after ``import os``.
#   * The buffered reader's __init__ gains ``buffer_size=DEFAULT_BUFFER_SIZE``
#     and stores it as ``self.buffer_size``.
#   * A read loop (presumably that class's read(); its header is not visible)
#     becomes ``while n is None or len(self._read_buf) < n`` and asks raw for
#     ``self.buffer_size`` bytes when n is None, otherwise for
#     ``n - len(self._read_buf)`` bytes.  Previously it always called
#     ``self.raw.read(n)``.

import os
import sys

DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes


def make_encoding(encoding):
    """Return *encoding*, or the filesystem encoding when it is None."""
    return encoding if encoding is not None else sys.getfilesystemencoding()


def make_newline(newline):
    """Return the line terminator to use.

    *newline* must be None, '\\n' or '\\r\\n'; None selects os.linesep.
    """
    assert newline in (None, '\n', '\r\n'), "illegal newline %s" % newline
    return newline if newline else os.linesep


class TextLineReverser:
    """Iterator over the lines of a seekable raw byte stream, last line first.

    Every line is returned decoded and with its terminator, except that the
    final line of the stream is returned without one when the stream does not
    end in a terminator.  A terminator at the very end of the stream does not
    produce an extra empty line.

    The stream is read backwards in chunks aligned to multiples of
    *buffer_size*.
    """

    def __init__(self, raw, encoding=None, newline=None,
                 buffer_size=DEFAULT_BUFFER_SIZE):
        """Position *raw* at its end and prepare to iterate backwards.

        raw         -- object providing seek(), tell() and read() on bytes
        encoding    -- text encoding (None: filesystem encoding)
        newline     -- None, '\\n' or '\\r\\n' (None: os.linesep)
        buffer_size -- chunk size in bytes; must be positive
        """
        if buffer_size <= 0:
            raise ValueError("buffer_size must be positive")
        # XXX: Searches backwards for encoded line terminator, which
        # works for UTF-8 but not for encodings where one character encoding
        # can be a substring of another longer one.
        raw.seek(0, 2)
        self.raw = raw
        self._bufsize = buffer_size
        self._encoding = make_encoding(encoding)
        # XXX: bytes() call below should not be needed but in my build (r54258)
        # type('foo').encode('utf-8') is str, not bytes as I would expect.
        self._newline = bytes(make_newline(newline).encode(self._encoding))
        # _limpos: absolute offset one past the end of the next line to return.
        # _bufpos: absolute offset of the first byte held in _buf.
        self._limpos = raw.tell()
        self._bufpos = self._limpos
        self._buf = b''

    def __iter__(self):
        return self

    def _extend_buffer_backwards(self):
        """Prepend the previous aligned chunk of the stream to the buffer.

        Bytes at or beyond _limpos have already been returned and are dropped.
        Must only be called while _bufpos > 0.
        """
        bufpos, limpos, bufsize = self._bufpos, self._limpos, self._bufsize

        # Step back to the previous multiple of bufsize.
        newpos = (bufpos // bufsize) * bufsize
        if newpos == bufpos:
            newpos -= bufsize
        assert newpos >= 0

        nbytes = bufpos - newpos
        assert nbytes != 0

        self.raw.seek(newpos, 0)
        # Capture the position so the failure message can report it (the
        # message used to reference an undefined name).
        arrived = self.raw.tell()
        assert arrived == newpos, \
            'seek() arrived at %r (expected %r)' % (arrived, newpos)

        newbuf = self.raw.read(nbytes)
        assert len(newbuf) == nbytes, 'Unexpected EOF'

        # Keep only the part of the old buffer that is still unreturned.
        if limpos > bufpos:
            newbuf += self._buf[:limpos - bufpos]

        self._buf, self._bufpos = newbuf, newpos

    def next(self):
        """Return the next line, moving towards the start of the stream.

        Raises StopIteration once the first line of the stream was returned.
        """
        limpos, endline = self._limpos, self._newline

        # -len(endline) is the sentinel stored after the first line of the
        # stream has been handed out.
        if limpos == -len(endline):
            raise StopIteration
        assert limpos >= 0

        # The buffer is only empty before anything has been read.
        is_firstline = self._buf == b''

        # lim points one character past the end of the line we're about to
        # return - e.g the bracketed char here: "abc[\r\n]def"
        #
        # When we exit the loop, offset will be the index within self._buf of
        # the start of the newline before the line we want to return.
        #
        while True:
            offset = self._buf.rfind(endline, 0, limpos - self._bufpos)
            if offset != -1:
                break

            if self._bufpos > 0:
                self._extend_buffer_backwards()
                continue

            # Reached the start of the stream: pretend a terminator sits
            # just before offset 0.
            offset = -len(endline)
            break

        self._limpos = self._bufpos + offset

        line_offset = offset + len(endline)

        # We treat the first returned line specially, as it may be missing
        # the endline terminator.  Also we avoid returning an initial empty
        # line for files with a normal terminating endline.
        #
        if is_firstline:
            tail = self._buf[line_offset:]
            if not tail:
                # Stream ends with a terminator: skip the empty tail.  The
                # recursive call already returns decoded text, so it must
                # not be decoded a second time.
                return self.next()
            return tail.decode(self._encoding)

        res = self._buf[line_offset:limpos - self._bufpos] + endline
        return res.decode(self._encoding)

    def __next__(self):
        """Python 3 iterator protocol; delegates to next()."""
        return self.next()


class TextIOBase:
    """Minimal base class for text streams: iteration in terms of readline()."""

    def read(self, n: int = 1) -> str:
        """Read text from the stream; must be provided by subclasses."""
        raise NotImplementedError

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration when readline() gives ''."""
        line = self.readline()
        if line == '':
            raise StopIteration
        return line

    def __next__(self):
        """Python 3 iterator protocol; delegates to next()."""
        return self.next()


class TextIOWrapper (TextIOBase):
    """Text layer over a buffered byte stream."""

    # XXX Incomplete - only enough to support "for line in file:" is there
    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap *buffer*, resolving default encoding and line terminator."""
        self.buffer = buffer
        self._encoding = make_encoding(encoding)
        self._newline = make_newline(newline)

    def __reversed__(self):
        """Return an iterator over the lines of buffer.raw, last line first."""
        return TextLineReverser(self.buffer.raw, self._encoding, self._newline)

    def read(self, n: int = None):
        """Read n bytes (everything if None) from the buffer and decode them."""
        return self.buffer.read(n).decode(self._encoding)

    def write(self, s: str):
        """Encode *s* and write it to the buffer; return the buffer's result."""
        return self.buffer.write(s.encode(self._encoding))

    def readline(self):
        """Return the next line including its terminator, or '' at end of data.

        Bytes are accumulated and decoded once, so characters encoded as
        several bytes are handled.  Like TextLineReverser, this searches for
        the encoded terminator at byte level.
        """
        # XXX: Very inefficient
        terminator = self._newline.encode(self._encoding)
        line = bytearray()
        while not line.endswith(terminator):
            ch = self.buffer.read(1)
            if not ch:
                break
            line += ch
        return line.decode(self._encoding)