diff -r 6b747ad4a99a Lib/_io2.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/_io2.py Thu Sep 19 17:17:33 2013 +0300 @@ -0,0 +1,162 @@ +import io + +class _BufferedReaderMixin: + buffer_size = io.DEFAULT_BUFFER_SIZE + + def __init__(self): + self.__buffer = b'' + self.__buffer_offset = 0 + self.__buffer_start = 0 + + def readable(self): + return True + + def peek(self, size=0): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + size = min(size, self.buffer_size) + try: + data = self.__buffer[self.__buffer_offset:] + except TypeError: + raise ValueError("I/O operation on closed file") + except AttributeError: + raise io.UnsupportedOperation("File not open for reading") + if len(data) < size or not data: + data += self._raw_read(self.buffer_size - len(data)) + self.__buffer_start += self.__buffer_offset + self.__buffer = data + self.__buffer_offset = 0 + return data + + def read(self, size=-1): + """Read up to size bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. + """ + try: + bufsize = len(self.__buffer) + except TypeError: + raise ValueError("I/O operation on closed file") + except AttributeError: + raise io.UnsupportedOperation("File not open for reading") + if size == 0: + return b'' + + if size < 0: + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + data = self.__buffer[self.__buffer_offset:] + blocks = [] + if data: + blocks.append(data) + self.__buffer_start += bufsize + self.__buffer = b'' + self.__buffer_offset = 0 + while True: + data = self._raw_read() + if not data: + break + self.__buffer_start += len(data) + blocks.append(data) + return b''.join(blocks) + + # If we have enough data buffered, return immediately. 
+ end = self.__buffer_offset + size + if end <= bufsize: + data = self.__buffer[self.__buffer_offset : end] + self.__buffer_offset = end + return data + + # The loop assumes that _buffer_offset is 0. Ensure that this is true. + data = self.__buffer[self.__buffer_offset:] + blocks = [data] + self.__buffer_start += bufsize + self.__buffer = b'' + self.__buffer_offset = 0 + size -= len(data) + while size > 0: + data = self._raw_read(size) + if not data: + break + if size < len(data): + self.__buffer = data + self.__buffer_offset = size + data = data[:size] + else: + self.__buffer_start += len(data) + blocks.append(data) + size -= len(data) + return b''.join(blocks) + + def read1(self, size=-1): + """Read up to size bytes, while trying to avoid + making multiple reads from the underlying stream. + + Returns b'' if the file is at EOF. + """ + try: + if size == 0: + return self.__buffer[:0] + if size < 0: + data = self.__buffer[self.__buffer_offset:] + else: + data = self.__buffer[self.__buffer_offset : + self.__buffer_offset + size] + if data: + self.__buffer_offset += len(data) + return data + except TypeError: + raise ValueError("I/O operation on closed file") + except AttributeError: + raise io.UnsupportedOperation("File not open for reading") + self.__buffer_start += len(self.__buffer) + self.__buffer = b'' + self.__buffer_offset = 0 + if data: + return data + if not data: + if size > 0: + data = self._raw_read(size) + self.__buffer = data + data = data[:size] + self.__buffer_offset = len(data) + else: + data = self._raw_read() + self.__buffer_start += len(data) + return data + + def readline(self, size=-1): + """Read a line of bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + # Shortcut for the common case - the whole line is in the buffer. 
+ if size < 0: + try: + if self.__buffer is None: + raise ValueError("I/O operation on closed file") + except AttributeError: + raise io.UnsupportedOperation("File not open for reading") + end = self.__buffer.find(b"\n", self.__buffer_offset) + 1 + if end > 0: + line = self.__buffer[self.__buffer_offset : end] + self.__buffer_offset = end + return line + return io.BufferedIOBase.readline(self, size) + + def tell(self): + """Return the current file position.""" + if self.__buffer is None: + raise ValueError("I/O operation on closed file") + return self.__buffer_start + self.__buffer_offset + + def close(self): + super().close() + self.__buffer = None + self.__buffer_offset = None + self.__buffer_start = None diff -r 6b747ad4a99a Lib/bz2.py --- a/Lib/bz2.py Mon Sep 16 23:57:00 2013 +0300 +++ b/Lib/bz2.py Thu Sep 19 17:17:33 2013 +0300 @@ -10,6 +10,7 @@ __author__ = "Nadeem Vawda " import io +import _io2 import warnings try: @@ -22,7 +23,6 @@ _MODE_CLOSED = 0 _MODE_READ = 1 -_MODE_READ_EOF = 2 _MODE_WRITE = 3 _BUFFER_SIZE = 8192 @@ -30,7 +30,7 @@ _builtin_open = open -class BZ2File(io.BufferedIOBase): +class BZ2File(_io2._BufferedReaderMixin, io.BufferedIOBase): """A file object providing transparent bzip2 (de)compression. @@ -81,8 +81,7 @@ mode = "rb" mode_code = _MODE_READ self._decompressor = BZ2Decompressor() - self._buffer = b"" - self._buffer_offset = 0 + _io2._BufferedReaderMixin.__init__(self) elif mode in ("w", "wb"): mode = "wb" mode_code = _MODE_WRITE @@ -111,12 +110,12 @@ closed, any other operation on it will raise a ValueError. 
""" with self._lock: - if self._mode == _MODE_CLOSED: + if self.closed: return try: - if self._mode in (_MODE_READ, _MODE_READ_EOF): + if self.readable(): self._decompressor = None - elif self._mode == _MODE_WRITE: + elif self.writable(): self._fp.write(self._compressor.flush()) self._compressor = None finally: @@ -124,16 +123,10 @@ if self._closefp: self._fp.close() finally: + super().close() + self._mode = _MODE_CLOSED self._fp = None self._closefp = False - self._mode = _MODE_CLOSED - self._buffer = b"" - self._buffer_offset = 0 - - @property - def closed(self): - """True if this file is closed.""" - return self._mode == _MODE_CLOSED def fileno(self): """Return the file descriptor for the underlying file.""" @@ -146,13 +139,17 @@ def readable(self): """Return whether the file was opened for reading.""" + if self._mode == _MODE_READ: + return True self._check_not_closed() - return self._mode in (_MODE_READ, _MODE_READ_EOF) + return False def writable(self): """Return whether the file was opened for writing.""" + if self._mode == _MODE_WRITE: + return True self._check_not_closed() - return self._mode == _MODE_WRITE + return False # Mode-checking helper functions. @@ -160,99 +157,42 @@ if self.closed: raise ValueError("I/O operation on closed file") - def _check_can_read(self): - if self._mode not in (_MODE_READ, _MODE_READ_EOF): - self._check_not_closed() - raise io.UnsupportedOperation("File not open for reading") - def _check_can_write(self): if self._mode != _MODE_WRITE: self._check_not_closed() raise io.UnsupportedOperation("File not open for writing") def _check_can_seek(self): - if self._mode not in (_MODE_READ, _MODE_READ_EOF): - self._check_not_closed() + if not self.readable(): raise io.UnsupportedOperation("Seeking is only supported " "on files open for reading") if not self._fp.seekable(): raise io.UnsupportedOperation("The underlying file object " "does not support seeking") - # Fill the readahead buffer if it is empty. Returns False on EOF. 
- def _fill_buffer(self): - if self._mode == _MODE_READ_EOF: - return False + def _raw_read(self, size=None): # Depending on the input data, our call to the decompressor may not # return any data. In this case, try again after reading another block. - while self._buffer_offset == len(self._buffer): - rawblock = (self._decompressor.unused_data or - self._fp.read(_BUFFER_SIZE)) + while True: + rawblock = self._decompressor.unused_data or self._fp.read(_BUFFER_SIZE) if not rawblock: if self._decompressor.eof: # End-of-stream marker and end of file. We're good. - self._mode = _MODE_READ_EOF - self._size = self._pos - return False + self._size = self.tell() + return b'' else: # Problem - we were expecting more compressed data. raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") + # Continue to next stream. if self._decompressor.eof: - # Continue to next stream. self._decompressor = BZ2Decompressor() - self._buffer = self._decompressor.decompress(rawblock) - self._buffer_offset = 0 - return True - - # Read data until EOF. - # If return_data is false, consume the data without returning it. - def _read_all(self, return_data=True): - # The loop assumes that _buffer_offset is 0. Ensure that this is true. - self._buffer = self._buffer[self._buffer_offset:] - self._buffer_offset = 0 - - blocks = [] - while self._fill_buffer(): - if return_data: - blocks.append(self._buffer) - self._pos += len(self._buffer) - self._buffer = b"" - if return_data: - return b"".join(blocks) - - # Read a block of up to n bytes. - # If return_data is false, consume the data without returning it. - def _read_block(self, n, return_data=True): - # If we have enough data buffered, return immediately. - end = self._buffer_offset + n - if end <= len(self._buffer): - data = self._buffer[self._buffer_offset : end] - self._buffer_offset = end - self._pos += len(data) - return data if return_data else None - - # The loop assumes that _buffer_offset is 0. 
Ensure that this is true. - self._buffer = self._buffer[self._buffer_offset:] - self._buffer_offset = 0 - - blocks = [] - while n > 0 and self._fill_buffer(): - if n < len(self._buffer): - data = self._buffer[:n] - self._buffer_offset = n - else: - data = self._buffer - self._buffer = b"" - if return_data: - blocks.append(data) - self._pos += len(data) - n -= len(data) - if return_data: - return b"".join(blocks) + data = self._decompressor.decompress(rawblock) + if data: + return data def peek(self, n=0): """Return buffered data without advancing the file position. @@ -261,10 +201,7 @@ The exact number of bytes returned is unspecified. """ with self._lock: - self._check_can_read() - if not self._fill_buffer(): - return b"" - return self._buffer[self._buffer_offset:] + return super().peek(n) def read(self, size=-1): """Read up to size uncompressed bytes from the file. @@ -273,13 +210,7 @@ Returns b'' if the file is already at EOF. """ with self._lock: - self._check_can_read() - if size == 0: - return b"" - elif size < 0: - return self._read_all() - else: - return self._read_block(size) + return super().read(size) def read1(self, size=-1): """Read up to size uncompressed bytes, while trying to avoid @@ -291,22 +222,7 @@ # this does not give enough data for the decompressor to make progress. # In this case we make multiple reads, to avoid returning b"". with self._lock: - self._check_can_read() - if (size == 0 or - # Only call _fill_buffer() if the buffer is actually empty. - # This gives a significant speedup if *size* is small. - (self._buffer_offset == len(self._buffer) and not self._fill_buffer())): - return b"" - if size > 0: - data = self._buffer[self._buffer_offset : - self._buffer_offset + size] - self._buffer_offset += len(data) - else: - data = self._buffer[self._buffer_offset:] - self._buffer = b"" - self._buffer_offset = 0 - self._pos += len(data) - return data + return super().read1(size) def readinto(self, b): """Read up to len(b) bytes into b. 
@@ -314,7 +230,7 @@ Returns the number of bytes read (0 for EOF). """ with self._lock: - return io.BufferedIOBase.readinto(self, b) + return super().readinto(b) def readline(self, size=-1): """Read a line of uncompressed bytes from the file. @@ -328,16 +244,7 @@ raise TypeError("Integer argument expected") size = size.__index__() with self._lock: - self._check_can_read() - # Shortcut for the common case - the whole line is in the buffer. - if size < 0: - end = self._buffer.find(b"\n", self._buffer_offset) + 1 - if end > 0: - line = self._buffer[self._buffer_offset : end] - self._buffer_offset = end - self._pos += len(line) - return line - return io.BufferedIOBase.readline(self, size) + return super().readline(size) def readlines(self, size=-1): """Read a list of lines of uncompressed bytes from the file. @@ -380,12 +287,9 @@ # Rewind the file to the beginning of the data stream. def _rewind(self): + _io2._BufferedReaderMixin.__init__(self) self._fp.seek(0, 0) - self._mode = _MODE_READ - self._pos = 0 self._decompressor = BZ2Decompressor() - self._buffer = b"" - self._buffer_offset = 0 def seek(self, offset, whence=0): """Change the file position. @@ -409,31 +313,37 @@ if whence == 0: pass elif whence == 1: - offset = self._pos + offset + offset = self.tell() + offset elif whence == 2: # Seeking relative to EOF - we need to know the file's size. if self._size < 0: - self._read_all(return_data=False) + while self.read1(_BUFFER_SIZE): + pass offset = self._size + offset else: raise ValueError("Invalid value for whence: %s" % (whence,)) # Make it so that offset is the number of bytes to skip forward. - if offset < self._pos: + if offset < self.tell(): self._rewind() else: - offset -= self._pos + offset -= self.tell() # Read and discard data until we reach the desired position. 
- self._read_block(offset, return_data=False) + while offset > 0: + data = self.read1(offset) + if not data: + break + offset -= len(data) - return self._pos + return self.tell() def tell(self): """Return the current file position.""" with self._lock: - self._check_not_closed() - return self._pos + if self.writable(): + return self._pos + return super().tell() def open(filename, mode="rb", compresslevel=9, diff -r 6b747ad4a99a Lib/gzip.py --- a/Lib/gzip.py Mon Sep 16 23:57:00 2013 +0300 +++ b/Lib/gzip.py Thu Sep 19 17:17:33 2013 +0300 @@ -9,6 +9,8 @@ import zlib import builtins import io +import _io2 +import warnings __all__ = ["GzipFile", "open", "compress", "decompress"] @@ -121,7 +122,7 @@ return getattr(self.file, name) -class GzipFile(io.BufferedIOBase): +class GzipFile(_io2._BufferedReaderMixin, io.BufferedIOBase): """The GzipFile class simulates most of the methods of a file object with the exception of the readinto() and truncate() methods. @@ -190,16 +191,11 @@ self.mode = READ # Set flag indicating start of a new member self._new_member = True - # Buffer data read from gzip file. extrastart is offset in - # stream where buffer starts. extrasize is number of - # bytes remaining in buffer from current stream position. 
- self.extrabuf = b"" - self.extrasize = 0 - self.extrastart = 0 self.name = filename # Starts small, scales exponentially self.min_readsize = 100 fileobj = _PaddedFile(fileobj) + _io2._BufferedReaderMixin.__init__(self) elif mode.startswith(('w', 'a')): self.mode = WRITE @@ -213,7 +209,7 @@ raise ValueError("Invalid mode: {!r}".format(mode)) self.fileobj = fileobj - self.offset = 0 + self._offset = 0 self.mtime = mtime if self.mode == WRITE: @@ -342,100 +338,28 @@ self.size = self.size + len(data) self.crc = zlib.crc32(data, self.crc) & 0xffffffff self.fileobj.write( self.compress.compress(data) ) - self.offset += len(data) + self._offset += len(data) return len(data) - def read(self, size=-1): - self._check_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") - - if self.extrasize <= 0 and self.fileobj is None: + def _raw_read(self, size=None): + if self.fileobj is None: return b'' - readsize = 1024 - if size < 0: # get the whole thing - while self._read(readsize): - readsize = min(self.max_read_chunk, readsize * 2) - size = self.extrasize - else: # just get some more of it - while size > self.extrasize: - if not self._read(readsize): - if size > self.extrasize: - size = self.extrasize - break - readsize = min(self.max_read_chunk, readsize * 2) - - offset = self.offset - self.extrastart - chunk = self.extrabuf[offset: offset + size] - self.extrasize = self.extrasize - size - - self.offset += size - return chunk - - def read1(self, size=-1): - self._check_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") - - if self.extrasize <= 0 and self.fileobj is None: - return b'' - - # For certain input data, a single call to _read() may not return - # any data. In this case, retry until we get some data or reach EOF. 
- while self.extrasize <= 0 and self._read(): - pass - if size < 0 or size > self.extrasize: - size = self.extrasize - - offset = self.offset - self.extrastart - chunk = self.extrabuf[offset: offset + size] - self.extrasize -= size - self.offset += size - return chunk - - def peek(self, n): - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") - - # Do not return ridiculously small buffers, for one common idiom - # is to call peek(1) and expect more bytes in return. - if n < 100: - n = 100 - if self.extrasize == 0: - if self.fileobj is None: - return b'' - # Ensure that we don't return b"" if we haven't reached EOF. - # 1024 is the same buffering heuristic used in read() - while self.extrasize == 0 and self._read(max(n, 1024)): - pass - offset = self.offset - self.extrastart - remaining = self.extrasize - assert remaining == len(self.extrabuf) - offset - return self.extrabuf[offset:offset + n] - - def _unread(self, buf): - self.extrasize = len(buf) + self.extrasize - self.offset -= len(buf) - - def _read(self, size=1024): - if self.fileobj is None: - return False - if self._new_member: # If the _new_member flag is set, we have to # jump to the next member, if there is one. 
self._init_read() if not self._read_gzip_header(): - return False + return b'' self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) self._new_member = False # Read a chunk of data from the file + if size is None: + size = 4096 + size = min(size, self.max_read_chunk) + size = max(size, 1024) buf = self.fileobj.read(size) # If the EOF has been reached, flush the decompression object @@ -447,8 +371,8 @@ # seen by _read_eof() self.fileobj.prepend(self.decompress.unused_data, True) self._read_eof() - self._add_read_data( uncompress ) - return False + self._add_read_data( uncompress ) + return uncompress uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -464,14 +388,10 @@ # a new member on the next call self._read_eof() self._new_member = True - return True + return uncompress def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff - offset = self.offset - self.extrastart - self.extrabuf = self.extrabuf[offset:] + data - self.extrasize = self.extrasize + len(data) - self.extrastart = self.offset self.size = self.size + len(data) def _read_eof(self): @@ -495,21 +415,16 @@ if c: self.fileobj.prepend(c, True) - @property - def closed(self): - return self.fileobj is None - def close(self): if self.fileobj is None: return + super().close() if self.mode == WRITE: self.fileobj.write(self.compress.flush()) write32u(self.fileobj, self.crc) # self.size may exceed 2GB, or even 4GB write32u(self.fileobj, self.size & 0xffffffff) - self.fileobj = None - elif self.mode == READ: - self.fileobj = None + self.fileobj = None if self.myfileobj: self.myfileobj.close() self.myfileobj = None @@ -536,10 +451,7 @@ raise OSError("Can't rewind in write mode") self.fileobj.seek(0) self._new_member = True - self.extrabuf = b"" - self.extrasize = 0 - self.extrastart = 0 + _io2._BufferedReaderMixin.__init__(self) def readable(self): return self.mode == READ @@ -550,70 +462,44 @@ def seekable(self): return True + def 
tell(self): + if self.mode == WRITE: + return self._offset + return super().tell() + def seek(self, offset, whence=0): if whence: if whence == 1: - offset = self.offset + offset + if self.mode == WRITE: + offset = self._offset + offset + else: + offset = self.tell() + offset else: raise ValueError('Seek from end not supported') if self.mode == WRITE: - if offset < self.offset: + if offset < self._offset: raise OSError('Negative seek in write mode') - count = offset - self.offset + count = offset - self._offset chunk = bytes(1024) for i in range(count // 1024): self.write(chunk) self.write(bytes(count % 1024)) + return self._offset elif self.mode == READ: - if offset < self.offset: + if offset < self.tell(): # for negative seek, rewind and do positive seek self.rewind() - count = offset - self.offset + count = offset - self.tell() for i in range(count // 1024): self.read(1024) self.read(count % 1024) + return self.tell() - return self.offset - - def readline(self, size=-1): - if size < 0: - # Shortcut common case - newline found in buffer. - offset = self.offset - self.extrastart - i = self.extrabuf.find(b'\n', offset) + 1 - if i > 0: - self.extrasize -= i - offset - self.offset += i - offset - return self.extrabuf[offset: i] - - size = sys.maxsize - readsize = self.min_readsize - else: - readsize = size - bufs = [] - while size != 0: - c = self.read(readsize) - i = c.find(b'\n') - - # We set i=size to break out of the loop under two - # conditions: 1) there's no newline, and the chunk is - # larger than size, or 2) there is a newline, but the - # resulting line would be longer than 'size'. 
- if (size <= i) or (i == -1 and len(c) > size): - i = size - 1 - - if i >= 0 or c == b'': - bufs.append(c[:i + 1]) # Add portion of last chunk - self._unread(c[i + 1:]) # Push back rest of chunk - break - - # Append chunk to list, decrease 'size', - bufs.append(c) - size = size - len(c) - readsize = min(size, readsize * 2) - if readsize > self.min_readsize: - self.min_readsize = min(readsize, self.min_readsize * 2, 512) - return b''.join(bufs) # Return resulting line - + @property + def offset(self): + warnings.warn("This property is deprecated, use the tell() method instead", + DeprecationWarning, 2) + return self.tell() def compress(data, compresslevel=9): """Compress data in one shot and return the compressed string. diff -r 6b747ad4a99a Lib/lzma.py --- a/Lib/lzma.py Mon Sep 16 23:57:00 2013 +0300 +++ b/Lib/lzma.py Thu Sep 19 17:17:33 2013 +0300 @@ -23,19 +23,19 @@ import builtins import io +import _io2 from _lzma import * from _lzma import _encode_filter_properties, _decode_filter_properties _MODE_CLOSED = 0 _MODE_READ = 1 -_MODE_READ_EOF = 2 _MODE_WRITE = 3 _BUFFER_SIZE = 8192 -class LZMAFile(io.BufferedIOBase): +class LZMAFile(_io2._BufferedReaderMixin, io.BufferedIOBase): """A file object providing transparent LZMA (de)compression. @@ -92,7 +92,6 @@ self._fp = None self._closefp = False self._mode = _MODE_CLOSED - self._pos = 0 self._size = -1 if mode in ("r", "rb"): @@ -110,12 +109,12 @@ # stream will need a separate decompressor object. self._init_args = {"format":format, "filters":filters} self._decompressor = LZMADecompressor(**self._init_args) - self._buffer = b"" - self._buffer_offset = 0 + _io2._BufferedReaderMixin.__init__(self) elif mode in ("w", "wb", "a", "ab"): if format is None: format = FORMAT_XZ mode_code = _MODE_WRITE + self._pos = 0 self._compressor = LZMACompressor(format=format, check=check, preset=preset, filters=filters) else: @@ -139,28 +138,23 @@ May be called more than once without error. 
Once the file is closed, any other operation on it will raise a ValueError. """ - if self._mode == _MODE_CLOSED: + if self.closed: return try: - if self._mode in (_MODE_READ, _MODE_READ_EOF): + if self.readable(): self._decompressor = None - self._buffer = b"" - elif self._mode == _MODE_WRITE: + elif self.writable(): self._fp.write(self._compressor.flush()) self._compressor = None finally: + self._mode = _MODE_CLOSED try: if self._closefp: self._fp.close() finally: + super().close() self._fp = None self._closefp = False - self._mode = _MODE_CLOSED - - @property - def closed(self): - """True if this file is closed.""" - return self._mode == _MODE_CLOSED def fileno(self): """Return the file descriptor for the underlying file.""" @@ -173,13 +167,17 @@ def readable(self): """Return whether the file was opened for reading.""" + if self._mode == _MODE_READ: + return True self._check_not_closed() - return self._mode in (_MODE_READ, _MODE_READ_EOF) + return False def writable(self): """Return whether the file was opened for writing.""" + if self._mode == _MODE_WRITE: + return True self._check_not_closed() - return self._mode == _MODE_WRITE + return False # Mode-checking helper functions. @@ -187,41 +185,32 @@ if self.closed: raise ValueError("I/O operation on closed file") - def _check_can_read(self): - if self._mode not in (_MODE_READ, _MODE_READ_EOF): - self._check_not_closed() - raise io.UnsupportedOperation("File not open for reading") - def _check_can_write(self): if self._mode != _MODE_WRITE: self._check_not_closed() raise io.UnsupportedOperation("File not open for writing") def _check_can_seek(self): - if self._mode not in (_MODE_READ, _MODE_READ_EOF): - self._check_not_closed() + if not self.readable(): raise io.UnsupportedOperation("Seeking is only supported " "on files open for reading") if not self._fp.seekable(): raise io.UnsupportedOperation("The underlying file object " "does not support seeking") - # Fill the readahead buffer if it is empty. 
Returns False on EOF. - def _fill_buffer(self): - if self._mode == _MODE_READ_EOF: - return False + def _raw_read(self, size=None): # Depending on the input data, our call to the decompressor may not # return any data. In this case, try again after reading another block. - while self._buffer_offset == len(self._buffer): - rawblock = (self._decompressor.unused_data or - self._fp.read(_BUFFER_SIZE)) + while True: + rawblock = (self._decompressor.unused_data or self._fp.read(_BUFFER_SIZE)) if not rawblock: if self._decompressor.eof: - self._mode = _MODE_READ_EOF - self._size = self._pos - return False + # End-of-stream marker and end of file. We're good. + self._size = self.tell() + return b'' else: + # Problem - we were expecting more compressed data. raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") @@ -229,124 +218,9 @@ if self._decompressor.eof: self._decompressor = LZMADecompressor(**self._init_args) - self._buffer = self._decompressor.decompress(rawblock) - self._buffer_offset = 0 - return True - - # Read data until EOF. - # If return_data is false, consume the data without returning it. - def _read_all(self, return_data=True): - # The loop assumes that _buffer_offset is 0. Ensure that this is true. - self._buffer = self._buffer[self._buffer_offset:] - self._buffer_offset = 0 - - blocks = [] - while self._fill_buffer(): - if return_data: - blocks.append(self._buffer) - self._pos += len(self._buffer) - self._buffer = b"" - if return_data: - return b"".join(blocks) - - # Read a block of up to n bytes. - # If return_data is false, consume the data without returning it. - def _read_block(self, n, return_data=True): - # If we have enough data buffered, return immediately. - end = self._buffer_offset + n - if end <= len(self._buffer): - data = self._buffer[self._buffer_offset : end] - self._buffer_offset = end - self._pos += len(data) - return data if return_data else None - - # The loop assumes that _buffer_offset is 0. 
Ensure that this is true. - self._buffer = self._buffer[self._buffer_offset:] - self._buffer_offset = 0 - - blocks = [] - while n > 0 and self._fill_buffer(): - if n < len(self._buffer): - data = self._buffer[:n] - self._buffer_offset = n - else: - data = self._buffer - self._buffer = b"" - if return_data: - blocks.append(data) - self._pos += len(data) - n -= len(data) - if return_data: - return b"".join(blocks) - - def peek(self, size=-1): - """Return buffered data without advancing the file position. - - Always returns at least one byte of data, unless at EOF. - The exact number of bytes returned is unspecified. - """ - self._check_can_read() - if not self._fill_buffer(): - return b"" - return self._buffer[self._buffer_offset:] - - def read(self, size=-1): - """Read up to size uncompressed bytes from the file. - - If size is negative or omitted, read until EOF is reached. - Returns b"" if the file is already at EOF. - """ - self._check_can_read() - if size == 0: - return b"" - elif size < 0: - return self._read_all() - else: - return self._read_block(size) - - def read1(self, size=-1): - """Read up to size uncompressed bytes, while trying to avoid - making multiple reads from the underlying stream. - - Returns b"" if the file is at EOF. - """ - # Usually, read1() calls _fp.read() at most once. However, sometimes - # this does not give enough data for the decompressor to make progress. - # In this case we make multiple reads, to avoid returning b"". - self._check_can_read() - if (size == 0 or - # Only call _fill_buffer() if the buffer is actually empty. - # This gives a significant speedup if *size* is small. 
- (self._buffer_offset == len(self._buffer) and not self._fill_buffer())): - return b"" - if size > 0: - data = self._buffer[self._buffer_offset : - self._buffer_offset + size] - self._buffer_offset += len(data) - else: - data = self._buffer[self._buffer_offset:] - self._buffer = b"" - self._buffer_offset = 0 - self._pos += len(data) - return data - - def readline(self, size=-1): - """Read a line of uncompressed bytes from the file. - - The terminating newline (if present) is retained. If size is - non-negative, no more than size bytes will be read (in which - case the line may be incomplete). Returns b'' if already at EOF. - """ - self._check_can_read() - # Shortcut for the common case - the whole line is in the buffer. - if size < 0: - end = self._buffer.find(b"\n", self._buffer_offset) + 1 - if end > 0: - line = self._buffer[self._buffer_offset : end] - self._buffer_offset = end - self._pos += len(line) - return line - return io.BufferedIOBase.readline(self, size) + data = self._decompressor.decompress(rawblock) + if data: + return data def write(self, data): """Write a bytes object to the file. @@ -363,12 +237,9 @@ # Rewind the file to the beginning of the data stream. def _rewind(self): + _io2._BufferedReaderMixin.__init__(self) self._fp.seek(0, 0) - self._mode = _MODE_READ - self._pos = 0 self._decompressor = LZMADecompressor(**self._init_args) - self._buffer = b"" - self._buffer_offset = 0 def seek(self, offset, whence=0): """Change the file position. @@ -391,30 +262,36 @@ if whence == 0: pass elif whence == 1: - offset = self._pos + offset + offset = self.tell() + offset elif whence == 2: # Seeking relative to EOF - we need to know the file's size. if self._size < 0: - self._read_all(return_data=False) + while self.read1(_BUFFER_SIZE): + pass offset = self._size + offset else: raise ValueError("Invalid value for whence: {}".format(whence)) # Make it so that offset is the number of bytes to skip forward. 
- if offset < self._pos: + if offset < self.tell(): self._rewind() else: - offset -= self._pos + offset -= self.tell() # Read and discard data until we reach the desired position. - self._read_block(offset, return_data=False) + while offset > 0: + data = self.read1(offset) + if not data: + break + offset -= len(data) - return self._pos + return self.tell() def tell(self): """Return the current file position.""" - self._check_not_closed() - return self._pos + if self.writable(): + return self._pos + return super().tell() def open(filename, mode="rb", *, diff -r 6b747ad4a99a Lib/zipfile.py --- a/Lib/zipfile.py Mon Sep 16 23:57:00 2013 +0300 +++ b/Lib/zipfile.py Thu Sep 19 17:17:33 2013 +0300 @@ -4,6 +4,7 @@ XXX references to utf-8 need further investigation. """ import io +import _io2 import os import re import importlib.util @@ -622,7 +623,7 @@ raise NotImplementedError("compression type %d" % (compress_type,)) -class ZipExtFile(io.BufferedIOBase): +class ZipExtFile(_io2._BufferedReaderMixin, io.BufferedIOBase): """File-like object for reading an archive member. Is returned by ZipFile.open(). """ @@ -648,9 +649,8 @@ self._decompressor = _get_decompressor(self._compress_type) + _io2._BufferedReaderMixin.__init__(self) self._eof = False - self._readbuffer = b'' - self._offset = 0 self._universal = 'U' in mode self.newlines = None @@ -675,22 +675,18 @@ If limit is specified, at most limit bytes will be read. """ - if not self._universal and limit < 0: - # Shortcut common case - newline found in buffer. 
- i = self._readbuffer.find(b'\n', self._offset) + 1 - if i > 0: - line = self._readbuffer[self._offset: i] - self._offset = i - return line - if not self._universal: - return io.BufferedIOBase.readline(self, limit) + return super().readline(limit) line = b'' while limit < 0 or len(line) < limit: readahead = self.peek(2) if readahead == b'': return line + read = 0 + if readahead == b'\r': + readahead = self.read(1) + self.peek(1) + read = 1 # # Search for universal newlines or line chunks. @@ -707,67 +703,21 @@ self.newlines = [] if newline not in self.newlines: self.newlines.append(newline) - self._offset += len(newline) + self.read(len(newline) - read) return line + b'\n' chunk = match.group('chunk') if limit >= 0: chunk = chunk[: limit - len(line)] - self._offset += len(chunk) + self.read(len(chunk) - read) line += chunk return line - def peek(self, n=1): - """Returns buffered bytes without advancing the position.""" - if n > len(self._readbuffer) - self._offset: - chunk = self.read(n) - if len(chunk) > self._offset: - self._readbuffer = chunk + self._readbuffer[self._offset:] - self._offset = 0 - else: - self._offset -= len(chunk) - - # Return up to 512 bytes to reduce allocation overhead for tight loops. - return self._readbuffer[self._offset: self._offset + 512] - def readable(self): return True - def read(self, n=-1): - """Read and return up to n bytes. - If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 
- """ - if n is None or n < 0: - buf = self._readbuffer[self._offset:] - self._readbuffer = b'' - self._offset = 0 - while not self._eof: - buf += self._read1(self.MAX_N) - return buf - - end = n + self._offset - if end < len(self._readbuffer): - buf = self._readbuffer[self._offset:end] - self._offset = end - return buf - - n = end - len(self._readbuffer) - buf = self._readbuffer[self._offset:] - self._readbuffer = b'' - self._offset = 0 - while n > 0 and not self._eof: - data = self._read1(n) - if n < len(data): - self._readbuffer = data - self._offset = n - buf += data[:n] - break - buf += data - n -= len(data) - return buf - def _update_crc(self, newdata): # Update the CRC using the given data. if self._expected_crc is None: @@ -778,71 +728,46 @@ if self._eof and self._running_crc != self._expected_crc: raise BadZipFile("Bad CRC-32 for file %r" % self.name) - def read1(self, n): - """Read up to n bytes with at most one read() system call.""" - - if n is None or n < 0: - buf = self._readbuffer[self._offset:] - self._readbuffer = b'' - self._offset = 0 - data = self._read1(self.MAX_N) - buf += data - return buf - - end = n + self._offset - if end < len(self._readbuffer): - buf = self._readbuffer[self._offset:end] - self._offset = end - return buf - - n = end - len(self._readbuffer) - buf = self._readbuffer[self._offset:] - self._readbuffer = b'' - self._offset = 0 - if n > 0: - data = self._read1(n) - if n < len(data): - self._readbuffer = data - self._offset = n - data = data[:n] - buf += data - return buf - - def _read1(self, n): - # Read up to n compressed bytes with at most one read() system call, - # decrypt and decompress them. - if self._eof or n <= 0: + def _raw_read(self, size=None): + # Read up to size compressed bytes with at most one read() system call, + # decrypt and decompress them + if size is None: + size = self.MAX_N + if self._eof or size <= 0: return b'' - # Read from file. - if self._compress_type == ZIP_DEFLATED: - ## Handle unconsumed data. 
- data = self._decompressor.unconsumed_tail - if n > len(data): - data += self._read2(n - len(data)) - else: - data = self._read2(n) + while not self._eof: + # Read from file. + if self._compress_type == ZIP_DEFLATED: + ## Handle unconsumed data. + data = self._decompressor.unconsumed_tail + if size > len(data): + data += self._read2(size - len(data)) + else: + data = self._read2(size) - if self._compress_type == ZIP_STORED: - self._eof = self._compress_left <= 0 - elif self._compress_type == ZIP_DEFLATED: - n = max(n, self.MIN_READ_SIZE) - data = self._decompressor.decompress(data, n) - self._eof = (self._decompressor.eof or - self._compress_left <= 0 and - not self._decompressor.unconsumed_tail) - if self._eof: - data += self._decompressor.flush() - else: - data = self._decompressor.decompress(data) - self._eof = self._decompressor.eof or self._compress_left <= 0 + if self._compress_type == ZIP_STORED: + self._eof = self._compress_left <= 0 + elif self._compress_type == ZIP_DEFLATED: + size = max(size, self.MIN_READ_SIZE) + data = self._decompressor.decompress(data, size) + self._eof = (self._decompressor.eof or + self._compress_left <= 0 and + not self._decompressor.unconsumed_tail) + if self._eof: + data += self._decompressor.flush() + else: + data = self._decompressor.decompress(data) + self._eof = self._decompressor.eof or self._compress_left <= 0 - data = data[:self._left] - self._left -= len(data) - if self._left <= 0: - self._eof = True - self._update_crc(data) - return data + data = data[:self._left] + self._left -= len(data) + if self._left <= 0: + self._eof = True + self._update_crc(data) + if data: + return data + return b'' def _read2(self, n): if self._compress_left <= 0: