bz2: keep the BZ2File read buffer as bytes plus a read offset

The readahead buffer used to be None when empty and was re-sliced on every
partial read.  With this patch self._buffer is always a bytes object and
self._offset marks how much of it has already been consumed.
_fill_buffer() is replaced by _read1(), which returns the next non-empty
chunk of decompressed data, or b'' at EOF.

diff -r 6f456d9add40 Lib/bz2.py
--- a/Lib/bz2.py	Wed Sep 26 13:11:48 2012 +0200
+++ b/Lib/bz2.py	Wed Sep 26 16:49:44 2012 +0300
@@ -79,7 +79,8 @@
             mode = "rb"
             mode_code = _MODE_READ
             self._decompressor = BZ2Decompressor()
-            self._buffer = None
+            self._buffer = b''
+            self._offset = 0
         elif mode in ("w", "wb"):
             mode = "wb"
             mode_code = _MODE_WRITE
@@ -124,7 +125,8 @@
                     self._fp = None
                     self._closefp = False
                     self._mode = _MODE_CLOSED
-                    self._buffer = None
+                    self._buffer = b''
+                    self._offset = 0
 
     @property
     def closed(self):
@@ -172,14 +174,14 @@
             raise io.UnsupportedOperation("The underlying file object "
                                           "does not support seeking")
 
-    # Fill the readahead buffer if it is empty. Returns False on EOF.
-    def _fill_buffer(self):
+    # Non-buffered read and decompress next chunk of data.
+    # Always returns at least one byte of data, unless at EOF.
+    def _read1(self):
         # Depending on the input data, our call to the decompressor may not
         # return any data. In this case, try again after reading another block.
+        if self._mode == _MODE_READ_EOF:
+            return b''
         while True:
-            if self._buffer:
-                return True
-
             if self._decompressor.unused_data:
                 rawblock = self._decompressor.unused_data
             else:
@@ -189,48 +191,70 @@
                 if self._decompressor.eof:
                     self._mode = _MODE_READ_EOF
                     self._size = self._pos
-                    return False
+                    return b''
                 else:
                     raise EOFError("Compressed file ended before the "
                                    "end-of-stream marker was reached")
 
             # Continue to next stream.
             if self._decompressor.eof:
                 self._decompressor = BZ2Decompressor()
 
-            self._buffer = self._decompressor.decompress(rawblock)
+            data = self._decompressor.decompress(rawblock)
+            if data:
+                return data
 
     # Read data until EOF.
     # If return_data is false, consume the data without returning it.
     def _read_all(self, return_data=True):
+        data = self._buffer[self._offset:]
         blocks = []
-        while self._fill_buffer():
+        self._buffer = b''
+        self._offset = 0
+        while True:
             if return_data:
-                blocks.append(self._buffer)
-            self._pos += len(self._buffer)
-            self._buffer = None
+                blocks.append(data)
+            self._pos += len(data)
+            data = self._read1()
+            if not data:
+                break
         if return_data:
             return b"".join(blocks)
 
     # Read a block of up to n bytes.
     # If return_data is false, consume the data without returning it.
     def _read_block(self, n, return_data=True):
+        if n <= 0:
+            return b''
+        end = n + self._offset
+        data = self._buffer[self._offset:end]
+        if end <= len(self._buffer):
+            self._offset = end
+            self._pos += len(data)
+            return data
+
         blocks = []
-        while n > 0 and self._fill_buffer():
-            if n < len(self._buffer):
-                data = self._buffer[:n]
-                self._buffer = self._buffer[n:]
-            else:
-                data = self._buffer
-                self._buffer = None
+        self._buffer = b''
+        self._offset = 0
+        while True:
             if return_data:
                 blocks.append(data)
             self._pos += len(data)
             n -= len(data)
+            if not n:
+                break
+            data = self._read1()
+            if not data:
+                break
+            if n < len(data):
+                self._buffer = data
+                self._offset = n
+                data = data[:n]
+
         if return_data:
             return b"".join(blocks)
 
     def peek(self, n=0):
         """Return buffered data without advancing the file position.
 
         Always returns at least one byte of data, unless at EOF.
@@ -238,9 +262,11 @@
         """
         with self._lock:
             self._check_can_read()
-            if self._mode == _MODE_READ_EOF or not self._fill_buffer():
-                return b""
-            return self._buffer
+            data = self._buffer[self._offset:]
+            if not data:
+                self._buffer = data = self._read1()
+                self._offset = 0
+            return data
 
     def read(self, size=-1):
         """Read up to size uncompressed bytes from the file.
@@ -250,9 +276,7 @@
         """
         with self._lock:
             self._check_can_read()
-            if self._mode == _MODE_READ_EOF or size == 0:
-                return b""
-            elif size < 0:
+            if size < 0:
                 return self._read_all()
             else:
                 return self._read_block(size)
@@ -268,15 +292,18 @@
         # In this case we make multiple reads, to avoid returning b"".
         with self._lock:
             self._check_can_read()
-            if (size == 0 or self._mode == _MODE_READ_EOF or
-                not self._fill_buffer()):
+            if size == 0:
                 return b""
-            if 0 < size < len(self._buffer):
-                data = self._buffer[:size]
-                self._buffer = self._buffer[size:]
+            if self._offset == len(self._buffer):
+                self._buffer = self._read1()
+                self._offset = 0
+            if size > 0:
+                data = self._buffer[self._offset:self._offset + size]
+                self._offset += len(data)
             else:
-                data = self._buffer
-                self._buffer = None
+                data = self._buffer[self._offset:]
+                self._buffer = b''
+                self._offset = 0
             self._pos += len(data)
             return data
 
@@ -299,6 +326,19 @@
             raise TypeError("Integer argument expected")
         size = size.__index__()
         with self._lock:
+            # Validate the file first, as read(), read1() and peek() do.
+            self._check_can_read()
+            if size < 0:
+                # Shortcut common case - newline found in buffer.
+                i = self._buffer.find(b'\n', self._offset) + 1
+                if i > 0:
+                    line = self._buffer[self._offset: i]
+                    self._offset = i
+                    # Every other read path adds the bytes it hands out to
+                    # self._pos; do the same here so the position stays exact.
+                    self._pos += len(line)
+                    return line
+
             return io.BufferedIOBase.readline(self, size)
 
     def readlines(self, size=-1):
@@ -345,7 +385,8 @@
         self._mode = _MODE_READ
         self._pos = 0
         self._decompressor = BZ2Decompressor()
-        self._buffer = None
+        self._buffer = b''
+        self._offset = 0
 
     def seek(self, offset, whence=0):
         """Change the file position.