Return already-decompressed data before reporting a read error in LZMAFile.

_fill_buffer() gains a `weak` argument.  When it is true, an EOFError or
LZMAError raised while refilling the buffer is stored in self._error and
_fill_buffer() returns False instead of raising; the stored error is raised
by the next _fill_buffer() call.  The two read loops pass their `blocks`
list as `weak`, so the flag is false while the list is empty and becomes
true once at least one block has been appended.  self._error is reset
wherever a new decompressor is installed.

The tests now expect a read on a truncated stream to return the data
decoded so far, and the following read to raise EOFError.

NOTE(review): the hunk at -318,7 is a whitespace-only change to
`return b""`; confirm the intended whitespace against the original patch.

diff -r cd87afe18ff8 Lib/lzma.py
--- a/Lib/lzma.py	Wed Jan 23 08:38:47 2013 -0500
+++ b/Lib/lzma.py	Thu Jan 24 09:29:40 2013 +0200
@@ -112,6 +112,7 @@
             self._decompressor = LZMADecompressor(**self._init_args)
             self._buffer = b""
             self._buffer_offset = 0
+            self._error = None
         elif mode in ("w", "wb", "a", "ab"):
             if format is None:
                 format = FORMAT_XZ
@@ -207,30 +208,40 @@
                                           "does not support seeking")
 
     # Fill the readahead buffer if it is empty. Returns False on EOF.
-    def _fill_buffer(self):
+    def _fill_buffer(self, weak=False):
         if self._mode == _MODE_READ_EOF:
             return False
-        # Depending on the input data, our call to the decompressor may not
-        # return any data. In this case, try again after reading another block.
-        while self._buffer_offset == len(self._buffer):
-            rawblock = (self._decompressor.unused_data or
-                        self._fp.read(_BUFFER_SIZE))
+        if self._error is not None:
+            err = self._error
+            self._error = None
+            raise err
+        try:
+            # Depending on the input data, our call to the decompressor may not
+            # return any data. In this case, try again after reading another block.
+            while self._buffer_offset == len(self._buffer):
+                rawblock = (self._decompressor.unused_data or
+                            self._fp.read(_BUFFER_SIZE))
 
-            if not rawblock:
+                if not rawblock:
+                    if self._decompressor.eof:
+                        self._mode = _MODE_READ_EOF
+                        self._size = self._pos
+                        return False
+                    else:
+                        raise EOFError("Compressed file ended before the "
+                                       "end-of-stream marker was reached")
+
+                # Continue to next stream.
                 if self._decompressor.eof:
-                    self._mode = _MODE_READ_EOF
-                    self._size = self._pos
-                    return False
-                else:
-                    raise EOFError("Compressed file ended before the "
-                                   "end-of-stream marker was reached")
+                    self._decompressor = LZMADecompressor(**self._init_args)
 
-            # Continue to next stream.
-            if self._decompressor.eof:
-                self._decompressor = LZMADecompressor(**self._init_args)
-
-            self._buffer = self._decompressor.decompress(rawblock)
-            self._buffer_offset = 0
+                self._buffer = self._decompressor.decompress(rawblock)
+                self._buffer_offset = 0
+        except (EOFError, LZMAError) as err:
+            if weak:
+                self._error = err
+                return False
+            raise
         return True
 
     # Read data until EOF.
@@ -241,7 +252,7 @@
         self._buffer_offset = 0
 
         blocks = []
-        while self._fill_buffer():
+        while self._fill_buffer(blocks):
             if return_data:
                 blocks.append(self._buffer)
             self._pos += len(self._buffer)
@@ -265,7 +276,7 @@
         self._buffer_offset = 0
 
         blocks = []
-        while n > 0 and self._fill_buffer():
+        while n > 0 and self._fill_buffer(blocks):
             if n < len(self._buffer):
                 data = self._buffer[:n]
                 self._buffer_offset = n
@@ -318,7 +329,7 @@
             # Only call _fill_buffer() if the buffer is actually empty.
             # This gives a significant speedup if *size* is small.
             (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
-            return b""
+            return b""
         if size > 0:
             data = self._buffer[self._buffer_offset :
                                 self._buffer_offset + size]
@@ -369,6 +380,7 @@
         self._decompressor = LZMADecompressor(**self._init_args)
         self._buffer = b""
         self._buffer_offset = 0
+        self._error = None
 
     def seek(self, offset, whence=0):
         """Change the file position.
diff -r cd87afe18ff8 Lib/test/test_lzma.py
--- a/Lib/test/test_lzma.py	Wed Jan 23 08:38:47 2013 -0500
+++ b/Lib/test/test_lzma.py	Thu Jan 24 09:29:40 2013 +0200
@@ -667,6 +667,9 @@
 
     def test_read_incomplete(self):
         with LZMAFile(BytesIO(COMPRESSED_XZ[:128])) as f:
+            res = f.read()
+            self.assertIn(res, INPUT)
+            self.assertTrue(INPUT.startswith(res))
             self.assertRaises(EOFError, f.read)
 
     def test_read_truncated(self):
@@ -674,10 +677,14 @@
         # flags (2 bytes) and magic number (2 bytes).
         truncated = COMPRESSED_XZ[:-12]
         with LZMAFile(BytesIO(truncated)) as f:
+            self.assertEqual(f.read(), INPUT)
             self.assertRaises(EOFError, f.read)
         with LZMAFile(BytesIO(truncated)) as f:
             self.assertEqual(f.read(len(INPUT)), INPUT)
             self.assertRaises(EOFError, f.read, 1)
+        with LZMAFile(BytesIO(truncated)) as f:
+            self.assertEqual(f.read(len(INPUT) + 1), INPUT)
+            self.assertRaises(EOFError, f.read, 1)
         # Incomplete 12-byte header.
         for i in range(12):
             with LZMAFile(BytesIO(truncated[:i])) as f: