Add multi-stream and append-mode support to the bz2 module.

BZ2File gains the "a"/"ab" modes, BZ2File reads and bz2.decompress()
continue past an end-of-stream marker when more compressed data follows,
and the test suite covers files made of several concatenated streams.

diff -r 6354b4ceba1d Lib/bz2.py
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -76,6 +76,10 @@
             mode = "wb"
             mode_code = _MODE_WRITE
             self._compressor = BZ2Compressor()
+        elif mode in ("a", "ab"):
+            mode = "ab"
+            mode_code = _MODE_WRITE
+            self._compressor = BZ2Compressor()
         else:
             raise ValueError("Invalid mode: {!r}".format(mode))
 
@@ -162,13 +166,21 @@
         if self._buffer:
             return True
         if self._decompressor.eof:
-            self._mode = _MODE_READ_EOF
-            self._size = self._pos
-            return False
-        rawblock = self._fp.read(_BUFFER_SIZE)
-        if not rawblock:
-            raise EOFError("Compressed file ended before the "
-                           "end-of-stream marker was reached")
+            # Continue to the next stream, if any.  It starts either in the
+            # leftover input or, if the previous stream ended exactly on a
+            # read boundary, in data that has not been read from the file yet.
+            rawblock = (self._decompressor.unused_data or
+                        self._fp.read(_BUFFER_SIZE))
+            if not rawblock:
+                self._mode = _MODE_READ_EOF
+                self._size = self._pos
+                return False
+            self._decompressor = BZ2Decompressor()
+        else:
+            rawblock = self._fp.read(_BUFFER_SIZE)
+            if not rawblock:
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
         self._buffer = self._decompressor.decompress(rawblock)
         return True
 
@@ -384,9 +396,16 @@
     """
     if len(data) == 0:
         return b""
-    decomp = BZ2Decompressor()
-    result = decomp.decompress(data)
-    if not decomp.eof:
-        raise ValueError("Compressed data ended before the "
-                         "end-of-stream marker was reached")
-    return result
+
+    results = []
+    while True:
+        decomp = BZ2Decompressor()
+        results.append(decomp.decompress(data))
+        if not decomp.eof:
+            raise ValueError("Compressed data ended before the "
+                             "end-of-stream marker was reached")
+        # Proceed to next stream if unused data remains.
+        if not decomp.unused_data:
+            return b"".join(results)
+
+        data = decomp.unused_data
diff -r 6354b4ceba1d Lib/test/test_bz2.py
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -84,12 +84,19 @@
         else:
             return self.DATA
 
-    def createTempFile(self, crlf=False):
+    def createTempFile(self, crlf=False, streams=1):
         with open(self.filename, "wb") as f:
-            f.write(self.getData(crlf))
+            f.write(self.getData(crlf) * streams)
 
+    def testReadMultiStream(self):
+        # "Test BZ2File.read() with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            self.assertRaises(TypeError, bz2f.read, None)
+            self.assertEqual(bz2f.read(), self.TEXT * 5)
+
     def testRead(self):
         # "Test BZ2File.read()"
         self.createTempFile()
         with BZ2File(self.filename) as bz2f:
             self.assertRaises(TypeError, bz2f.read, None)
@@ -114,6 +121,18 @@
                 text += str
             self.assertEqual(text, self.TEXT)
 
+    def testReadChunk10MultiStream(self):
+        # "Test BZ2File.read() in chunks of 10 bytes with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            text = b''
+            while 1:
+                chunk = bz2f.read(10)
+                if not chunk:
+                    break
+                text += chunk
+            self.assertEqual(text, self.TEXT * 5)
+
     def testRead100(self):
         # "Test BZ2File.read(100)"
         self.createTempFile()
@@ -151,6 +170,15 @@
             for line in sio.readlines():
                 self.assertEqual(bz2f.readline(), line)
 
+    def testReadLineMultiStream(self):
+        # "Test BZ2File.readline() with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            self.assertRaises(TypeError, bz2f.readline, None)
+            sio = BytesIO(self.TEXT * 5)
+            for line in sio.readlines():
+                self.assertEqual(bz2f.readline(), line)
+
     def testReadLines(self):
         # "Test BZ2File.readlines()"
         self.createTempFile()
@@ -159,6 +187,14 @@
             sio = BytesIO(self.TEXT)
             self.assertEqual(bz2f.readlines(), sio.readlines())
 
+    def testReadLinesMultiStream(self):
+        # "Test BZ2File.readlines() with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            self.assertRaises(TypeError, bz2f.readlines, None)
+            sio = BytesIO(self.TEXT * 5)
+            self.assertEqual(bz2f.readlines(), sio.readlines())
+
     def testIterator(self):
         # "Test iter(BZ2File)"
         self.createTempFile()
@@ -166,6 +202,13 @@
             sio = BytesIO(self.TEXT)
             self.assertEqual(list(iter(bz2f)), sio.readlines())
 
+    def testIteratorMultiStream(self):
+        # "Test iter(BZ2File) with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            sio = BytesIO(self.TEXT * 5)
+            self.assertEqual(list(iter(bz2f)), sio.readlines())
+
     def testClosedIteratorDeadlock(self):
         # "Test that iteration on a closed bz2file releases the lock."
         # http://bugs.python.org/issue3309
@@ -217,6 +260,17 @@
             self.assertRaises(IOError, bz2f.write, b"a")
             self.assertRaises(IOError, bz2f.writelines, [b"a"])
 
+    def testAppend(self):
+        # "Test BZ2File.write() on a file opened in append mode"
+        with BZ2File(self.filename, "w") as bz2f:
+            self.assertRaises(TypeError, bz2f.write)
+            bz2f.write(self.TEXT)
+        with BZ2File(self.filename, "a") as bz2f:
+            self.assertRaises(TypeError, bz2f.write)
+            bz2f.write(self.TEXT)
+        with open(self.filename, 'rb') as f:
+            self.assertEqual(self.decompress(f.read()), self.TEXT * 2)
+
     def testSeekForward(self):
         # "Test BZ2File.seek(150, 0)"
         self.createTempFile()
@@ -225,6 +279,14 @@
             bz2f.seek(150)
             self.assertEqual(bz2f.read(), self.TEXT[150:])
 
+    def testSeekForwardMultiStream(self):
+        # "Test BZ2File.seek(len(TEXT) + 150, 0) across stream boundaries"
+        self.createTempFile(streams=2)
+        with BZ2File(self.filename) as bz2f:
+            self.assertRaises(TypeError, bz2f.seek)
+            bz2f.seek(len(self.TEXT) + 150)
+            self.assertEqual(bz2f.read(), self.TEXT[150:])
+
     def testSeekBackwards(self):
         # "Test BZ2File.seek(-150, 1)"
         self.createTempFile()
@@ -233,6 +295,16 @@
             bz2f.seek(-150, 1)
             self.assertEqual(bz2f.read(), self.TEXT[500-150:])
 
+    def testSeekBackwardsMultiStream(self):
+        # "Test BZ2File.seek(-150, 1) across stream boundaries"
+        self.createTempFile(streams=2)
+        with BZ2File(self.filename) as bz2f:
+            readto = len(self.TEXT) + 100
+            while readto > 0:
+                readto -= len(bz2f.read(readto))
+            bz2f.seek(-150, 1)
+            self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)
+
     def testSeekBackwardsFromEnd(self):
         # "Test BZ2File.seek(-150, 2)"
         self.createTempFile()
@@ -240,6 +312,13 @@
             bz2f.seek(-150, 2)
             self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
 
+    def testSeekBackwardsFromEndMultiStream(self):
+        # "Test BZ2File.seek(-1000, 2), across stream boundaries"
+        self.createTempFile(streams=2)
+        with BZ2File(self.filename) as bz2f:
+            bz2f.seek(-1000, 2)
+            self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])
+
     def testSeekPostEnd(self):
         # "Test BZ2File.seek(150000)"
         self.createTempFile()
@@ -248,6 +327,14 @@
             self.assertEqual(bz2f.tell(), len(self.TEXT))
             self.assertEqual(bz2f.read(), b"")
 
+    def testSeekPostEndMultiStream(self):
+        # "Test BZ2File.seek(150000) with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            bz2f.seek(150000)
+            self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
+            self.assertEqual(bz2f.read(), b"")
+
     def testSeekPostEndTwice(self):
         # "Test BZ2File.seek(150000) twice"
         self.createTempFile()
@@ -257,6 +344,15 @@
             self.assertEqual(bz2f.tell(), len(self.TEXT))
             self.assertEqual(bz2f.read(), b"")
 
+    def testSeekPostEndTwiceMultiStream(self):
+        # "Test BZ2File.seek(150000) twice with multiple streams"
+        self.createTempFile(streams=5)
+        with BZ2File(self.filename) as bz2f:
+            bz2f.seek(150000)
+            bz2f.seek(150000)
+            self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
+            self.assertEqual(bz2f.read(), b"")
+
     def testSeekPreStart(self):
         # "Test BZ2File.seek(-150, 0)"
         self.createTempFile()
@@ -265,6 +361,14 @@
             self.assertEqual(bz2f.tell(), 0)
             self.assertEqual(bz2f.read(), self.TEXT)
 
+    def testSeekPreStartMultiStream(self):
+        # "Test BZ2File.seek(-150, 0) with multiple streams"
+        self.createTempFile(streams=2)
+        with BZ2File(self.filename) as bz2f:
+            bz2f.seek(-150)
+            self.assertEqual(bz2f.tell(), 0)
+            self.assertEqual(bz2f.read(), self.TEXT * 2)
+
     def testFileno(self):
         # "Test BZ2File.fileno()"
         self.createTempFile()
@@ -510,6 +614,11 @@
         # "Test decompress() function with incomplete data"
         self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
 
+    def testDecompressMultiStream(self):
+        # "Test decompress() function for data with multiple streams"
+        text = bz2.decompress(self.DATA * 5)
+        self.assertEqual(text, self.TEXT * 5)
+
 def test_main():
     support.run_unittest(
         BZ2FileTest,