Index: Lib/test/test_bz2.py =================================================================== --- Lib/test/test_bz2.py (revision 75584) +++ Lib/test/test_bz2.py (working copy) @@ -1,4 +1,11 @@ #!/usr/bin/python + +""" """ + +""" +Portions Copyright 2009 VMware, Inc. +""" + from test import support from test.support import TESTFN @@ -50,13 +57,13 @@ if os.path.isfile(self.filename): os.unlink(self.filename) - def createTempFile(self, crlf=0): + def createTempFile(self, crlf=0, streams=1): f = open(self.filename, "wb") if crlf: data = self.DATA_CRLF else: data = self.DATA - f.write(data) + f.write(data*streams) f.close() def testRead(self): @@ -67,6 +74,14 @@ self.assertEqual(bz2f.read(), self.TEXT) bz2f.close() + def testReadMultiStream(self): + # "Test BZ2File.read() for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + self.assertRaises(TypeError, bz2f.read, None) + self.assertEqual(bz2f.read(), self.TEXT*5) + bz2f.close() + def testRead0(self): # Test BBZ2File.read(0)" self.createTempFile() @@ -85,9 +100,22 @@ if not str: break text += str - self.assertEqual(text, text) + self.assertEqual(text, self.TEXT) bz2f.close() + def testReadChunk10MultiStream(self): + # "Test BZ2File.read() in chunks of 10 bytes for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + text = b'' + while 1: + str = bz2f.read(10) + if not str: + break + text += str + self.assertEqual(text, self.TEXT*5) + bz2f.close() + def testRead100(self): # "Test BZ2File.read(100)" self.createTempFile() @@ -105,6 +133,16 @@ self.assertEqual(bz2f.readline(), line) bz2f.close() + def testReadLineMultiStream(self): + # "Test BZ2File.readline() for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + self.assertRaises(TypeError, bz2f.readline, None) + sio = BytesIO(self.TEXT*5) + for line in sio.readlines(): + self.assertEqual(bz2f.readline(), line) + bz2f.close() + def testReadLines(self): # "Test 
BZ2File.readlines()" self.createTempFile() @@ -114,6 +152,15 @@ self.assertEqual(bz2f.readlines(), sio.readlines()) bz2f.close() + def testReadLinesMultiStream(self): + # "Test BZ2File.readlines() for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + self.assertRaises(TypeError, bz2f.readlines, None) + sio = BytesIO(self.TEXT*5) + self.assertEqual(bz2f.readlines(), sio.readlines()) + bz2f.close() + def testIterator(self): # "Test iter(BZ2File)" self.createTempFile() @@ -122,6 +169,14 @@ self.assertEqual(list(iter(bz2f)), sio.readlines()) bz2f.close() + def testIteratorMultiStream(self): + # "Test iter(BZ2File) for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + sio = BytesIO(self.TEXT*5) + self.assertEqual(list(iter(bz2f)), sio.readlines()) + bz2f.close() + def testClosedIteratorDeadlock(self): # "Test that iteration on a closed bz2file releases the lock." # http://bugs.python.org/issue3309 @@ -180,6 +235,19 @@ self.assertRaises(IOError, bz2f.write, b"a") self.assertRaises(IOError, bz2f.writelines, [b"a"]) + def testAppend(self): + # "Test BZ2File.write() on BZ2File opened in append mode" + bz2f = BZ2File(self.filename, 'w') + bz2f.write(self.TEXT) + bz2f.close() + bz2f = BZ2File(self.filename, 'a') + self.assertRaises(TypeError, bz2f.write) + bz2f.write(self.TEXT) + bz2f.close() + f = open(self.filename, 'rb') + self.assertEqual(self.decompress(f.read()), self.TEXT*2) + f.close() + def testSeekForward(self): # "Test BZ2File.seek(150, 0)" self.createTempFile() @@ -189,6 +257,15 @@ self.assertEqual(bz2f.read(), self.TEXT[150:]) bz2f.close() + def testSeekForwardMultiStream(self): + # "Test BZ2File.seek(150, 0) for a multistream file" + self.createTempFile(streams=2) + bz2f = BZ2File(self.filename) + self.assertRaises(TypeError, bz2f.seek) + bz2f.seek(len(self.TEXT)+150) + self.assertEqual(bz2f.read(), self.TEXT[150:]) + bz2f.close() + def testSeekBackwards(self): # "Test BZ2File.seek(-150, 
1)" self.createTempFile() @@ -198,6 +275,17 @@ self.assertEqual(bz2f.read(), self.TEXT[500-150:]) bz2f.close() + def testSeekBackwardsMultiStream(self): + # "Test BZ2File.seek(-150, 1) across stream boundaries" + self.createTempFile(streams=2) + bz2f = BZ2File(self.filename) + readto = len(self.TEXT)+100 + while readto > 0: + readto -= len(bz2f.read(readto)) + bz2f.seek(-150, 1) + self.assertEqual(bz2f.read(), self.TEXT[100-150:]+self.TEXT) + bz2f.close() + def testSeekBackwardsFromEnd(self): # "Test BZ2File.seek(-150, 2)" self.createTempFile() @@ -206,6 +294,14 @@ self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:]) bz2f.close() + def testSeekBackwardsFromEndMultiStream(self): + # "Test BZ2File.seek(-1000, 2), across stream boundaries" + self.createTempFile(streams=2) + bz2f = BZ2File(self.filename) + bz2f.seek(-1000, 2) + self.assertEqual(bz2f.read(), (self.TEXT*2)[-1000:]) + bz2f.close() + def testSeekPostEnd(self): # "Test BZ2File.seek(150000)" self.createTempFile() @@ -215,6 +311,15 @@ self.assertEqual(bz2f.read(), b"") bz2f.close() + def testSeekPostEndMultiStream(self): + # "Test BZ2File.seek(150000) for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + bz2f.seek(150000) + self.assertEqual(bz2f.tell(), len(self.TEXT)*5) + self.assertEqual(bz2f.read(), b"") + bz2f.close() + def testSeekPostEndTwice(self): # "Test BZ2File.seek(150000) twice" self.createTempFile() @@ -225,6 +330,16 @@ self.assertEqual(bz2f.read(), b"") bz2f.close() + def testSeekPostEndTwiceMultiStream(self): + # "Test BZ2File.seek(150000) twice for a multistream file" + self.createTempFile(streams=5) + bz2f = BZ2File(self.filename) + bz2f.seek(150000) + bz2f.seek(150000) + self.assertEqual(bz2f.tell(), len(self.TEXT)*5) + self.assertEqual(bz2f.read(), b"") + bz2f.close() + def testSeekPreStart(self): # "Test BZ2File.seek(-150, 0)" self.createTempFile() @@ -234,6 +349,15 @@ self.assertEqual(bz2f.read(), self.TEXT) bz2f.close() + def 
testSeekPreStartMultiStream(self): + # "Test BZ2File.seek(-150, 0) for a multistream file" + self.createTempFile(streams=2) + bz2f = BZ2File(self.filename) + bz2f.seek(-150) + self.assertEqual(bz2f.tell(), 0) + self.assertEqual(bz2f.read(), self.TEXT*2) + bz2f.close() + def testOpenDel(self): # "Test opening and deleting a file many times" self.createTempFile() @@ -367,6 +491,11 @@ # "Test decompress() function with incomplete data" self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10]) + def testDecompressMultiStream(self): + # "Test decompress() function for data with multiple streams" + text = bz2.decompress(self.DATA*5) + self.assertEqual(text, self.TEXT*5) + def test_main(): support.run_unittest( BZ2FileTest, Index: Modules/bz2module.c =================================================================== --- Modules/bz2module.c (revision 75584) +++ Modules/bz2module.c (working copy) @@ -4,6 +4,7 @@ Copyright (c) 2002 Gustavo Niemeyer Copyright (c) 2002 Python Software Foundation; All Rights Reserved +Portions Copyright 2009 VMware, Inc. */ @@ -191,6 +192,51 @@ return ret; } +/* Any time we hit a BZ_STREAM_END there are a number of bookkeeping details + * in opening the next stream in the file. Handle those here. + * + * If this is really EOF, then set f->mode to MODE_READ_EOF and f->size to + * f->pos. + * + * This function returns 0 on success, 1 if there's an error. 
+ */ +static int +Util_HandleBZStreamEnd(BZ2FileObject *f) +{ + int bzerror = 0; + char unused[BZ_MAX_UNUSED]; + void *tmpunused = NULL; + int nunused=0; + + /* get any unused data: tmpunused and nunused receive the leftover bytes bzlib reports for the stream that just ended, and their count; they are copied into the local buffer below before f->fp is closed (presumably because tmpunused points into storage owned by f->fp -- TODO confirm against the bzlib manual) */ + BZ2_bzReadGetUnused(&bzerror, f->fp, &tmpunused, &nunused); + if (bzerror != BZ_OK) + return Util_CatchBZ2Error(bzerror); + memcpy((void*)unused, tmpunused, nunused); + + /* close the current stream; fp and mode are reset before the error check, so a failed close still leaves the object marked MODE_CLOSED with no stream handle */ + BZ2_bzReadClose(&bzerror, f->fp); + f->fp = NULL; + f->mode = MODE_CLOSED; + if (bzerror != BZ_OK) + return Util_CatchBZ2Error(bzerror); + + /* open the next stream on the same raw file, passing the saved leftover bytes back to bzlib */ + f->fp = BZ2_bzReadOpen(&bzerror, f->rawfp, 0, 0, + (void*)unused, nunused); + if (bzerror != BZ_OK) + return Util_CatchBZ2Error(bzerror); + f->mode = MODE_READ; + + /* check for EOF: with no leftover bytes and the raw file at end-of-file there is no further stream, so switch to MODE_READ_EOF and record the final size. NOTE(review): feof() only becomes true after a read has actually hit end-of-file -- confirm EOF cannot be missed when the last read stopped exactly at the end of the file */ + if (nunused == 0 && feof(f->rawfp)) { + f->mode = MODE_READ_EOF; + f->size = f->pos; + } + + return 0; +} + #if BUFSIZ < 8192 #define SMALLCHUNK 8192 #else @@ -250,9 +296,13 @@ } while (bzerror == BZ_OK && c != '\n' && buf != end); Py_END_ALLOW_THREADS if (bzerror == BZ_STREAM_END) { - f->size = f->pos; - f->mode = MODE_READ_EOF; - break; + if (!Util_HandleBZStreamEnd(f)) { + if (f->mode == MODE_READ_EOF) + break; + } else { + Py_DECREF(v); + return NULL; + } } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); Py_DECREF(v); @@ -325,8 +375,10 @@ Py_END_ALLOW_THREADS f->pos += chunksize; if (bzerror == BZ_STREAM_END) { - f->size = f->pos; - f->mode = MODE_READ_EOF; + if (Util_HandleBZStreamEnd(f)) { + Util_DropReadAhead(f); + return -1; + } } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); Util_DropReadAhead(f); @@ -446,9 +498,14 @@ Py_END_ALLOW_THREADS bytesread += chunksize; if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - break; + if (!Util_HandleBZStreamEnd(self)) { + if (self->mode == MODE_READ_EOF) + break; + } else { + Py_DECREF(ret); + ret = NULL; + goto cleanup; + } } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); Py_DECREF(ret); @@ -578,13 +635,16 @@
self->pos += nread; Py_END_ALLOW_THREADS if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - if (nread == 0) { - sizehint = 0; - break; + if (!Util_HandleBZStreamEnd(self)) { + if (nread == 0) { + sizehint = 0; + break; + } + if (self->mode == MODE_READ_EOF) + shortread = 1; + } else { + goto error; } - shortread = 1; } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); error: @@ -600,7 +660,8 @@ buffersize *= 2; if (buffersize > INT_MAX) { PyErr_SetString(PyExc_OverflowError, - "line is longer than a Python string can hold"); + "line is longer than a Python string can " + "hold"); goto error; } if (big_buffer == NULL) { @@ -614,7 +675,8 @@ } else { /* Grow the big buffer */ - if (_PyBytes_Resize(&big_buffer, buffersize) < 0){ + if (_PyBytes_Resize(&big_buffer, buffersize) + < 0){ big_buffer = NULL; goto error; } @@ -937,15 +999,20 @@ bytesread += chunksize; if (bzerror == BZ_STREAM_END) { - break; + if (!Util_HandleBZStreamEnd(self)) { + if (self->mode == + MODE_READ_EOF) + break; + } else { + goto cleanup; + } } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); goto cleanup; } } - self->mode = MODE_READ_EOF; + bytesread = 0; self->size = self->pos; - bytesread = 0; } offset = self->size + offset; } else if (where == 1) { @@ -954,7 +1021,6 @@ /* Before getting here, offset must be the absolute position the file * pointer should be set to. 
*/ - if (offset >= self->pos) { /* we can move forward */ offset -= self->pos; @@ -995,9 +1061,12 @@ Py_END_ALLOW_THREADS bytesread += chunksize; if (bzerror == BZ_STREAM_END) { - self->size = self->pos; - self->mode = MODE_READ_EOF; - break; + if (!Util_HandleBZStreamEnd(self)) { + if (self->mode == MODE_READ_EOF) + break; + } else { + goto cleanup; + } } else if (bzerror != BZ_OK) { Util_CatchBZ2Error(bzerror); goto cleanup; @@ -1064,11 +1133,13 @@ switch (self->mode) { case MODE_READ: case MODE_READ_EOF: - BZ2_bzReadClose(&bzerror, self->fp); + if (self->fp) + BZ2_bzReadClose(&bzerror, self->fp); break; case MODE_WRITE: - BZ2_bzWriteClose(&bzerror, self->fp, - 0, NULL, NULL); + if (self->fp) + BZ2_bzWriteClose(&bzerror, self->fp, + 0, NULL, NULL); break; } self->mode = MODE_CLOSED; @@ -1182,6 +1253,7 @@ switch (*mode) { case 'r': case 'w': + case 'a': if (mode_char) error = 1; mode_char = *mode; @@ -1208,8 +1280,21 @@ mode_char = 'r'; } - mode = (mode_char == 'r') ? "rb" : "wb"; + switch (mode_char) { + case 'w': + mode = "wb"; + break; + case 'a': + mode = "ab"; + break; + + case 'r': + default: + mode = "rb"; + break; + } + self->rawfp = fopen(name, mode); if (self->rawfp == NULL) { PyErr_SetFromErrno(PyExc_IOError); @@ -1267,11 +1352,13 @@ switch (self->mode) { case MODE_READ: case MODE_READ_EOF: - BZ2_bzReadClose(&bzerror, self->fp); + if (self->fp) + BZ2_bzReadClose(&bzerror, self->fp); break; case MODE_WRITE: - BZ2_bzWriteClose(&bzerror, self->fp, - 0, NULL, NULL); + if (self->fp) + BZ2_bzWriteClose(&bzerror, self->fp, + 0, NULL, NULL); break; } Util_DropReadAhead(self); @@ -2004,6 +2091,7 @@ bz_stream _bzs; bz_stream *bzs = &_bzs; int bzerror; + Py_ssize_t total_out = 0; if (!PyArg_ParseTuple(args, "y*:decompress", &pdata)) return NULL; @@ -2041,7 +2129,19 @@ bzerror = BZ2_bzDecompress(bzs); Py_END_ALLOW_THREADS if (bzerror == BZ_STREAM_END) { - break; + if (bzs->avail_in > 0) { + total_out += (Py_ssize_t) BZS_TOTAL_OUT(bzs); + BZ2_bzDecompressEnd(bzs); + 
bzerror = BZ2_bzDecompressInit(bzs, 0, 0); + if (bzerror != BZ_OK) { + Util_CatchBZ2Error(bzerror); + Py_DECREF(ret); + PyBuffer_Release(&pdata); + return NULL; + } + } else { + break; + } } else if (bzerror != BZ_OK) { BZ2_bzDecompressEnd(bzs); Util_CatchBZ2Error(bzerror); @@ -2064,13 +2164,15 @@ PyBuffer_Release(&pdata); return NULL; } - bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs); + bzs->next_out = BUF(ret) + total_out + + BZS_TOTAL_OUT(bzs); bzs->avail_out = bufsize - (bzs->next_out - BUF(ret)); } } if (bzs->avail_out != 0) { - if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) { + total_out += BZS_TOTAL_OUT(bzs); + if (_PyBytes_Resize(&ret, total_out) < 0) { ret = NULL; } }