diff -r 48228fb874c1 Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Mon Nov 05 09:34:46 2012 +0200 +++ b/Lib/test/test_zlib.py Mon Nov 05 15:43:37 2012 +0200 @@ -490,16 +490,37 @@ def test_decompress_unused_data(self): # Repeated calls to decompress() after EOF should accumulate data in # dco.unused_data, instead of just storing the arg to the last call. - x = zlib.compress(HAMLET_SCENE) + HAMLET_SCENE - for step in 1, 2, 100: + source = b'abcdefghijklmnopqrstuvwxyz' + remainder = b'0123456789' + y = zlib.compress(source) + x = y + remainder + for step in 1, 2, len(y), len(x): dco = zlib.decompressobj() - data = b''.join(dco.decompress(x[i : i + step]) - for i in range(0, len(x), step)) + data = b'' + for i in range(0, len(x), step): + if i < len(y): + self.assertEqual(dco.unused_data, b'') + data += dco.decompress(x[i : i + step]) + self.assertEqual(dco.unconsumed_tail, b'') data += dco.flush() + self.assertTrue(dco.eof) + self.assertEqual(data, source) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) + for step in 1, 2, len(y), len(x): + dco = zlib.decompressobj() + data = b'' + for i in range(0, len(x), step): + if i < len(y): + self.assertEqual(dco.unused_data, b'') + data += dco.decompress(dco.unconsumed_tail + x[i : i + step], + 1000) + data += dco.flush() self.assertTrue(dco.eof) - self.assertEqual(data, HAMLET_SCENE) - self.assertEqual(dco.unused_data, HAMLET_SCENE) + self.assertEqual(data, source) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) if hasattr(zlib.compressobj(), "copy"): def test_compresscopy(self): diff -r 48228fb874c1 Modules/zlibmodule.c --- a/Modules/zlibmodule.c Mon Nov 05 09:34:46 2012 +0200 +++ b/Modules/zlibmodule.c Mon Nov 05 15:43:37 2012 +0200 @@ -561,6 +561,47 @@ return RetVal; } +static int +save_remainder(compobject *self, int err) +{ + if (err == Z_STREAM_END) { + /* The end of the compressed data has been reached, so set the + unused_data attribute to a string containing the remainder of the + data in the string. + */ + if (self->zst.avail_in) { + size_t old_size = PyBytes_GET_SIZE(self->unused_data); + PyObject *new_data; + if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) { + PyErr_NoMemory(); + return -1; + } + new_data = PyBytes_FromStringAndSize(NULL, + old_size + self->zst.avail_in); + if (new_data == NULL) + return -1; + Py_MEMCPY(PyBytes_AS_STRING(new_data), + PyBytes_AS_STRING(self->unused_data), old_size); + Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size, + self->zst.next_in, self->zst.avail_in); + Py_DECREF(self->unused_data); + self->unused_data = new_data; + } + self->zst.avail_in = 0; + } + if (self->zst.avail_in || PyBytes_GET_SIZE(self->unconsumed_tail)) { + /* Not all of the compressed data could be accommodated in a buffer of + the specified size. Return the unconsumed tail in an attribute. */ + PyObject *new_data = PyBytes_FromStringAndSize( + (char *)self->zst.next_in, self->zst.avail_in); + if (new_data == NULL) + return -1; + Py_DECREF(self->unconsumed_tail); + self->unconsumed_tail = new_data; + } + return 0; +} + PyDoc_STRVAR(decomp_decompress__doc__, "decompress(data, max_length) -- Return a string containing the decompressed\n" "version of the data.\n" @@ -668,61 +709,23 @@ Py_END_ALLOW_THREADS } - if(max_length) { - /* Not all of the compressed data could be accommodated in a buffer of - the specified size. Return the unconsumed tail in an attribute. */ - Py_DECREF(self->unconsumed_tail); - self->unconsumed_tail = PyBytes_FromStringAndSize((char *)self->zst.next_in, - self->zst.avail_in); - } - else if (PyBytes_GET_SIZE(self->unconsumed_tail) > 0) { - /* All of the compressed data was consumed. Clear unconsumed_tail. */ - Py_DECREF(self->unconsumed_tail); - self->unconsumed_tail = PyBytes_FromStringAndSize("", 0); - } - if (self->unconsumed_tail == NULL) { + if (save_remainder(self, err) < 0) { Py_DECREF(RetVal); RetVal = NULL; goto error; } - /* The end of the compressed data has been reached, so set the - unused_data attribute to a string containing the remainder of the - data in the string. Note that this is also a logical place to call - inflateEnd, but the old behaviour of only calling it on flush() is - preserved. - */ if (err == Z_STREAM_END) { - if (self->zst.avail_in > 0) { - /* Append the leftover data to the existing value of unused_data. */ - Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); - Py_ssize_t new_size = old_size + self->zst.avail_in; - PyObject *new_data; - if (new_size <= old_size) { /* Check for overflow. */ - PyErr_NoMemory(); - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } - new_data = PyBytes_FromStringAndSize(NULL, new_size); - if (new_data == NULL) { - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } - Py_MEMCPY(PyBytes_AS_STRING(new_data), - PyBytes_AS_STRING(self->unused_data), old_size); - Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size, - self->zst.next_in, self->zst.avail_in); - Py_DECREF(self->unused_data); - self->unused_data = new_data; - } + /* This is a logical place to call inflateEnd, but the old behaviour + of only calling it on flush() is preserved. + */ self->eof = 1; + } + else if (err != Z_OK && err != Z_BUF_ERROR) { /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ - } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while decompressing data"); Py_DECREF(RetVal); RetVal = NULL; @@ -996,6 +999,12 @@ Py_END_ALLOW_THREADS } + if (save_remainder(self, err) < 0) { + Py_DECREF(retval); + retval = NULL; + goto error; + } + /* If at end of stream, clean up any memory allocated by zlib. */ if (err == Z_STREAM_END) { self->eof = 1; @@ -1008,6 +1017,7 @@ goto error; } } + if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) { Py_DECREF(retval); retval = NULL;