diff -r 3084914245d2 Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Mon Feb 08 20:34:49 2016 -0800 +++ b/Lib/test/test_zlib.py Wed Feb 17 22:51:12 2016 -0500 @@ -157,10 +157,13 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase): + expected_type = bytes + decompress = staticmethod(zlib.decompress) + # Test compression in one go (whole message compression) def test_speech(self): x = zlib.compress(HAMLET_SCENE) - self.assertEqual(zlib.decompress(x), HAMLET_SCENE) + self.assertEqual(self.decompress(x), HAMLET_SCENE) def test_speech128(self): # compress more data @@ -168,14 +171,14 @@ x = zlib.compress(data) self.assertEqual(zlib.compress(bytearray(data)), x) for ob in x, bytearray(x): - self.assertEqual(zlib.decompress(ob), data) + self.assertEqual(self.decompress(ob), data) def test_incomplete_stream(self): # An useful error message is given x = zlib.compress(HAMLET_SCENE) self.assertRaisesRegex(zlib.error, "Error -5 while decompressing data: incomplete or truncated stream", - zlib.decompress, x[:-1]) + self.decompress, x[:-1]) # Memory use of the following functions takes into account overallocation @@ -186,14 +189,14 @@ @bigmemtest(size=_1G + 1024 * 1024, memuse=2) def test_big_decompress_buffer(self, size): - self.check_big_decompress_buffer(size, zlib.decompress) + self.check_big_decompress_buffer(size, self.decompress) @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) def test_length_overflow(self, size): data = b'x' * size try: self.assertRaises(OverflowError, zlib.compress, data, 1) - self.assertRaises(OverflowError, zlib.decompress, data) + self.assertRaises(OverflowError, self.decompress, data) finally: data = None @@ -209,8 +212,28 @@ compressed = zlib.compress(data, 1) self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data) + def test_produces_correct_type(self): + self.assertIsInstance( + self.decompress(zlib.compress(b'data')), + self.expected_type, + ) + + +class AsByteArrayTestCase(CompressTestCase): + expected_type = 
bytearray + + @staticmethod + def decompress(*args): + return zlib.decompress(*args, as_bytearray=True) + class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): + expected_type = bytes + + @staticmethod + def decompress(d, *args): + return d.decompress(*args) + # Test compression object def test_pair(self): # straightforward compress/decompress objects @@ -641,7 +664,7 @@ # Test copying a compression object in an inconsistent state data = zlib.compress(HAMLET_SCENE) d = zlib.decompressobj() - d.decompress(data) + self.decompress(d, data) d.flush() self.assertRaises(ValueError, d.copy) @@ -666,7 +689,7 @@ @bigmemtest(size=_1G + 1024 * 1024, memuse=2) def test_big_decompress_buffer(self, size): d = zlib.decompressobj() - decompress = lambda s: d.decompress(s) + d.flush() + decompress = lambda s: self.decompress(d, s) + d.flush() self.check_big_decompress_buffer(size, decompress) @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) @@ -676,10 +699,25 @@ d = zlib.decompressobj() try: self.assertRaises(OverflowError, c.compress, data) - self.assertRaises(OverflowError, d.decompress, data) + self.assertRaises(OverflowError, self.decompress, d, data) finally: data = None + def test_produces_correct_type(self): + d = zlib.decompressobj() + self.assertIsInstance( + self.decompress(d, zlib.compress(b'data')), + self.expected_type, + ) + + +class CompressObjectAsByteArrayTestCase(CompressObjectTestCase): + expected_type = bytearray + + @staticmethod + def decompress(d, *args): + return d.decompress(*args, as_bytearray=True) + def genblock(seed, length, step=1024, generator=random): """length-byte stream of random data from a seed (in step-byte blocks).""" diff -r 3084914245d2 Modules/clinic/zlibmodule.c.h --- a/Modules/clinic/zlibmodule.c.h Mon Feb 08 20:34:49 2016 -0800 +++ b/Modules/clinic/zlibmodule.c.h Wed Feb 17 22:51:12 2016 -0500 @@ -40,37 +40,42 @@ } PyDoc_STRVAR(zlib_decompress__doc__, -"decompress($module, data, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE, 
/)\n" +"decompress($module, /, data, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE, *,\n" +" as_bytearray=False)\n" "--\n" "\n" -"Returns a bytes object containing the uncompressed data.\n" +"Returns a bytes or bytearray object containing the uncompressed data.\n" "\n" " data\n" " Compressed data.\n" " wbits\n" " The window buffer size.\n" " bufsize\n" -" The initial output buffer size."); +" The initial output buffer size.\n" +" as_bytearray\n" +" Should the return value be a bytearray object instead of a bytes object."); #define ZLIB_DECOMPRESS_METHODDEF \ - {"decompress", (PyCFunction)zlib_decompress, METH_VARARGS, zlib_decompress__doc__}, + {"decompress", (PyCFunction)zlib_decompress, METH_VARARGS|METH_KEYWORDS, zlib_decompress__doc__}, static PyObject * zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits, - unsigned int bufsize); + unsigned int bufsize, int as_bytearray); static PyObject * -zlib_decompress(PyModuleDef *module, PyObject *args) +zlib_decompress(PyModuleDef *module, PyObject *args, PyObject *kwargs) { PyObject *return_value = NULL; + static char *_keywords[] = {"data", "wbits", "bufsize", "as_bytearray", NULL}; Py_buffer data = {NULL, NULL}; int wbits = MAX_WBITS; unsigned int bufsize = DEF_BUF_SIZE; + int as_bytearray = 0; - if (!PyArg_ParseTuple(args, "y*|iO&:decompress", - &data, &wbits, capped_uint_converter, &bufsize)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iO&$p:decompress", _keywords, + &data, &wbits, capped_uint_converter, &bufsize, &as_bytearray)) goto exit; - return_value = zlib_decompress_impl(module, &data, wbits, bufsize); + return_value = zlib_decompress_impl(module, &data, wbits, bufsize, as_bytearray); exit: /* Cleanup for data */ @@ -212,10 +217,10 @@ } PyDoc_STRVAR(zlib_Decompress_decompress__doc__, -"decompress($self, data, max_length=0, /)\n" +"decompress($self, /, data, max_length=0, *, as_bytearray=False)\n" "--\n" "\n" -"Return a bytes object containing the decompressed version of the data.\n" 
+"Return a bytes or bytearray object containing the decompressed version of the data.\n" "\n" " data\n" " The binary data to decompress.\n" @@ -223,29 +228,33 @@ " The maximum allowable length of the decompressed data.\n" " Unconsumed input data will be stored in\n" " the unconsumed_tail attribute.\n" +" as_bytearray\n" +" Should the return value be a bytearray object instead of a bytes object.\n" "\n" "After calling this function, some of the input data may still be stored in\n" "internal buffers for later processing.\n" "Call the flush() method to clear these buffers."); #define ZLIB_DECOMPRESS_DECOMPRESS_METHODDEF \ - {"decompress", (PyCFunction)zlib_Decompress_decompress, METH_VARARGS, zlib_Decompress_decompress__doc__}, + {"decompress", (PyCFunction)zlib_Decompress_decompress, METH_VARARGS|METH_KEYWORDS, zlib_Decompress_decompress__doc__}, static PyObject * zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, - unsigned int max_length); + unsigned int max_length, int as_bytearray); static PyObject * -zlib_Decompress_decompress(compobject *self, PyObject *args) +zlib_Decompress_decompress(compobject *self, PyObject *args, PyObject *kwargs) { PyObject *return_value = NULL; + static char *_keywords[] = {"data", "max_length", "as_bytearray", NULL}; Py_buffer data = {NULL, NULL}; unsigned int max_length = 0; + int as_bytearray = 0; - if (!PyArg_ParseTuple(args, "y*|O&:decompress", - &data, capped_uint_converter, &max_length)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O&$p:decompress", _keywords, + &data, capped_uint_converter, &max_length, &as_bytearray)) goto exit; - return_value = zlib_Decompress_decompress_impl(self, &data, max_length); + return_value = zlib_Decompress_decompress_impl(self, &data, max_length, as_bytearray); exit: /* Cleanup for data */ @@ -439,4 +448,4 @@ #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ -/*[clinic end generated code: 
output=cf81e1deae3af0ce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=88ce80dcbe005419 input=a9049054013a1b77]*/ diff -r 3084914245d2 Modules/zlibmodule.c --- a/Modules/zlibmodule.c Mon Feb 08 20:34:49 2016 -0800 +++ b/Modules/zlibmodule.c Wed Feb 17 22:51:12 2016 -0500 @@ -265,6 +265,28 @@ return 1; } +/* Function that gives PyByteArray_Resize the same signature as + _PyBytes_Resize. */ +static int +bytearray_resize(PyObject **arr, Py_ssize_t requested_size) +{ + return PyByteArray_Resize(*arr, requested_size); +} + +/* Symbol versions of the *_AS_STRING macros so we can programmatically dispatch + without paying the pointer check. */ +static char * +bytearray_as_string(PyObject *ob) +{ + return PyByteArray_AS_STRING(ob); +} + +static char * +bytes_as_string(PyObject *ob) +{ + return PyBytes_AS_STRING(ob); +} + /*[clinic input] zlib.decompress @@ -274,15 +296,17 @@ The window buffer size. bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE The initial output buffer size. - / + * + as_bytearray: bool = False + Should the return value be a bytearray object instead of a bytes object. -Returns a bytes object containing the uncompressed data. +Returns a bytes or bytearray object containing the uncompressed data. 
[clinic start generated code]*/ static PyObject * zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits, - unsigned int bufsize) -/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/ + unsigned int bufsize, int as_bytearray) +/*[clinic end generated code: output=5929c4f0874228b8 input=bf3aba6c39d2a343]*/ { PyObject *result_str = NULL; Byte *input; @@ -290,6 +314,8 @@ int err; unsigned int new_bufsize; z_stream zst; + int (*resize)(PyObject**, Py_ssize_t); + char *(*as_string)(PyObject*); if ((size_t)data->len > UINT_MAX) { PyErr_SetString(PyExc_OverflowError, @@ -305,14 +331,26 @@ zst.avail_in = length; zst.avail_out = bufsize; - if (!(result_str = PyBytes_FromStringAndSize(NULL, bufsize))) - goto error; + if (as_bytearray) { + if (!(result_str = PyByteArray_FromStringAndSize(NULL, bufsize))) { + goto error; + } + resize = bytearray_resize; + as_string = bytearray_as_string; + } + else { + if (!(result_str = PyBytes_FromStringAndSize(NULL, bufsize))) { + goto error; + } + resize = _PyBytes_Resize; + as_string = bytes_as_string; + } zst.opaque = NULL; zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; - zst.next_out = (Byte *)PyBytes_AS_STRING(result_str); zst.next_in = (Byte *)input; + zst.next_out = (Byte *)as_string(result_str); err = inflateInit2(&zst, wbits); switch(err) { @@ -354,12 +392,11 @@ new_bufsize = bufsize << 1; else new_bufsize = UINT_MAX; - if (_PyBytes_Resize(&result_str, new_bufsize) < 0) { + if (resize(&result_str, new_bufsize) < 0) { inflateEnd(&zst); goto error; } - zst.next_out = - (unsigned char *)PyBytes_AS_STRING(result_str) + bufsize; + zst.next_out = (unsigned char *)as_string(result_str) + bufsize; zst.avail_out = bufsize; bufsize = new_bufsize; break; @@ -376,8 +413,9 @@ goto error; } - if (_PyBytes_Resize(&result_str, zst.total_out) < 0) + if (resize(&result_str, zst.total_out) < 0) { goto error; + } return result_str; @@ -694,9 +732,11 @@ The maximum allowable length of the decompressed 
data. Unconsumed input data will be stored in the unconsumed_tail attribute. - / + * + as_bytearray: bool = False + Should the return value be a bytearray object instead of a bytes object. -Return a bytes object containing the decompressed version of the data. +Return a bytes or bytearray object containing the decompressed version of the data. After calling this function, some of the input data may still be stored in internal buffers for later processing. @@ -705,13 +745,15 @@ static PyObject * zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, - unsigned int max_length) -/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/ + unsigned int max_length, int as_bytearray) +/*[clinic end generated code: output=015f8d326f12871e input=908cd9875e03f886]*/ { int err; unsigned int old_length, length = DEF_BUF_SIZE; PyObject *RetVal = NULL; unsigned long start_total_out; + int (*resize)(PyObject**, Py_ssize_t); + char *(*as_string)(PyObject*); if ((size_t)data->len > UINT_MAX) { PyErr_SetString(PyExc_OverflowError, @@ -722,8 +764,21 @@ /* limit amount of data allocated to max_length */ if (max_length && length > max_length) length = max_length; - if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) - return NULL; + + if (as_bytearray) { + if (!(RetVal = PyByteArray_FromStringAndSize(NULL, length))) { + return NULL; + } + resize = bytearray_resize; + as_string = bytearray_as_string; + } + else { + if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) { + return NULL; + } + resize = _PyBytes_Resize; + as_string = bytes_as_string; + } ENTER_ZLIB(self); @@ -731,7 +786,7 @@ self->zst.avail_in = (unsigned int)data->len; self->zst.next_in = data->buf; self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal); + self->zst.next_out = (unsigned char *)as_string(RetVal); Py_BEGIN_ALLOW_THREADS err = inflate(&(self->zst), Z_SYNC_FLUSH); @@ -783,12 +838,11 @@ if (max_length && length > max_length) 
length = max_length; - if (_PyBytes_Resize(&RetVal, length) < 0) { + if (resize(&RetVal, length) < 0) { Py_CLEAR(RetVal); goto error; } - self->zst.next_out = - (unsigned char *)PyBytes_AS_STRING(RetVal) + old_length; + self->zst.next_out = (unsigned char *)as_string(RetVal) + old_length; self->zst.avail_out = length - old_length; Py_BEGIN_ALLOW_THREADS @@ -817,7 +871,7 @@ goto error; } - if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) { + if (resize(&RetVal, self->zst.total_out - start_total_out) < 0) { Py_CLEAR(RetVal); }