# HG changeset patch # Parent 434fc614c506eeee61bc0916e5df41080f3ab244 Issue #27130: Fix handling of buffers exceeding (U)INT_MAX in “zlib” module Ported from patches by Xiang Zhang, Nadeem Vawda, and myself. diff -r 434fc614c506 Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Tue Jul 19 16:41:20 2016 -0500 +++ b/Lib/test/test_zlib.py Fri Jul 22 04:45:37 2016 +0000 @@ -1,4 +1,5 @@ import unittest +from test import test_support as support from test.test_support import TESTFN, run_unittest, import_module, unlink, requires import binascii import pickle @@ -80,6 +81,16 @@ zlib.crc32('spam', (2**31))) +# Issue #10276 - check that inputs >=4GB are handled correctly. +class ChecksumBigBufferTestCase(unittest.TestCase): + + @precisionbigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(zlib.crc32(data) & 0xFFFFFFFF, 1044521549) + self.assertEqual(zlib.adler32(data) & 0xFFFFFFFF, 2256789997) + + class ExceptionTestCase(unittest.TestCase): # make sure we generate some expected errors def test_badlevel(self): @@ -104,6 +115,15 @@ self.assertRaises(ValueError, zlib.decompressobj().flush, 0) self.assertRaises(ValueError, zlib.decompressobj().flush, -1) + @support.cpython_only + def test_overflow(self): + with self.assertRaisesRegexp(OverflowError, 'int too large'): + zlib.decompress(b'', 15, sys.maxsize + 1) + with self.assertRaisesRegexp(OverflowError, 'int too large'): + zlib.decompressobj().decompress(b'', sys.maxsize + 1) + with self.assertRaisesRegexp(OverflowError, 'int too large'): + zlib.decompressobj().flush(sys.maxsize + 1) + class BaseCompressTestCase(object): def check_big_compress_buffer(self, size, compress_func): @@ -167,6 +187,28 @@ def test_big_decompress_buffer(self, size): self.check_big_decompress_buffer(size, zlib.decompress) + @precisionbigmemtest(size=_4G, memuse=1) + def test_large_bufsize(self, size): + # Test decompress(bufsize) parameter greater than the internal limit + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + self.assertEqual(zlib.decompress(compressed, 15, size), data) + + def test_custom_bufsize(self): + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data) + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @precisionbigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): + data = b'x' * size + try: + comp = zlib.compress(data, 0) + self.assertEqual(zlib.decompress(comp), data) + finally: + comp = data = None + class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): # Test compression object @@ -318,6 +360,22 @@ self.assertRaises(ValueError, dco.decompress, "", -1) self.assertEqual('', dco.unconsumed_tail) + def test_maxlen_large(self): + # Sizes up to sys.maxsize should be accepted, although zlib is + # internally limited to expressing sizes with unsigned int + data = HAMLET_SCENE * 10 + DEFAULTALLOC = 16 * 1024 + self.assertGreater(len(data), DEFAULTALLOC) + compressed = zlib.compress(data, 1) + dco = zlib.decompressobj() + self.assertEqual(dco.decompress(compressed, sys.maxsize), data) + + def test_maxlen_custom(self): + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + dco = zlib.decompressobj() + self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100]) + def test_clear_unconsumed_tail(self): # Issue #12050: calling decompress() without providing max_length # should clear the unconsumed_tail 
attribute. @@ -416,6 +474,22 @@ data = zlib.compress(input2) self.assertEqual(dco.flush(), input1[1:]) + @precisionbigmemtest(size=_4G, memuse=1) + def test_flush_large_length(self, size): + # Test flush(length) parameter greater than internal limit UINT_MAX + input = HAMLET_SCENE * 10 + data = zlib.compress(input, 1) + dco = zlib.decompressobj() + dco.decompress(data, 1) + self.assertEqual(dco.flush(size), input[1:]) + + def test_flush_custom_length(self): + input = HAMLET_SCENE * 10 + data = zlib.compress(input, 1) + dco = zlib.decompressobj() + dco.decompress(data, 1) + self.assertEqual(dco.flush(CustomInt()), input[1:]) + @requires_Compress_copy def test_compresscopy(self): # Test copying a compression object @@ -527,6 +601,46 @@ decompress = lambda s: d.decompress(s) + d.flush() self.check_big_decompress_buffer(size, decompress) + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @precisionbigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): + data = b'x' * size + co = zlib.compressobj(0) + do = zlib.decompressobj() + try: + comp = co.compress(data) + co.flush() + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(uncomp, data) + finally: + comp = uncomp = data = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @precisionbigmemtest(size=_4G + 100, memuse=3) + def test_large_unused_data(self, size): + data = b'abcdefghijklmnop' + unused = b'x' * size + comp = zlib.compress(data) + unused + do = zlib.decompressobj() + try: + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(unused, do.unused_data) + self.assertEqual(uncomp, data) + finally: + unused = comp = do = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @precisionbigmemtest(size=_4G + 100, memuse=5) + def test_large_unconsumed_tail(self, size): + data = b'x' * size + do = zlib.decompressobj() + try: + comp = zlib.compress(data, 0) + uncomp = do.decompress(comp, 1) + do.flush() + self.assertEqual(uncomp, data) + self.assertEqual(do.unconsumed_tail, b'') + finally: + comp = uncomp = data = None + def test_wbits(self): co = zlib.compressobj(1, zlib.DEFLATED, 15) zlib15 = co.compress(HAMLET_SCENE) + co.flush() @@ -658,9 +772,15 @@ """ +class CustomInt: + def __int__(self): + return 100 + + def test_main(): run_unittest( ChecksumTestCase, + ChecksumBigBufferTestCase, ExceptionTestCase, CompressTestCase, CompressObjectTestCase diff -r 434fc614c506 Misc/NEWS --- a/Misc/NEWS Tue Jul 19 16:41:20 2016 -0500 +++ b/Misc/NEWS Fri Jul 22 04:45:37 2016 +0000 @@ -26,6 +26,11 @@ Library ------- +- Issue #27130: Fix handling of large buffers (typically 2 or 4 GiB) in the + "zlib" module. Previously, inputs were typically limited to 2 GiB, and + compression and decompression operations did not properly handle results of + 2 or 4 GiB. + - Issue #23804: Fix SSL zero-length recv() calls to not block and not raise an error about unclean EOF. 
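The test and NEWS changes above exercise zlib with buffer sizes and length arguments at or beyond 4 GiB. As a rough, illustrative sketch (not part of the patch) of the behaviour those tests expect on a 64-bit build, the snippet below uses small data and only pushes the size arguments past zlib's internal unsigned-int limit; the actual multi-gigabyte data paths are what the bigmem tests cover:

    import sys
    import zlib

    data = b"nyan" * 1024
    compressed = zlib.compress(data, 1)

    # After the fix, bufsize / max_length may be any value up to sys.maxsize,
    # even though zlib itself only expresses sizes as unsigned int.
    assert zlib.decompress(compressed, 15, sys.maxsize) == data

    dco = zlib.decompressobj()
    assert dco.decompress(compressed, sys.maxsize) == data

    # Checksums of huge inputs are now computed in UINT_MAX-sized pieces;
    # masking with 0xFFFFFFFF keeps the result consistent across platforms.
    crc = zlib.crc32(data) & 0xFFFFFFFF
    adler = zlib.adler32(data) & 0xFFFFFFFF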
diff -r 434fc614c506 Modules/zlibmodule.c --- a/Modules/zlibmodule.c Tue Jul 19 16:41:20 2016 -0500 +++ b/Modules/zlibmodule.c Fri Jul 22 04:45:37 2016 +0000 @@ -1,8 +1,9 @@ /* zlibmodule.c -- gzip-compatible data compression */ -/* See http://www.gzip.org/zlib/ */ +/* See http://zlib.net/ */ /* Windows users: read Python's PCbuild\readme.txt */ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "zlib.h" @@ -129,6 +130,71 @@ return self; } +static void +arrange_input_buffer(z_stream *zst, Py_ssize_t *remains) +{ + if ((size_t)*remains > UINT_MAX) { + zst->avail_in = UINT_MAX; + } else { + zst->avail_in = *remains; + } + *remains -= zst->avail_in; +} + +static Py_ssize_t +arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer, + Py_ssize_t length, + Py_ssize_t max_length) +{ + Py_ssize_t occupied; + + if (*buffer == NULL) { + if (!(*buffer = PyBytes_FromStringAndSize(NULL, length))) + return -1; + occupied = 0; + } + else { + occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer); + + if (length == occupied) { + Py_ssize_t new_length; + assert(length <= max_length); + /* can not scale the buffer over max_length */ + if (length == max_length) + return -2; + if (length <= (max_length >> 1)) + new_length = length << 1; + else + new_length = max_length; + if (_PyBytes_Resize(buffer, new_length) < 0) + return -1; + length = new_length; + } + } + + if ((size_t)(length - occupied) > UINT_MAX) { + zst->avail_out = UINT_MAX; + } else { + zst->avail_out = length - occupied; + } + zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied; + + return length; +} + +static Py_ssize_t +arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length) +{ + Py_ssize_t ret; + + ret = arrange_output_buffer_with_maximum(zst, buffer, length, + PY_SSIZE_T_MAX); + if (ret == -2) + PyErr_NoMemory(); + + return ret; +} + PyDoc_STRVAR(compress__doc__, "compress(string[, level]) -- Returned compressed string.\n" "\n" @@ -137,44 +203,31 @@ static PyObject * PyZlib_compress(PyObject *self, PyObject *args) { - PyObject *ReturnVal = NULL; - Byte *input, *output; - int length, level=Z_DEFAULT_COMPRESSION, err; + PyObject *RetVal = NULL; + Byte *ibuf; + Py_ssize_t ibuflen, obuflen = DEFAULTALLOC; + int level=Z_DEFAULT_COMPRESSION; + int err, flush; z_stream zst; /* require Python string object, optional 'level' arg */ - if (!PyArg_ParseTuple(args, "s#|i:compress", &input, &length, &level)) + if (!PyArg_ParseTuple(args, "s#|i:compress", &ibuf, &ibuflen, &level)) return NULL; - zst.avail_out = length + length/1000 + 12 + 1; - - output = (Byte*)malloc(zst.avail_out); - if (output == NULL) { - PyErr_SetString(PyExc_MemoryError, - "Can't allocate memory to compress data"); - return NULL; - } - - /* Past the point of no return. From here on out, we need to make sure - we clean up mallocs & INCREFs. 
*/ - zst.zalloc = (alloc_func)NULL; zst.zfree = (free_func)Z_NULL; - zst.next_out = (Byte *)output; - zst.next_in = (Byte *)input; - zst.avail_in = length; + zst.next_in = ibuf; err = deflateInit(&zst, level); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Out of memory while compressing data"); goto error; - case(Z_STREAM_ERROR): - PyErr_SetString(ZlibError, - "Bad compression level"); + case Z_STREAM_ERROR: + PyErr_SetString(ZlibError, "Bad compression level"); goto error; default: deflateEnd(&zst); @@ -182,27 +235,45 @@ goto error; } - Py_BEGIN_ALLOW_THREADS; - err = deflate(&zst, Z_FINISH); - Py_END_ALLOW_THREADS; + do { + arrange_input_buffer(&zst, &ibuflen); + flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; - if (err != Z_STREAM_END) { - zlib_error(zst, err, "while compressing data"); - deflateEnd(&zst); - goto error; + do { + obuflen = arrange_output_buffer(&zst, &RetVal, obuflen); + if (obuflen < 0) { + deflateEnd(&zst); + goto error; + } + + Py_BEGIN_ALLOW_THREADS + err = deflate(&zst, flush); + Py_END_ALLOW_THREADS + + if (err == Z_STREAM_ERROR) { + deflateEnd(&zst); + zlib_error(zst, err, "while compressing data"); + goto error; + } + + } while (zst.avail_out == 0); + assert(zst.avail_in == 0); + + } while (flush != Z_FINISH); + assert(err == Z_STREAM_END); + + err = deflateEnd(&zst); + if (err == Z_OK) { + if (_PyBytes_Resize(&RetVal, zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) < 0) + goto error; + return RetVal; } - - err=deflateEnd(&zst); - if (err == Z_OK) - ReturnVal = PyString_FromStringAndSize((char *)output, - zst.total_out); else zlib_error(zst, err, "while finishing compression"); - error: - free(output); - - return ReturnVal; + Py_XDECREF(RetVal); + return NULL; } PyDoc_STRVAR(decompress__doc__, @@ -214,36 +285,32 @@ static PyObject * PyZlib_decompress(PyObject *self, PyObject *args) { - PyObject *result_str; - Byte *input; - int length, err; + PyObject *RetVal = NULL; + Byte *ibuf; + Py_ssize_t ibuflen; + int err, flush; int wsize=DEF_WBITS; Py_ssize_t r_strlen=DEFAULTALLOC; z_stream zst; if (!PyArg_ParseTuple(args, "s#|in:decompress", - &input, &length, &wsize, &r_strlen)) + &ibuf, &ibuflen, &wsize, &r_strlen)) return NULL; - if (r_strlen <= 0) + if (r_strlen <= 0) { r_strlen = 1; - - zst.avail_in = length; - zst.avail_out = r_strlen; - - if (!(result_str = PyString_FromStringAndSize(NULL, r_strlen))) - return NULL; + } zst.zalloc = (alloc_func)NULL; zst.zfree = (free_func)Z_NULL; - zst.next_out = (Byte *)PyString_AS_STRING(result_str); - zst.next_in = (Byte *)input; + zst.avail_in = 0; + zst.next_in = ibuf; err = inflateInit2(&zst, wsize); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Out of memory while decompressing data"); goto error; @@ -254,42 +321,46 @@ } do { - Py_BEGIN_ALLOW_THREADS - err=inflate(&zst, Z_FINISH); - Py_END_ALLOW_THREADS + arrange_input_buffer(&zst, &ibuflen); + flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; - switch(err) { - case(Z_STREAM_END): - break; - case(Z_BUF_ERROR): - /* - * If there is at least 1 byte of room according to zst.avail_out - * and we get this error, assume that it means zlib cannot - * process the inflate call() due to an error in the data. 
- */ - if (zst.avail_out > 0) { - zlib_error(zst, err, "while decompressing data"); + do { + r_strlen = arrange_output_buffer(&zst, &RetVal, r_strlen); + if (r_strlen < 0) { inflateEnd(&zst); goto error; } - /* fall through */ - case(Z_OK): - /* need more memory */ - if (_PyString_Resize(&result_str, r_strlen << 1) < 0) { + + Py_BEGIN_ALLOW_THREADS + err = inflate(&zst, flush); + Py_END_ALLOW_THREADS + + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + case Z_MEM_ERROR: inflateEnd(&zst); + PyErr_SetString(PyExc_MemoryError, + "Out of memory while decompressing data"); + goto error; + default: + inflateEnd(&zst); + zlib_error(zst, err, "while decompressing data"); goto error; } - zst.next_out = (unsigned char *)PyString_AS_STRING(result_str) \ - + r_strlen; - zst.avail_out = r_strlen; - r_strlen = r_strlen << 1; - break; - default: - inflateEnd(&zst); - zlib_error(zst, err, "while decompressing data"); - goto error; - } - } while (err != Z_STREAM_END); + + } while (zst.avail_out == 0); + + } while (err != Z_STREAM_END && ibuflen != 0); + + + if (err != Z_STREAM_END) { + inflateEnd(&zst); + zlib_error(zst, err, "while decompressing data"); + goto error; + } err = inflateEnd(&zst); if (err != Z_OK) { @@ -297,11 +368,12 @@ goto error; } - _PyString_Resize(&result_str, zst.total_out); - return result_str; + _PyString_Resize(&RetVal, zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)); + return RetVal; error: - Py_XDECREF(result_str); + Py_XDECREF(RetVal); return NULL; } @@ -317,23 +389,23 @@ return NULL; self = newcompobject(&Comptype); - if (self==NULL) + if (self == NULL) return(NULL); self->zst.zalloc = (alloc_func)NULL; self->zst.zfree = (free_func)Z_NULL; self->zst.next_in = NULL; self->zst.avail_in = 0; err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy); - switch(err) { - case (Z_OK): + switch (err) { + case Z_OK: self->is_initialised = 1; - return (PyObject*)self; - case (Z_MEM_ERROR): + return (PyObject *)self; + case Z_MEM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); return NULL; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); return NULL; @@ -354,21 +426,21 @@ self = newcompobject(&Decomptype); if (self == NULL) - return(NULL); + return NULL; self->zst.zalloc = (alloc_func)NULL; self->zst.zfree = (free_func)Z_NULL; self->zst.next_in = NULL; self->zst.avail_in = 0; err = inflateInit2(&self->zst, wbits); - switch(err) { - case (Z_OK): + switch (err) { + case Z_OK: self->is_initialised = 1; - return (PyObject*)self; - case(Z_STREAM_ERROR): + return (PyObject *)self; + case Z_STREAM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); return NULL; - case (Z_MEM_ERROR): + case Z_MEM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); @@ -411,58 +483,44 @@ static PyObject * PyZlib_objcompress(compobject *self, PyObject *args) { - int err, inplen; - Py_ssize_t length = DEFAULTALLOC; - PyObject *RetVal; - Byte *input; - unsigned long start_total_out; + PyObject *RetVal = NULL; + Py_ssize_t ibuflen, obuflen = DEFAULTALLOC; + int err; - if (!PyArg_ParseTuple(args, "s#:compress", &input, &inplen)) - return NULL; - - if (!(RetVal = PyString_FromStringAndSize(NULL, length))) + if (!PyArg_ParseTuple(args, "s#:compress", &self->zst.next_in, &ibuflen)) return NULL; 
ENTER_ZLIB - start_total_out = self->zst.total_out; - self->zst.avail_in = inplen; - self->zst.next_in = input; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); + do { + arrange_input_buffer(&self->zst, &ibuflen); - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), Z_NO_FLUSH); - Py_END_ALLOW_THREADS + do { + obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen); + if (obuflen < 0) + goto error; - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while (err == Z_OK && self->zst.avail_out == 0) { - if (_PyString_Resize(&RetVal, length << 1) < 0) - goto error; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ - + length; - self->zst.avail_out = length; - length = length << 1; + Py_BEGIN_ALLOW_THREADS + err = deflate(&self->zst, Z_NO_FLUSH); + Py_END_ALLOW_THREADS - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), Z_NO_FLUSH); - Py_END_ALLOW_THREADS - } - /* We will only get Z_BUF_ERROR if the output buffer was full but - there wasn't more output when we tried again, so it is not an error - condition. - */ + if (err == Z_STREAM_ERROR) { + zlib_error(self->zst, err, "while compressing data"); + goto error; + } - if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while compressing"); - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } - _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); + } while (self->zst.avail_out == 0); + assert(self->zst.avail_in == 0); + + } while (ibuflen != 0); + + _PyString_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)); + goto success; error: + Py_CLEAR(RetVal); + success: LEAVE_ZLIB return RetVal; } @@ -470,41 +528,46 @@ /* Helper for objdecompress() and unflush(). Saves any unconsumed input data in self->unused_data or self->unconsumed_tail, as appropriate. */ static int -save_unconsumed_input(compobject *self, int err) +save_unconsumed_input(compobject *self, Byte *input, Py_ssize_t inplen, + int err) { if (err == Z_STREAM_END) { /* The end of the compressed data has been reached. Store the leftover input data in self->unused_data. */ if (self->zst.avail_in > 0) { Py_ssize_t old_size = PyString_GET_SIZE(self->unused_data); - Py_ssize_t new_size; + Py_ssize_t new_size, left_size; PyObject *new_data; - if (self->zst.avail_in > PY_SSIZE_T_MAX - old_size) { + left_size = input + inplen - self->zst.next_in; + if (left_size > (PY_SSIZE_T_MAX - old_size)) { PyErr_NoMemory(); return -1; } - new_size = old_size + self->zst.avail_in; + new_size = old_size + left_size; new_data = PyString_FromStringAndSize(NULL, new_size); if (new_data == NULL) return -1; Py_MEMCPY(PyString_AS_STRING(new_data), PyString_AS_STRING(self->unused_data), old_size); Py_MEMCPY(PyString_AS_STRING(new_data) + old_size, - self->zst.next_in, self->zst.avail_in); + self->zst.next_in, left_size); Py_SETREF(self->unused_data, new_data); self->zst.avail_in = 0; } } + if (self->zst.avail_in > 0 || PyString_GET_SIZE(self->unconsumed_tail)) { /* This code handles two distinct cases: 1. Output limit was reached. Save leftover input in unconsumed_tail. 2. All input data was consumed. Clear unconsumed_tail. 
*/ + Py_ssize_t left_size = input + inplen - self->zst.next_in; PyObject *new_data = PyString_FromStringAndSize( - (char *)self->zst.next_in, self->zst.avail_in); + (char *)self->zst.next_in, left_size); if (new_data == NULL) return -1; Py_SETREF(self->unconsumed_tail, new_data); } + return 0; } @@ -522,71 +585,69 @@ static PyObject * PyZlib_objdecompress(compobject *self, PyObject *args) { - int err, inplen, max_length = 0; - Py_ssize_t old_length, length = DEFAULTALLOC; - PyObject *RetVal; + int err = Z_OK; + Py_ssize_t inplen, max_length = 0; + Py_ssize_t ibuflen, obuflen = DEFAULTALLOC, hard_limit; + PyObject *RetVal = NULL; Byte *input; - unsigned long start_total_out; - if (!PyArg_ParseTuple(args, "s#|i:decompress", &input, + if (!PyArg_ParseTuple(args, "s#|n:decompress", &input, &inplen, &max_length)) return NULL; if (max_length < 0) { PyErr_SetString(PyExc_ValueError, "max_length must be greater than zero"); return NULL; - } + } else if (max_length == 0) + hard_limit = PY_SSIZE_T_MAX; + else + hard_limit = max_length; + + self->zst.next_in = input; + ibuflen = inplen; /* limit amount of data allocated to max_length */ - if (max_length && length > max_length) - length = max_length; - if (!(RetVal = PyString_FromStringAndSize(NULL, length))) - return NULL; + if (max_length && obuflen > max_length) + obuflen = max_length; ENTER_ZLIB - start_total_out = self->zst.total_out; - self->zst.avail_in = inplen; - self->zst.next_in = input; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); + do { + arrange_input_buffer(&self->zst, &ibuflen); - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_SYNC_FLUSH); - Py_END_ALLOW_THREADS + do { + obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal, + obuflen, hard_limit); + if (obuflen == -2) { + if (max_length > 0) { + goto save; + } + PyErr_NoMemory(); + } + if (obuflen < 0) { + goto abort; + } - /* While Z_OK and the output buffer is full, there might be more output. - So extend the output buffer and try again. - */ - while (err == Z_OK && self->zst.avail_out == 0) { - /* If max_length set, don't continue decompressing if we've already - reached the limit. - */ - if (max_length && length >= max_length) - break; + Py_BEGIN_ALLOW_THREADS + err = inflate(&self->zst, Z_SYNC_FLUSH); + Py_END_ALLOW_THREADS - /* otherwise, ... */ - old_length = length; - length = length << 1; - if (max_length && length > max_length) - length = max_length; + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + default: + goto save; + } - if (_PyString_Resize(&RetVal, length) < 0) - goto error; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ - + old_length; - self->zst.avail_out = length - old_length; + } while (self->zst.avail_out == 0); - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_SYNC_FLUSH); - Py_END_ALLOW_THREADS - } + } while (err != Z_STREAM_END && ibuflen != 0); - if (save_unconsumed_input(self, err) < 0) { - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } + save: + if (save_unconsumed_input(self, input, inplen, err) < 0) + goto abort; /* This is the logical place to call inflateEnd, but the old behaviour of only calling it on flush() is preserved. */ @@ -597,14 +658,16 @@ not an error condition. 
*/ zlib_error(self->zst, err, "while decompressing"); - Py_DECREF(RetVal); - RetVal = NULL; - goto error; + goto abort; } - _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); + _PyString_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)); + goto success; - error: + abort: + Py_CLEAR(RetVal); + success: LEAVE_ZLIB return RetVal; @@ -621,10 +684,10 @@ static PyObject * PyZlib_flush(compobject *self, PyObject *args) { - int err, length = DEFAULTALLOC; - PyObject *RetVal; + int err; + Py_ssize_t length = DEFAULTALLOC; + PyObject *RetVal = NULL; int flushmode = Z_FINISH; - unsigned long start_total_out; if (!PyArg_ParseTuple(args, "|i:flush", &flushmode)) return NULL; @@ -635,44 +698,37 @@ return PyString_FromStringAndSize(NULL, 0); } - if (!(RetVal = PyString_FromStringAndSize(NULL, length))) - return NULL; - ENTER_ZLIB - start_total_out = self->zst.total_out; self->zst.avail_in = 0; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal); - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), flushmode); - Py_END_ALLOW_THREADS - - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while (err == Z_OK && self->zst.avail_out == 0) { - if (_PyString_Resize(&RetVal, length << 1) < 0) + do { + length = arrange_output_buffer(&self->zst, &RetVal, length); + if (length < 0) { + Py_CLEAR(RetVal); goto error; - self->zst.next_out = (unsigned char *)PyString_AS_STRING(RetVal) \ - + length; - self->zst.avail_out = length; - length = length << 1; + } Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), flushmode); + err = deflate(&self->zst, flushmode); Py_END_ALLOW_THREADS - } + + if (err == Z_STREAM_ERROR) { + zlib_error(self->zst, err, "while flushing"); + Py_CLEAR(RetVal); + goto error; + } + } while (self->zst.avail_out == 0); + assert(self->zst.avail_in == 0); /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH, but checking both for safety*/ if (err == Z_STREAM_END && flushmode == Z_FINISH) { - err = deflateEnd(&(self->zst)); + err = deflateEnd(&self->zst); if (err != Z_OK) { zlib_error(self->zst, err, "from deflateEnd()"); - Py_DECREF(RetVal); - RetVal = NULL; + Py_CLEAR(RetVal); goto error; } else @@ -682,18 +738,17 @@ but there wasn't more output when we tried again, so it is not an error condition. 
*/ - } else if (err!=Z_OK && err!=Z_BUF_ERROR) { + } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while flushing"); - Py_DECREF(RetVal); - RetVal = NULL; + Py_CLEAR(RetVal); goto error; } - _PyString_Resize(&RetVal, self->zst.total_out - start_total_out); + _PyString_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)); error: LEAVE_ZLIB - return RetVal; } @@ -715,13 +770,13 @@ */ ENTER_ZLIB err = deflateCopy(&retval->zst, &self->zst); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); goto error; @@ -764,13 +819,13 @@ */ ENTER_ZLIB err = inflateCopy(&retval->zst, &self->zst); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); goto error; @@ -807,73 +862,76 @@ static PyObject * PyZlib_unflush(compobject *self, PyObject *args) { - int err, length = DEFAULTALLOC; - PyObject * retval = NULL; - unsigned long start_total_out; + Py_ssize_t length = DEFAULTALLOC; + int err, flush; + PyObject *RetVal = NULL; + Py_ssize_t ibuflen; - if (!PyArg_ParseTuple(args, "|i:flush", &length)) + if (!PyArg_ParseTuple(args, "|n:flush", &length)) return NULL; if (length <= 0) { PyErr_SetString(PyExc_ValueError, "length must be greater than zero"); return NULL; } - if (!(retval = PyString_FromStringAndSize(NULL, length))) - return NULL; - ENTER_ZLIB - start_total_out = self->zst.total_out; - self->zst.avail_in = PyString_GET_SIZE(self->unconsumed_tail); self->zst.next_in = (Byte *)PyString_AS_STRING(self->unconsumed_tail); - self->zst.avail_out = length; - self->zst.next_out = (Byte *)PyString_AS_STRING(retval); + ibuflen = PyString_GET_SIZE(self->unconsumed_tail); - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_FINISH); - Py_END_ALLOW_THREADS + do { + arrange_input_buffer(&self->zst, &ibuflen); + flush = ibuflen == 0 ? 
Z_FINISH : Z_NO_FLUSH; - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) { - if (_PyString_Resize(&retval, length << 1) < 0) - goto error; - self->zst.next_out = (Byte *)PyString_AS_STRING(retval) + length; - self->zst.avail_out = length; - length = length << 1; + do { + length = arrange_output_buffer(&self->zst, &RetVal, length); + if (length < 0) + goto abort; - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_FINISH); - Py_END_ALLOW_THREADS - } + Py_BEGIN_ALLOW_THREADS + err = inflate(&self->zst, flush); + Py_END_ALLOW_THREADS - if (save_unconsumed_input(self, err) < 0) { - Py_DECREF(retval); - retval = NULL; - goto error; - } + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + default: + goto save; + } + + } while (self->zst.avail_out == 0); + + } while (err != Z_STREAM_END && ibuflen != 0); + + save: + if (save_unconsumed_input(self, + (Byte *)PyString_AS_STRING(self->unconsumed_tail), + PyString_GET_SIZE(self->unconsumed_tail), err) < 0) + goto abort; /* If flushmode is Z_FINISH, we also have to call deflateEnd() to free various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH */ if (err == Z_STREAM_END) { - err = inflateEnd(&(self->zst)); + err = inflateEnd(&self->zst); self->is_initialised = 0; if (err != Z_OK) { zlib_error(self->zst, err, "from inflateEnd()"); - Py_DECREF(retval); - retval = NULL; - goto error; + goto abort; } } - _PyString_Resize(&retval, self->zst.total_out - start_total_out); + _PyString_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)); + goto success; -error: - + abort: + Py_CLEAR(RetVal); + success: LEAVE_ZLIB - - return retval; + return RetVal; } static PyMethodDef comp_methods[] = @@ -943,15 +1001,24 @@ { unsigned int adler32val = 1; /* adler32(0L, Z_NULL, 0) */ Byte *buf; - int len, signed_val; + Py_ssize_t len; + int signed_val; if (!PyArg_ParseTuple(args, "s#|I:adler32", &buf, &len, &adler32val)) return NULL; + + /* Avoid truncation of length for very large buffers. adler32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while ((size_t)len > UINT_MAX) { + adler32val = adler32(adler32val, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } /* In Python 2.x we return a signed integer regardless of native platform * long size (the 32bit unsigned long is treated as 32-bit signed and sign * extended into a 64-bit long inside the integer object). 3.0 does the * right thing and returns unsigned. http://bugs.python.org/issue1202 */ - signed_val = adler32(adler32val, buf, len); + signed_val = adler32(adler32val, buf, (unsigned int)len); return PyInt_FromLong(signed_val); } @@ -966,15 +1033,24 @@ { unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */ Byte *buf; - int len, signed_val; + Py_ssize_t len; + int signed_val; if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val)) return NULL; + + /* Avoid truncation of length for very large buffers. crc32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. 
*/ + while ((size_t)len > UINT_MAX) { + crc32val = crc32(crc32val, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } /* In Python 2.x we return a signed integer regardless of native platform * long size (the 32bit unsigned long is treated as 32-bit signed and sign * extended into a 64-bit long inside the integer object). 3.0 does the * right thing and returns unsigned. http://bugs.python.org/issue1202 */ - signed_val = crc32(crc32val, buf, len); + signed_val = crc32(crc32val, buf, (unsigned int)len); return PyInt_FromLong(signed_val); }
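The heart of the zlibmodule.c changes is the chunking strategy: arrange_input_buffer() hands deflate()/inflate() at most UINT_MAX bytes per call, and arrange_output_buffer() grows the result object between calls, so no single zlib call ever sees a length wider than an unsigned int. The sketch below mirrors that loop structure in Python using the streaming objects; CHUNK stands in for UINT_MAX and is deliberately tiny so the example runs quickly. It illustrates the strategy only and is not the C implementation:

    import zlib

    CHUNK = 16 * 1024  # stand-in for UINT_MAX; kept small for the example

    def compress_chunked(data, level=6):
        # Feed the compressor at most CHUNK bytes per call, collecting output.
        co = zlib.compressobj(level)
        out = []
        for start in range(0, len(data), CHUNK):
            out.append(co.compress(data[start:start + CHUNK]))
        out.append(co.flush())
        return b"".join(out)

    def decompress_chunked(data):
        # Same idea on the inflate side.
        do = zlib.decompressobj()
        out = []
        for start in range(0, len(data), CHUNK):
            out.append(do.decompress(data[start:start + CHUNK]))
        out.append(do.flush())
        return b"".join(out)

    payload = b"x" * (10 * CHUNK + 123)
    assert decompress_chunked(compress_chunked(payload)) == payload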