# HG changeset patch # Parent 6c733337afae11e1a3ba163cea8aeeb6fb38d0c2 Issue #25626: Change zlib to accept size_t and cap to UINT_MAX The underlying zlib library stores sizes in “unsigned int”. The corresponding Python parameters are all sizes of buffers filled in by zlib, so it is okay to reduce higher values to the UINT_MAX internal cap. OverflowError is still raised for sizes that do not fit in size_t. Also, clear the error set by PyLong_AsLong() when the size does not fit in a signed long, before retrying the conversion with PyLong_AsSize_t(). This resolves a regression in the gzip module when reading more than UINT_MAX or LONG_MAX bytes in one call, introduced by revision 62723172412c. diff -r 6c733337afae Doc/howto/clinic.rst --- a/Doc/howto/clinic.rst Sat Nov 14 15:14:42 2015 -0800 +++ b/Doc/howto/clinic.rst Fri Nov 20 07:35:05 2015 +0000 @@ -1249,18 +1249,18 @@ /*[python input] - class uint_converter(CConverter): + class capped_uint_converter(CConverter): type = 'unsigned int' - converter = 'uint_converter' + converter = 'capped_uint_converter' [python start generated code]*/ - /*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/ + /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/ -This block adds a converter to Argument Clinic named ``uint``. Parameters -declared as ``uint`` will be declared as type ``unsigned int``, and will -be parsed by the ``'O&'`` format unit, which will call the ``uint_converter`` -converter function. -``uint`` variables automatically support default values. +This block adds a converter to Argument Clinic named ``capped_uint``. Parameters +declared as ``capped_uint`` will be declared as type ``unsigned int``, and will +be parsed by the ``'O&'`` format unit, which will call the +``capped_uint_converter`` converter function. ``capped_uint`` variables +automatically support default values. More sophisticated custom converters can insert custom C code to handle initialization and cleanup. 
diff -r 6c733337afae Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Sat Nov 14 15:14:42 2015 -0800 +++ b/Lib/test/test_zlib.py Fri Nov 20 07:35:05 2015 +0000 @@ -364,6 +364,15 @@ self.assertRaises(ValueError, dco.decompress, b"", -1) self.assertEqual(b'', dco.unconsumed_tail) + def test_maxlen_large(self): + # Sizes up to sys.maxsize should be accepted, although zlib is + # internally limited to expressing sizes with unsigned int + data = HAMLET_SCENE * 10 + self.assertGreater(len(data), zlib.DEF_BUF_SIZE) + compressed = zlib.compress(data) + dco = zlib.decompressobj() + self.assertEqual(dco.decompress(compressed, sys.maxsize), data) + def test_clear_unconsumed_tail(self): # Issue #12050: calling decompress() without providing max_length # should clear the unconsumed_tail attribute. diff -r 6c733337afae Misc/NEWS --- a/Misc/NEWS Sat Nov 14 15:14:42 2015 -0800 +++ b/Misc/NEWS Fri Nov 20 07:35:05 2015 +0000 @@ -70,6 +70,14 @@ Library ------- +- Issue #25626: Change three zlib functions to accept sizes that fit in + size_t, but internally cap those sizes to UINT_MAX. Also, clear the error + when the size is greater than a signed long. This resolves a regression in + 3.5 where GzipFile.read() failed to read chunks larger than 2 or 4 GiB. + The change affects the zlib.Decompress.decompress() max_length parameter, + the zlib.decompress() bufsize parameter, and the zlib.Decompress.flush() + length parameter. + - Issue #25590: In the Readline completer, only call getattr() once per attribute. 
diff -r 6c733337afae Modules/clinic/zlibmodule.c.h --- a/Modules/clinic/zlibmodule.c.h Sat Nov 14 15:14:42 2015 -0800 +++ b/Modules/clinic/zlibmodule.c.h Fri Nov 20 07:35:05 2015 +0000 @@ -68,7 +68,7 @@ unsigned int bufsize = DEF_BUF_SIZE; if (!PyArg_ParseTuple(args, "y*|iO&:decompress", - &data, &wbits, uint_converter, &bufsize)) + &data, &wbits, capped_uint_converter, &bufsize)) goto exit; return_value = zlib_decompress_impl(module, &data, wbits, bufsize); @@ -242,7 +242,7 @@ unsigned int max_length = 0; if (!PyArg_ParseTuple(args, "y*|O&:decompress", - &data, uint_converter, &max_length)) + &data, capped_uint_converter, &max_length)) goto exit; return_value = zlib_Decompress_decompress_impl(self, &data, max_length); @@ -353,7 +353,7 @@ unsigned int length = DEF_BUF_SIZE; if (!PyArg_ParseTuple(args, "|O&:flush", - uint_converter, &length)) + capped_uint_converter, &length)) goto exit; return_value = zlib_Decompress_flush_impl(self, length); @@ -438,4 +438,4 @@ #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ -/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/ diff -r 6c733337afae Modules/zlibmodule.c --- a/Modules/zlibmodule.c Sat Nov 14 15:14:42 2015 -0800 +++ b/Modules/zlibmodule.c Fri Nov 20 07:35:05 2015 +0000 @@ -226,24 +226,25 @@ /*[python input] -class uint_converter(CConverter): +class capped_uint_converter(CConverter): type = 'unsigned int' - converter = 'uint_converter' + converter = 'capped_uint_converter' c_ignored_default = "0" [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/ static int -uint_converter(PyObject *obj, void *ptr) +capped_uint_converter(PyObject *obj, void *ptr) { long val; - unsigned long uval; + size_t uval; 
val = PyLong_AsLong(obj); if (val == -1 && PyErr_Occurred()) { - uval = PyLong_AsUnsignedLong(obj); - if (uval == (unsigned long)-1 && PyErr_Occurred()) + PyErr_Clear(); + uval = PyLong_AsSize_t(obj); + if (uval == (size_t)-1 && PyErr_Occurred()) return 0; } else { @@ -252,16 +253,15 @@ "value must be positive"); return 0; } - uval = (unsigned long)val; + uval = (size_t)val; } if (uval > UINT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "Python int too large for C unsigned int"); - return 0; + *(unsigned int *)ptr = UINT_MAX; } - - *(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int); + else { + *(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, size_t, unsigned int); + } return 1; } @@ -272,7 +272,7 @@ Compressed data. wbits: int(c_default="MAX_WBITS") = MAX_WBITS The window buffer size. - bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE + bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE The initial output buffer size. / @@ -282,7 +282,7 @@ static PyObject * zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits, unsigned int bufsize) -/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/ +/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/ { PyObject *result_str = NULL; Byte *input; @@ -691,7 +691,7 @@ data: Py_buffer The binary data to decompress. - max_length: uint = 0 + max_length: capped_uint = 0 The maximum allowable length of the decompressed data. Unconsumed input data will be stored in the unconsumed_tail attribute. 
@@ -707,7 +707,7 @@ static PyObject * zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, unsigned int max_length) -/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/ +/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/ { int err; unsigned int old_length, length = DEF_BUF_SIZE; @@ -1048,7 +1048,7 @@ /*[clinic input] zlib.Decompress.flush - length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE + length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE the initial size of the output buffer. / @@ -1057,7 +1057,7 @@ static PyObject * zlib_Decompress_flush_impl(compobject *self, unsigned int length) -/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/ +/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/ { int err; unsigned int new_length;