# HG changeset patch # Parent 03708c680ecaea7537b0fe77d50d4c068d27a671 Issue #23200: Deprecate the zlib decompressor's flush() method diff -r 03708c680eca Doc/library/zlib.rst --- a/Doc/library/zlib.rst Wed Feb 03 07:52:06 2016 +0000 +++ b/Doc/library/zlib.rst Wed Feb 03 11:22:02 2016 +0000 @@ -218,26 +218,31 @@ .. method:: Decompress.decompress(data[, max_length]) - Decompress *data*, returning a bytes object containing the uncompressed data - corresponding to at least part of the data in *string*. This data should be + Decompress *data*, returning a bytes object containing + the uncompressed data. This data should be concatenated to the output produced by any preceding calls to the - :meth:`decompress` method. Some of the input data may be preserved in internal - buffers for later processing. + :meth:`.decompress` method. If the optional parameter *max_length* is non-zero then the return value will be no longer than *max_length*. This may mean that not all of the compressed input - can be processed; and unconsumed data will be stored in the attribute - :attr:`unconsumed_tail`. This bytestring must be passed to a subsequent call to - :meth:`decompress` if decompression is to continue. If *max_length* is not - supplied then the whole input is decompressed, and :attr:`unconsumed_tail` is + can be processed. Unconsumed data will be preserved in internal + buffers for later processing, and/or stored in the attribute + :attr:`.unconsumed_tail`. This bytestring must be passed to a subsequent call to + :meth:`.decompress` if decompression is to continue. If *max_length* is not + supplied then the whole input is decompressed, and :attr:`.unconsumed_tail` is empty. .. method:: Decompress.flush([length]) + .. deprecated:: 3.6 + Calling this method is either unnecessary, because :meth:`.decompress` + returns as much data as possible, or it defeats the purpose of the + *max_length* limit. 
+ All pending input is processed, and a bytes object containing the remaining - uncompressed output is returned. After calling :meth:`flush`, the - :meth:`decompress` method cannot be called again; the only realistic action is + uncompressed output is returned. After calling :meth:`.flush`, the + :meth:`.decompress` method cannot be called again; the only realistic action is to delete the object. The optional parameter *length* sets the initial size of the output buffer. diff -r 03708c680eca Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Wed Feb 03 07:52:06 2016 +0000 +++ b/Doc/whatsnew/3.6.rst Wed Feb 03 11:22:02 2016 +0000 @@ -196,6 +196,11 @@ been deprecated in previous versions of Python in favour of :meth:`importlib.abc.Loader.exec_module`. +* Calling :meth:`zlib.Decompress.flush` now emits a :exc:`DeprecationWarning`. Calling it + is either unnecessary, because :meth:`~zlib.Decompress.decompress` + has returned as much data as possible since Python 2.1, or it defeats + the *max_length* limit. See :issue:`23200`.
+ Deprecated functions and types of the C API ------------------------------------------- diff -r 03708c680eca Lib/encodings/zlib_codec.py --- a/Lib/encodings/zlib_codec.py Wed Feb 03 07:52:06 2016 +0000 +++ b/Lib/encodings/zlib_codec.py Wed Feb 03 11:22:02 2016 +0000 @@ -47,11 +47,7 @@ self.decompressobj = zlib.decompressobj() def decode(self, input, final=False): - if final: - c = self.decompressobj.decompress(input) - return c + self.decompressobj.flush() - else: - return self.decompressobj.decompress(input) + return self.decompressobj.decompress(input) def reset(self): self.decompressobj = zlib.decompressobj() diff -r 03708c680eca Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Wed Feb 03 07:52:06 2016 +0000 +++ b/Lib/test/test_codecs.py Wed Feb 03 11:22:02 2016 +0000 @@ -2682,6 +2682,13 @@ self.assertEqual(size, len(o)) self.assertEqual(i, binput) + encoder = codecs.getincrementalencoder(encoding)() + o_inc = encoder.encode(binput, True) + self.assertEqual(o_inc, o) + decoder = codecs.getincrementaldecoder(encoding)() + i = decoder.decode(o, True) + self.assertEqual(i, binput) + def test_read(self): for encoding in bytes_transform_encodings: with self.subTest(encoding=encoding): diff -r 03708c680eca Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Wed Feb 03 07:52:06 2016 +0000 +++ b/Lib/test/test_zlib.py Wed Feb 03 11:22:02 2016 +0000 @@ -1,9 +1,11 @@ import unittest from test import support import binascii +from contextlib import contextmanager import pickle import random import sys +import warnings from test.support import bigmemtest, _1G, _4G zlib = support.import_module('zlib') @@ -114,8 +116,9 @@ def test_decompressobj_badflush(self): # verify failure on calling decompressobj.flush with bad params - self.assertRaises(ValueError, zlib.decompressobj().flush, 0) - self.assertRaises(ValueError, zlib.decompressobj().flush, -1) + with ignore_flush_warning(): + self.assertRaises(ValueError, zlib.decompressobj().flush, 0) + self.assertRaises(ValueError, 
zlib.decompressobj().flush, -1) @support.cpython_only def test_overflow(self): @@ -225,9 +228,13 @@ self.assertEqual(x1 + x2, datazip) for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): dco = zlib.decompressobj() - y1 = dco.decompress(v1 + v2) - y2 = dco.flush() - self.assertEqual(data, y1 + y2) + y = dco.decompress(v1 + v2) + self.assertEqual(data, y) + self.assertIsInstance(dco.unconsumed_tail, bytes) + self.assertIsInstance(dco.unused_data, bytes) + + with ignore_flush_warning(): + self.assertEqual(b"", dco.flush()) self.assertIsInstance(dco.unconsumed_tail, bytes) self.assertIsInstance(dco.unused_data, bytes) @@ -242,9 +249,8 @@ x1 = co.compress(HAMLET_SCENE) x2 = co.flush() dco = zlib.decompressobj(wbits) - y1 = dco.decompress(x1 + x2) - y2 = dco.flush() - self.assertEqual(HAMLET_SCENE, y1 + y2) + y = dco.decompress(x1 + x2) + self.assertEqual(HAMLET_SCENE, y) # keyword arguments should also be supported zlib.compressobj(level=level, method=method, wbits=wbits, @@ -261,11 +267,10 @@ combuf = b''.join(bufs) dco = zlib.decompressobj() - y1 = dco.decompress(b''.join(bufs)) - y2 = dco.flush() - self.assertEqual(data, y1 + y2) + y = dco.decompress(b''.join(bufs)) + self.assertEqual(data, y) - def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): + def test_decompinc(self, source=None, cx=256, dcx=64): # compress object in steps, decompress object in steps source = source or HAMLET_SCENE data = source * 128 @@ -290,24 +295,20 @@ "(A) uct should be b'': not %d long" % len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) - if flush: - bufs.append(dco.flush()) - else: - while True: - chunk = dco.decompress(b'') - if chunk: - bufs.append(chunk) - else: - break + self.assertEqual(data, b''.join(bufs)) + + self.assertEqual(b'', dco.decompress(b'')) self.assertEqual(b'', dco.unconsumed_tail, ######## "(B) uct should be b'': not %d long" % len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) - self.assertEqual(data, 
b''.join(bufs)) - # Failure means: "decompressobj with init options failed" - def test_decompincflush(self): - self.test_decompinc(flush=True) + with ignore_flush_warning(): + self.assertEqual(b'', dco.flush()) + self.assertEqual(b'', dco.unconsumed_tail, ######## + "(B) uct should be b'': not %d long" % + len(dco.unconsumed_tail)) + self.assertEqual(b'', dco.unused_data) def test_decompimax(self, source=None, cx=256, dcx=64): # compress in steps, decompress in length-restricted steps @@ -333,7 +334,12 @@ 'chunk too big (%d>%d)' % (len(chunk), dcx)) bufs.append(chunk) cb = dco.unconsumed_tail - bufs.append(dco.flush()) + # The loop above does not check for the end of the stream, so there + # may be more than dcx bytes possible in the final call. Calling + # flush() will return them all, since it does not accept a max_length + # parameter. + with ignore_flush_warning(): + bufs.append(dco.flush()) self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') def test_decompressmaxlen(self, flush=False): @@ -359,7 +365,8 @@ bufs.append(chunk) cb = dco.unconsumed_tail if flush: - bufs.append(dco.flush()) + with ignore_flush_warning(): + bufs.append(dco.flush()) else: while chunk: chunk = dco.decompress(b'', max_length) @@ -463,7 +470,13 @@ co = zlib.compressobj(zlib.Z_BEST_COMPRESSION) self.assertTrue(co.flush()) # Returns a zlib header dco = zlib.decompressobj() - self.assertEqual(dco.flush(), b"") # Returns nothing + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b"") # Returns nothing + + def test_flush_deprecated(self): + dco = zlib.decompressobj() + with self.assertWarnsRegex(DeprecationWarning, "Decompress.flush"): + dco.flush() def test_dictionary(self): h = HAMLET_SCENE @@ -476,7 +489,7 @@ cd = co.compress(h) + co.flush() # Verify that it will decompress with the dictionary. 
dco = zlib.decompressobj(zdict=zdict) - self.assertEqual(dco.decompress(cd) + dco.flush(), h) + self.assertEqual(dco.decompress(cd), h) # Verify that it fails when not given the dictionary. dco = zlib.decompressobj() self.assertRaises(zlib.error, dco.decompress, cd) @@ -504,8 +517,9 @@ # (see issue #8672). dco = zlib.decompressobj() y = dco.decompress(x[:-5]) - y += dco.flush() self.assertEqual(y, b'foo') + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b'') def test_decompress_eof(self): x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' @@ -515,7 +529,8 @@ self.assertFalse(dco.eof) dco.decompress(x[-5:]) self.assertTrue(dco.eof) - dco.flush() + with ignore_flush_warning(): + dco.flush() self.assertTrue(dco.eof) def test_decompress_eof_incomplete_stream(self): @@ -524,7 +539,8 @@ self.assertFalse(dco.eof) dco.decompress(x[:-5]) self.assertFalse(dco.eof) - dco.flush() + with ignore_flush_warning(): + dco.flush() self.assertFalse(dco.eof) def test_decompress_unused_data(self): @@ -547,12 +563,17 @@ else: data += dco.decompress( dco.unconsumed_tail + x[i : i + step], maxlen) - data += dco.flush() self.assertTrue(dco.eof) self.assertEqual(data, source) self.assertEqual(dco.unconsumed_tail, b'') self.assertEqual(dco.unused_data, remainder) + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b'') + self.assertTrue(dco.eof) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) + def test_flush_with_freed_input(self): # Issue #16411: decompressor accesses input to last decompress() call # in flush(), even if this object has been freed in the meanwhile. 
@@ -563,7 +584,8 @@ dco.decompress(data, 1) del data data = zlib.compress(input2) - self.assertEqual(dco.flush(), input1[1:]) + with ignore_flush_warning(): + self.assertEqual(dco.flush(), input1[1:]) @bigmemtest(size=_4G, memuse=1) def test_flush_large_length(self, size): @@ -642,7 +664,8 @@ data = zlib.compress(HAMLET_SCENE) d = zlib.decompressobj() d.decompress(data) - d.flush() + with ignore_flush_warning(): + d.flush() self.assertRaises(ValueError, d.copy) def test_compresspickle(self): @@ -666,7 +689,7 @@ @bigmemtest(size=_1G + 1024 * 1024, memuse=2) def test_big_decompress_buffer(self, size): d = zlib.decompressobj() - decompress = lambda s: d.decompress(s) + d.flush() + decompress = lambda s: d.decompress(s) self.check_big_decompress_buffer(size, decompress) @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) @@ -681,6 +704,14 @@ data = None +@contextmanager +def ignore_flush_warning(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + category=DeprecationWarning, message="Decompress.flush") + yield + + def genblock(seed, length, step=1024, generator=random): """length-byte stream of random data from a seed (in step-byte blocks).""" if seed is not None: diff -r 03708c680eca Lib/zipfile.py --- a/Lib/zipfile.py Wed Feb 03 07:52:06 2016 +0000 +++ b/Lib/zipfile.py Wed Feb 03 11:22:02 2016 +0000 @@ -917,12 +917,11 @@ self._eof = self._compress_left <= 0 elif self._compress_type == ZIP_DEFLATED: n = max(n, self.MIN_READ_SIZE) + input_eof = not data data = self._decompressor.decompress(data, n) - self._eof = (self._decompressor.eof or - self._compress_left <= 0 and - not self._decompressor.unconsumed_tail) - if self._eof: - data += self._decompressor.flush() + # Stop decompressing either if the decompressor indicates EOF, or + # no more input is possible and we have got all possible output + self._eof = self._decompressor.eof or input_eof and not data else: data = self._decompressor.decompress(data) self._eof = self._decompressor.eof or 
self._compress_left <= 0 diff -r 03708c680eca Misc/NEWS --- a/Misc/NEWS Wed Feb 03 07:52:06 2016 +0000 +++ b/Misc/NEWS Wed Feb 03 11:22:02 2016 +0000 @@ -166,6 +166,10 @@ Library ------- +- Issue #23200: Deprecate the zlib decompressor's flush() method. + Calling it is either unnecessary, or it defeats the max_length limit + used with decompress(). + - Issue #26202: copy.deepcopy() now correctly copies range() objects with non-atomic attributes. diff -r 03708c680eca Modules/zlibmodule.c --- a/Modules/zlibmodule.c Wed Feb 03 07:52:06 2016 +0000 +++ b/Modules/zlibmodule.c Wed Feb 03 11:22:02 2016 +0000 @@ -692,15 +692,11 @@ The binary data to decompress. max_length: capped_uint = 0 The maximum allowable length of the decompressed data. - Unconsumed input data will be stored in - the unconsumed_tail attribute. + Unconsumed input data will be stored in internal buffers for later + processing, and/or the unconsumed_tail attribute. / Return a bytes object containing the decompressed version of the data. - -After calling this function, some of the input data may still be stored in -internal buffers for later processing. -Call the flush() method to clear these buffers. [clinic start generated code]*/ static PyObject * @@ -1058,6 +1054,11 @@ unsigned long start_total_out; Py_ssize_t size; + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Decompress.flush() is deprecated", 1) < 0) { + return NULL; + } + if (length == 0) { PyErr_SetString(PyExc_ValueError, "length must be greater than zero"); return NULL;