# HG changeset patch # Parent cdd403dd82cbce0c14cdab18a23476af0dbd3578 Issue #23200: Deprecate the zlib decompressor's flush() method diff -r cdd403dd82cb Doc/library/zlib.rst --- a/Doc/library/zlib.rst Wed Nov 18 02:46:16 2015 +0000 +++ b/Doc/library/zlib.rst Wed Nov 18 07:36:05 2015 +0000 @@ -224,26 +224,31 @@ .. method:: Decompress.decompress(data[, max_length]) - Decompress *data*, returning a bytes object containing the uncompressed data - corresponding to at least part of the data in *string*. This data should be + Decompress *data*, returning a bytes object containing + the uncompressed data. This data should be concatenated to the output produced by any preceding calls to the - :meth:`decompress` method. Some of the input data may be preserved in internal - buffers for later processing. + :meth:`.decompress` method. If the optional parameter *max_length* is non-zero then the return value will be no longer than *max_length*. This may mean that not all of the compressed input - can be processed; and unconsumed data will be stored in the attribute - :attr:`unconsumed_tail`. This bytestring must be passed to a subsequent call to - :meth:`decompress` if decompression is to continue. If *max_length* is not - supplied then the whole input is decompressed, and :attr:`unconsumed_tail` is + can be processed. Unconsumed data will be preserved in internal + buffers for later processing, and/or stored in the attribute + :attr:`.unconsumed_tail`. This bytestring must be passed to a subsequent call to + :meth:`.decompress` if decompression is to continue. If *max_length* is not + supplied then the whole input is decompressed, and :attr:`.unconsumed_tail` is empty. .. method:: Decompress.flush([length]) + .. deprecated:: 3.6 + Calling this method is either unnecessary, because :meth:`.decompress` + returns as much data as possible, or it defeats the purpose of the + *max_length* limit. 
+ All pending input is processed, and a bytes object containing the remaining - uncompressed output is returned. After calling :meth:`flush`, the - :meth:`decompress` method cannot be called again; the only realistic action is + uncompressed output is returned. After calling :meth:`.flush`, the + :meth:`.decompress` method cannot be called again; the only realistic action is to delete the object. The optional parameter *length* sets the initial size of the output buffer. diff -r cdd403dd82cb Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Wed Nov 18 02:46:16 2015 +0000 +++ b/Doc/whatsnew/3.6.rst Wed Nov 18 07:36:05 2015 +0000 @@ -181,7 +181,10 @@ Deprecated Python modules, functions and methods ------------------------------------------------ -* None yet. +* Calling :meth:`zlib.Decompress.flush` now emits a :exc:`DeprecationWarning`. Calling it + is either unnecessary, because :meth:`~zlib.Decompress.decompress` + has returned as much data as possible since Python 2.1, or it defeats + the *max_length* limit. See :issue:`23200`. 
Deprecated functions and types of the C API diff -r cdd403dd82cb Lib/encodings/zlib_codec.py --- a/Lib/encodings/zlib_codec.py Wed Nov 18 02:46:16 2015 +0000 +++ b/Lib/encodings/zlib_codec.py Wed Nov 18 07:36:05 2015 +0000 @@ -47,11 +47,7 @@ self.decompressobj = zlib.decompressobj() def decode(self, input, final=False): - if final: - c = self.decompressobj.decompress(input) - return c + self.decompressobj.flush() - else: - return self.decompressobj.decompress(input) + return self.decompressobj.decompress(input) def reset(self): self.decompressobj = zlib.decompressobj() diff -r cdd403dd82cb Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Wed Nov 18 02:46:16 2015 +0000 +++ b/Lib/test/test_codecs.py Wed Nov 18 07:36:05 2015 +0000 @@ -2682,6 +2682,13 @@ self.assertEqual(size, len(o)) self.assertEqual(i, binput) + encoder = codecs.getincrementalencoder(encoding)() + o_inc = encoder.encode(binput, True) + self.assertEqual(o_inc, o) + decoder = codecs.getincrementaldecoder(encoding)() + i = decoder.decode(o, True) + self.assertEqual(i, binput) + def test_read(self): for encoding in bytes_transform_encodings: with self.subTest(encoding=encoding): diff -r cdd403dd82cb Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py Wed Nov 18 02:46:16 2015 +0000 +++ b/Lib/test/test_zlib.py Wed Nov 18 07:36:05 2015 +0000 @@ -1,9 +1,11 @@ import unittest from test import support import binascii +from contextlib import contextmanager import pickle import random import sys +import warnings from test.support import bigmemtest, _1G, _4G zlib = support.import_module('zlib') @@ -119,8 +121,9 @@ def test_decompressobj_badflush(self): # verify failure on calling decompressobj.flush with bad params - self.assertRaises(ValueError, zlib.decompressobj().flush, 0) - self.assertRaises(ValueError, zlib.decompressobj().flush, -1) + with ignore_flush_warning(): + self.assertRaises(ValueError, zlib.decompressobj().flush, 0) + self.assertRaises(ValueError, zlib.decompressobj().flush, -1) class 
BaseCompressTestCase(object): @@ -212,9 +215,13 @@ self.assertEqual(x1 + x2, datazip) for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): dco = zlib.decompressobj() - y1 = dco.decompress(v1 + v2) - y2 = dco.flush() - self.assertEqual(data, y1 + y2) + y = dco.decompress(v1 + v2) + self.assertEqual(data, y) + self.assertIsInstance(dco.unconsumed_tail, bytes) + self.assertIsInstance(dco.unused_data, bytes) + + with ignore_flush_warning(): + self.assertEqual(b"", dco.flush()) self.assertIsInstance(dco.unconsumed_tail, bytes) self.assertIsInstance(dco.unused_data, bytes) @@ -229,9 +236,8 @@ x1 = co.compress(HAMLET_SCENE) x2 = co.flush() dco = zlib.decompressobj(wbits) - y1 = dco.decompress(x1 + x2) - y2 = dco.flush() - self.assertEqual(HAMLET_SCENE, y1 + y2) + y = dco.decompress(x1 + x2) + self.assertEqual(HAMLET_SCENE, y) # keyword arguments should also be supported zlib.compressobj(level=level, method=method, wbits=wbits, @@ -248,11 +254,10 @@ combuf = b''.join(bufs) dco = zlib.decompressobj() - y1 = dco.decompress(b''.join(bufs)) - y2 = dco.flush() - self.assertEqual(data, y1 + y2) + y = dco.decompress(b''.join(bufs)) + self.assertEqual(data, y) - def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): + def test_decompinc(self, source=None, cx=256, dcx=64): # compress object in steps, decompress object in steps source = source or HAMLET_SCENE data = source * 128 @@ -277,24 +282,20 @@ "(A) uct should be b'': not %d long" % len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) - if flush: - bufs.append(dco.flush()) - else: - while True: - chunk = dco.decompress(b'') - if chunk: - bufs.append(chunk) - else: - break + self.assertEqual(data, b''.join(bufs)) + + self.assertEqual(b'', dco.decompress(b'')) self.assertEqual(b'', dco.unconsumed_tail, ######## "(B) uct should be b'': not %d long" % len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) - self.assertEqual(data, b''.join(bufs)) - # Failure means: "decompressobj with init 
options failed" - def test_decompincflush(self): - self.test_decompinc(flush=True) + with ignore_flush_warning(): + self.assertEqual(b'', dco.flush()) + self.assertEqual(b'', dco.unconsumed_tail, ######## + "(B) uct should be b'': not %d long" % + len(dco.unconsumed_tail)) + self.assertEqual(b'', dco.unused_data) def test_decompimax(self, source=None, cx=256, dcx=64): # compress in steps, decompress in length-restricted steps @@ -320,7 +321,12 @@ 'chunk too big (%d>%d)' % (len(chunk), dcx)) bufs.append(chunk) cb = dco.unconsumed_tail - bufs.append(dco.flush()) + # The loop above does not check for the end of the stream, so there + # may be more than dcx bytes possible in the final call. Calling + # flush() will return them all, since it does not accept a max_length + # parameter. + with ignore_flush_warning(): + bufs.append(dco.flush()) self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') def test_decompressmaxlen(self, flush=False): @@ -346,7 +352,8 @@ bufs.append(chunk) cb = dco.unconsumed_tail if flush: - bufs.append(dco.flush()) + with ignore_flush_warning(): + bufs.append(dco.flush()) else: while chunk: chunk = dco.decompress(b'', max_length) @@ -435,7 +442,13 @@ co = zlib.compressobj(zlib.Z_BEST_COMPRESSION) self.assertTrue(co.flush()) # Returns a zlib header dco = zlib.decompressobj() - self.assertEqual(dco.flush(), b"") # Returns nothing + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b"") # Returns nothing + + def test_flush_deprecated(self): + dco = zlib.decompressobj() + with self.assertWarnsRegex(DeprecationWarning, "Decompress.flush"): + dco.flush() def test_dictionary(self): h = HAMLET_SCENE @@ -448,7 +461,7 @@ cd = co.compress(h) + co.flush() # Verify that it will decompress with the dictionary. dco = zlib.decompressobj(zdict=zdict) - self.assertEqual(dco.decompress(cd) + dco.flush(), h) + self.assertEqual(dco.decompress(cd), h) # Verify that it fails when not given the dictionary. 
dco = zlib.decompressobj() self.assertRaises(zlib.error, dco.decompress, cd) @@ -476,8 +489,9 @@ # (see issue #8672). dco = zlib.decompressobj() y = dco.decompress(x[:-5]) - y += dco.flush() self.assertEqual(y, b'foo') + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b'') def test_decompress_eof(self): x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' @@ -487,7 +501,8 @@ self.assertFalse(dco.eof) dco.decompress(x[-5:]) self.assertTrue(dco.eof) - dco.flush() + with ignore_flush_warning(): + dco.flush() self.assertTrue(dco.eof) def test_decompress_eof_incomplete_stream(self): @@ -496,7 +511,8 @@ self.assertFalse(dco.eof) dco.decompress(x[:-5]) self.assertFalse(dco.eof) - dco.flush() + with ignore_flush_warning(): + dco.flush() self.assertFalse(dco.eof) def test_decompress_unused_data(self): @@ -519,12 +535,17 @@ else: data += dco.decompress( dco.unconsumed_tail + x[i : i + step], maxlen) - data += dco.flush() self.assertTrue(dco.eof) self.assertEqual(data, source) self.assertEqual(dco.unconsumed_tail, b'') self.assertEqual(dco.unused_data, remainder) + with ignore_flush_warning(): + self.assertEqual(dco.flush(), b'') + self.assertTrue(dco.eof) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) + def test_flush_with_freed_input(self): # Issue #16411: decompressor accesses input to last decompress() call # in flush(), even if this object has been freed in the meanwhile. 
@@ -535,7 +556,8 @@ dco.decompress(data, 1) del data data = zlib.compress(input2) - self.assertEqual(dco.flush(), input1[1:]) + with ignore_flush_warning(): + self.assertEqual(dco.flush(), input1[1:]) @requires_Compress_copy def test_compresscopy(self): @@ -598,7 +620,8 @@ data = zlib.compress(HAMLET_SCENE) d = zlib.decompressobj() d.decompress(data) - d.flush() + with ignore_flush_warning(): + d.flush() self.assertRaises(ValueError, d.copy) def test_compresspickle(self): @@ -622,7 +645,7 @@ @bigmemtest(size=_1G + 1024 * 1024, memuse=2) def test_big_decompress_buffer(self, size): d = zlib.decompressobj() - decompress = lambda s: d.decompress(s) + d.flush() + decompress = lambda s: d.decompress(s) self.check_big_decompress_buffer(size, decompress) @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) @@ -637,6 +660,14 @@ data = None +@contextmanager +def ignore_flush_warning(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + category=DeprecationWarning, message="Decompress.flush") + yield + + def genblock(seed, length, step=1024, generator=random): """length-byte stream of random data from a seed (in step-byte blocks).""" if seed is not None: diff -r cdd403dd82cb Lib/zipfile.py --- a/Lib/zipfile.py Wed Nov 18 02:46:16 2015 +0000 +++ b/Lib/zipfile.py Wed Nov 18 07:36:05 2015 +0000 @@ -919,10 +919,8 @@ n = max(n, self.MIN_READ_SIZE) data = self._decompressor.decompress(data, n) self._eof = (self._decompressor.eof or - self._compress_left <= 0 and + not data and not self._decompressor.unconsumed_tail) - if self._eof: - data += self._decompressor.flush() else: data = self._decompressor.decompress(data) self._eof = self._decompressor.eof or self._compress_left <= 0 diff -r cdd403dd82cb Misc/NEWS --- a/Misc/NEWS Wed Nov 18 02:46:16 2015 +0000 +++ b/Misc/NEWS Wed Nov 18 07:36:05 2015 +0000 @@ -95,6 +95,10 @@ Library ------- +- Issue #23200: Deprecate the zlib decompressor's flush() method. 
+ Calling it is either unnecessary, or it defeats the max_length limit + used with decompress(). + - Issue #6973: When we know a subprocess.Popen process has died, do not allow the send_signal(), terminate(), or kill() methods to do anything as they could potentially signal a different process. diff -r cdd403dd82cb Modules/zlibmodule.c --- a/Modules/zlibmodule.c Wed Nov 18 02:46:16 2015 +0000 +++ b/Modules/zlibmodule.c Wed Nov 18 07:36:05 2015 +0000 @@ -693,15 +693,11 @@ The binary data to decompress. max_length: uint = 0 The maximum allowable length of the decompressed data. - Unconsumed input data will be stored in - the unconsumed_tail attribute. + Unconsumed input data will be stored in internal buffers for later + processing, and/or in the unconsumed_tail attribute. / Return a bytes object containing the decompressed version of the data. - -After calling this function, some of the input data may still be stored in -internal buffers for later processing. -Call the flush() method to clear these buffers. [clinic start generated code]*/ static PyObject * @@ -1065,6 +1061,11 @@ unsigned long start_total_out; Py_ssize_t size; + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Decompress.flush() is deprecated", 1) < 0) { + return NULL; + } + if (length == 0) { PyErr_SetString(PyExc_ValueError, "length must be greater than zero"); return NULL;