# HG changeset patch
# Parent  03708c680ecaea7537b0fe77d50d4c068d27a671
Issue #23200: Deprecate the zlib decompressor's flush() method

diff -r 03708c680eca Doc/library/zlib.rst
--- a/Doc/library/zlib.rst	Wed Feb 03 07:52:06 2016 +0000
+++ b/Doc/library/zlib.rst	Wed Feb 03 11:22:02 2016 +0000
@@ -218,26 +218,31 @@
 
 .. method:: Decompress.decompress(data[, max_length])
 
-   Decompress *data*, returning a bytes object containing the uncompressed data
-   corresponding to at least part of the data in *string*.  This data should be
+   Decompress *data*, returning a bytes object containing
+   the uncompressed data.  This data should be
    concatenated to the output produced by any preceding calls to the
-   :meth:`decompress` method.  Some of the input data may be preserved in internal
-   buffers for later processing.
+   :meth:`.decompress` method.
 
    If the optional parameter *max_length* is non-zero then the return value will be
    no longer than *max_length*. This may mean that not all of the compressed input
-   can be processed; and unconsumed data will be stored in the attribute
-   :attr:`unconsumed_tail`. This bytestring must be passed to a subsequent call to
-   :meth:`decompress` if decompression is to continue.  If *max_length* is not
-   supplied then the whole input is decompressed, and :attr:`unconsumed_tail` is
+   can be processed. Unconsumed data will be preserved in internal
+   buffers for later processing, and/or stored in the attribute
+   :attr:`.unconsumed_tail`. This bytestring must be passed to a subsequent call to
+   :meth:`.decompress` if decompression is to continue.  If *max_length* is not
+   supplied then the whole input is decompressed, and :attr:`.unconsumed_tail` is
    empty.
 
 
 .. method:: Decompress.flush([length])
 
+   .. deprecated:: 3.6
+      Calling this method is either unnecessary, because :meth:`.decompress`
+      returns as much data as possible, or it defeats the purpose of the
+      *max_length* limit.
+
    All pending input is processed, and a bytes object containing the remaining
-   uncompressed output is returned.  After calling :meth:`flush`, the
-   :meth:`decompress` method cannot be called again; the only realistic action is
+   uncompressed output is returned.  After calling :meth:`.flush`, the
+   :meth:`.decompress` method cannot be called again; the only realistic action is
    to delete the object.
 
    The optional parameter *length* sets the initial size of the output buffer.
diff -r 03708c680eca Doc/whatsnew/3.6.rst
--- a/Doc/whatsnew/3.6.rst	Wed Feb 03 07:52:06 2016 +0000
+++ b/Doc/whatsnew/3.6.rst	Wed Feb 03 11:22:02 2016 +0000
@@ -196,6 +196,11 @@
   been deprecated in previous versions of Python in favour of
   :meth:`importlib.abc.Loader.exec_module`.
 
+* Calling :meth:`zlib.Decompress.flush` now emits a :exc:`DeprecationWarning`.
+  Calling it is either unnecessary, because :meth:`~zlib.Decompress.decompress`
+  has returned as much data as possible since Python 2.1, or it defeats
+  the *max_length* limit.  See :issue:`23200`.
+
 
 Deprecated functions and types of the C API
 -------------------------------------------
diff -r 03708c680eca Lib/encodings/zlib_codec.py
--- a/Lib/encodings/zlib_codec.py	Wed Feb 03 07:52:06 2016 +0000
+++ b/Lib/encodings/zlib_codec.py	Wed Feb 03 11:22:02 2016 +0000
@@ -47,11 +47,7 @@
         self.decompressobj = zlib.decompressobj()
 
     def decode(self, input, final=False):
-        if final:
-            c = self.decompressobj.decompress(input)
-            return c + self.decompressobj.flush()
-        else:
-            return self.decompressobj.decompress(input)
+        return self.decompressobj.decompress(input)
 
     def reset(self):
         self.decompressobj = zlib.decompressobj()
diff -r 03708c680eca Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Wed Feb 03 07:52:06 2016 +0000
+++ b/Lib/test/test_codecs.py	Wed Feb 03 11:22:02 2016 +0000
@@ -2682,6 +2682,13 @@
                 self.assertEqual(size, len(o))
                 self.assertEqual(i, binput)
 
+                encoder = codecs.getincrementalencoder(encoding)()
+                o_inc = encoder.encode(binput, True)
+                self.assertEqual(o_inc, o)
+                decoder = codecs.getincrementaldecoder(encoding)()
+                i = decoder.decode(o, True)
+                self.assertEqual(i, binput)
+
     def test_read(self):
         for encoding in bytes_transform_encodings:
             with self.subTest(encoding=encoding):
diff -r 03708c680eca Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py	Wed Feb 03 07:52:06 2016 +0000
+++ b/Lib/test/test_zlib.py	Wed Feb 03 11:22:02 2016 +0000
@@ -1,9 +1,11 @@
 import unittest
 from test import support
 import binascii
+from contextlib import contextmanager
 import pickle
 import random
 import sys
+import warnings
 from test.support import bigmemtest, _1G, _4G
 
 zlib = support.import_module('zlib')
@@ -114,8 +116,9 @@
 
     def test_decompressobj_badflush(self):
         # verify failure on calling decompressobj.flush with bad params
-        self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
-        self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
+        with ignore_flush_warning():
+            self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
+            self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
 
     @support.cpython_only
     def test_overflow(self):
@@ -225,9 +228,13 @@
             self.assertEqual(x1 + x2, datazip)
         for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))):
             dco = zlib.decompressobj()
-            y1 = dco.decompress(v1 + v2)
-            y2 = dco.flush()
-            self.assertEqual(data, y1 + y2)
+            y = dco.decompress(v1 + v2)
+            self.assertEqual(data, y)
+            self.assertIsInstance(dco.unconsumed_tail, bytes)
+            self.assertIsInstance(dco.unused_data, bytes)
+
+            with ignore_flush_warning():
+                self.assertEqual(b"", dco.flush())
             self.assertIsInstance(dco.unconsumed_tail, bytes)
             self.assertIsInstance(dco.unused_data, bytes)
 
@@ -242,9 +249,8 @@
         x1 = co.compress(HAMLET_SCENE)
         x2 = co.flush()
         dco = zlib.decompressobj(wbits)
-        y1 = dco.decompress(x1 + x2)
-        y2 = dco.flush()
-        self.assertEqual(HAMLET_SCENE, y1 + y2)
+        y = dco.decompress(x1 + x2)
+        self.assertEqual(HAMLET_SCENE, y)
 
         # keyword arguments should also be supported
         zlib.compressobj(level=level, method=method, wbits=wbits,
@@ -261,11 +267,10 @@
         combuf = b''.join(bufs)
 
         dco = zlib.decompressobj()
-        y1 = dco.decompress(b''.join(bufs))
-        y2 = dco.flush()
-        self.assertEqual(data, y1 + y2)
+        y = dco.decompress(b''.join(bufs))
+        self.assertEqual(data, y)
 
-    def test_decompinc(self, flush=False, source=None, cx=256, dcx=64):
+    def test_decompinc(self, source=None, cx=256, dcx=64):
         # compress object in steps, decompress object in steps
         source = source or HAMLET_SCENE
         data = source * 128
@@ -290,24 +295,20 @@
                              "(A) uct should be b'': not %d long" %
                                        len(dco.unconsumed_tail))
             self.assertEqual(b'', dco.unused_data)
-        if flush:
-            bufs.append(dco.flush())
-        else:
-            while True:
-                chunk = dco.decompress(b'')
-                if chunk:
-                    bufs.append(chunk)
-                else:
-                    break
+        self.assertEqual(data, b''.join(bufs))
+
+        self.assertEqual(b'', dco.decompress(b''))
         self.assertEqual(b'', dco.unconsumed_tail, ########
                          "(B) uct should be b'': not %d long" %
                                        len(dco.unconsumed_tail))
         self.assertEqual(b'', dco.unused_data)
-        self.assertEqual(data, b''.join(bufs))
-        # Failure means: "decompressobj with init options failed"
 
-    def test_decompincflush(self):
-        self.test_decompinc(flush=True)
+        with ignore_flush_warning():
+            self.assertEqual(b'', dco.flush())
+        self.assertEqual(b'', dco.unconsumed_tail, ########
+                         "(B) uct should be b'': not %d long" %
+                                       len(dco.unconsumed_tail))
+        self.assertEqual(b'', dco.unused_data)
 
     def test_decompimax(self, source=None, cx=256, dcx=64):
         # compress in steps, decompress in length-restricted steps
@@ -333,7 +334,12 @@
                     'chunk too big (%d>%d)' % (len(chunk), dcx))
             bufs.append(chunk)
             cb = dco.unconsumed_tail
-        bufs.append(dco.flush())
+        # The loop above does not check for the end of the stream, so more
+        # than dcx bytes of output may still be pending after it.  Calling
+        # flush() returns them all, since it does not accept a max_length
+        # parameter.
+        with ignore_flush_warning():
+            bufs.append(dco.flush())
         self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved')
 
     def test_decompressmaxlen(self, flush=False):
@@ -359,7 +365,8 @@
             bufs.append(chunk)
             cb = dco.unconsumed_tail
         if flush:
-            bufs.append(dco.flush())
+            with ignore_flush_warning():
+                bufs.append(dco.flush())
         else:
             while chunk:
                 chunk = dco.decompress(b'', max_length)
@@ -463,7 +470,13 @@
         co = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
         self.assertTrue(co.flush())  # Returns a zlib header
         dco = zlib.decompressobj()
-        self.assertEqual(dco.flush(), b"") # Returns nothing
+        with ignore_flush_warning():
+            self.assertEqual(dco.flush(), b"") # Returns nothing
+
+    def test_flush_deprecated(self):
+        dco = zlib.decompressobj()
+        with self.assertWarnsRegex(DeprecationWarning, "Decompress.flush"):
+            dco.flush()
 
     def test_dictionary(self):
         h = HAMLET_SCENE
@@ -476,7 +489,7 @@
         cd = co.compress(h) + co.flush()
         # Verify that it will decompress with the dictionary.
         dco = zlib.decompressobj(zdict=zdict)
-        self.assertEqual(dco.decompress(cd) + dco.flush(), h)
+        self.assertEqual(dco.decompress(cd), h)
         # Verify that it fails when not given the dictionary.
         dco = zlib.decompressobj()
         self.assertRaises(zlib.error, dco.decompress, cd)
@@ -504,8 +517,9 @@
         # (see issue #8672).
         dco = zlib.decompressobj()
         y = dco.decompress(x[:-5])
-        y += dco.flush()
         self.assertEqual(y, b'foo')
+        with ignore_flush_warning():
+            self.assertEqual(dco.flush(), b'')
 
     def test_decompress_eof(self):
         x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E'  # 'foo'
@@ -515,7 +529,8 @@
         self.assertFalse(dco.eof)
         dco.decompress(x[-5:])
         self.assertTrue(dco.eof)
-        dco.flush()
+        with ignore_flush_warning():
+            dco.flush()
         self.assertTrue(dco.eof)
 
     def test_decompress_eof_incomplete_stream(self):
@@ -524,7 +539,8 @@
         self.assertFalse(dco.eof)
         dco.decompress(x[:-5])
         self.assertFalse(dco.eof)
-        dco.flush()
+        with ignore_flush_warning():
+            dco.flush()
         self.assertFalse(dco.eof)
 
     def test_decompress_unused_data(self):
@@ -547,12 +563,17 @@
                     else:
                         data += dco.decompress(
                                 dco.unconsumed_tail + x[i : i + step], maxlen)
-                data += dco.flush()
                 self.assertTrue(dco.eof)
                 self.assertEqual(data, source)
                 self.assertEqual(dco.unconsumed_tail, b'')
                 self.assertEqual(dco.unused_data, remainder)
 
+                with ignore_flush_warning():
+                    self.assertEqual(dco.flush(), b'')
+                self.assertTrue(dco.eof)
+                self.assertEqual(dco.unconsumed_tail, b'')
+                self.assertEqual(dco.unused_data, remainder)
+
     def test_flush_with_freed_input(self):
         # Issue #16411: decompressor accesses input to last decompress() call
         # in flush(), even if this object has been freed in the meanwhile.
@@ -563,7 +584,8 @@
         dco.decompress(data, 1)
         del data
         data = zlib.compress(input2)
-        self.assertEqual(dco.flush(), input1[1:])
+        with ignore_flush_warning():
+            self.assertEqual(dco.flush(), input1[1:])
 
     @bigmemtest(size=_4G, memuse=1)
     def test_flush_large_length(self, size):
@@ -642,7 +664,8 @@
         data = zlib.compress(HAMLET_SCENE)
         d = zlib.decompressobj()
         d.decompress(data)
-        d.flush()
+        with ignore_flush_warning():
+            d.flush()
         self.assertRaises(ValueError, d.copy)
 
     def test_compresspickle(self):
@@ -666,7 +689,7 @@
     @bigmemtest(size=_1G + 1024 * 1024, memuse=2)
     def test_big_decompress_buffer(self, size):
         d = zlib.decompressobj()
-        decompress = lambda s: d.decompress(s) + d.flush()
+        decompress = lambda s: d.decompress(s)
         self.check_big_decompress_buffer(size, decompress)
 
     @bigmemtest(size=_4G + 100, memuse=1, dry_run=False)
@@ -681,6 +704,14 @@
             data = None
 
 
+@contextmanager
+def ignore_flush_warning():
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore",
+            category=DeprecationWarning, message="Decompress.flush")
+        yield
+
+
 def genblock(seed, length, step=1024, generator=random):
     """length-byte stream of random data from a seed (in step-byte blocks)."""
     if seed is not None:
diff -r 03708c680eca Lib/zipfile.py
--- a/Lib/zipfile.py	Wed Feb 03 07:52:06 2016 +0000
+++ b/Lib/zipfile.py	Wed Feb 03 11:22:02 2016 +0000
@@ -917,12 +917,11 @@
             self._eof = self._compress_left <= 0
         elif self._compress_type == ZIP_DEFLATED:
             n = max(n, self.MIN_READ_SIZE)
+            input_eof = not data
             data = self._decompressor.decompress(data, n)
-            self._eof = (self._decompressor.eof or
-                         self._compress_left <= 0 and
-                         not self._decompressor.unconsumed_tail)
-            if self._eof:
-                data += self._decompressor.flush()
+            # Stop decompressing if either the decompressor indicates EOF, or
+            # no more input is available and all pending output has been read
+            self._eof = self._decompressor.eof or input_eof and not data
         else:
             data = self._decompressor.decompress(data)
             self._eof = self._decompressor.eof or self._compress_left <= 0
diff -r 03708c680eca Misc/NEWS
--- a/Misc/NEWS	Wed Feb 03 07:52:06 2016 +0000
+++ b/Misc/NEWS	Wed Feb 03 11:22:02 2016 +0000
@@ -166,6 +166,10 @@
 Library
 -------
 
+- Issue #23200: Deprecate the zlib decompressor's flush() method.
+  Calling it is either unnecessary, or it defeats the max_length limit
+  used with decompress().
+
 - Issue #26202: copy.deepcopy() now correctly copies range() objects with
   non-atomic attributes.
 
diff -r 03708c680eca Modules/zlibmodule.c
--- a/Modules/zlibmodule.c	Wed Feb 03 07:52:06 2016 +0000
+++ b/Modules/zlibmodule.c	Wed Feb 03 11:22:02 2016 +0000
@@ -692,15 +692,11 @@
         The binary data to decompress.
     max_length: capped_uint = 0
         The maximum allowable length of the decompressed data.
-        Unconsumed input data will be stored in
-        the unconsumed_tail attribute.
+        Unconsumed input data will be stored in internal buffers for later
+        processing, and/or the unconsumed_tail attribute.
     /
 
 Return a bytes object containing the decompressed version of the data.
-
-After calling this function, some of the input data may still be stored in
-internal buffers for later processing.
-Call the flush() method to clear these buffers.
 [clinic start generated code]*/
 
 static PyObject *
@@ -1058,6 +1054,11 @@
     unsigned long start_total_out;
     Py_ssize_t size;
 
+    if (PyErr_WarnEx(PyExc_DeprecationWarning,
+            "Decompress.flush() is deprecated", 1) < 0) {
+        return NULL;
+    }
+
     if (length == 0) {
         PyErr_SetString(PyExc_ValueError, "length must be greater than zero");
         return NULL;