# HG changeset patch # Parent 12ad8c9757665da686939171ec6070d81ad4e811 Issue #12646: Add an 'eof' attribute to zlib.Decompress. This will make it easier to detect truncated input streams. Also, make zlib's error messages more consistent. diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -152,7 +152,7 @@ compress a set of data that share a common initial prefix. -Decompression objects support the following methods, and two attributes: +Decompression objects support the following methods and attributes: .. attribute:: Decompress.unused_data @@ -162,13 +162,6 @@ available. If the whole bytestring turned out to contain compressed data, this is ``b""``, an empty bytes object. - The only way to determine where a bytestring of compressed data ends is by actually - decompressing it. This means that when compressed data is contained part of a - larger file, you can only find the end of it by reading data and feeding it - followed by some non-empty bytestring into a decompression object's - :meth:`decompress` method until the :attr:`unused_data` attribute is no longer - empty. - .. attribute:: Decompress.unconsumed_tail @@ -179,6 +172,17 @@ :meth:`decompress` method call in order to get correct output. +.. attribute:: Decompress.eof + + A boolean indicating whether the end of the compressed data stream has been + reached. + + This makes it possible to distinguish between a properly-formed compressed + stream, and an incomplete or truncated one. + + .. versionadded:: 3.3 + + .. method:: Decompress.decompress(data[, max_length]) Decompress *data*, returning a bytes object containing the uncompressed data diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -447,6 +447,26 @@ y += dco.flush() self.assertEqual(y, b'foo') + def test_decompress_eof(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.decompress(x[-5:]) + self.assertTrue(dco.eof) + dco.flush() + self.assertTrue(dco.eof) + + def test_decompress_eof_incomplete_stream(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.flush() + self.assertFalse(dco.eof) + if hasattr(zlib.compressobj(), "copy"): def test_compresscopy(self): # Test copying a compression object diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -249,6 +249,9 @@ Library ------- +- Issue #12646: Add an 'eof' attribute to zlib.Decompress, to make it easier to + detect truncated input streams. + - Issue #12540: Prevent zombie IDLE processes on Windows due to changes in os.kill(). diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -43,6 +43,7 @@ z_stream zst; PyObject *unused_data; PyObject *unconsumed_tail; + char eof; int is_initialised; #ifdef WITH_THREAD PyThread_type_lock lock; @@ -89,6 +90,7 @@ self = PyObject_New(compobject, type); if (self == NULL) return NULL; + self->eof = 0; self->is_initialised = 0; self->unused_data = PyBytes_FromStringAndSize("", 0); if (self->unused_data == NULL) { @@ -291,7 +293,7 @@ err = inflateEnd(&zst); if (err != Z_OK) { - zlib_error(zst, err, "while finishing data decompression"); + zlib_error(zst, err, "while finishing decompression"); goto error; } @@ -476,7 +478,7 @@ */ if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while compressing"); + zlib_error(self->zst, err, "while compressing data"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -611,12 +613,13 @@ Py_DECREF(RetVal); goto error; } + self->eof = 1; /* We will only get Z_BUF_ERROR if the output buffer was full but there wasn't more output when we tried again, so it is not an error condition. */ } else if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while decompressing"); + zlib_error(self->zst, err, "while decompressing data"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -697,7 +700,7 @@ if (err == Z_STREAM_END && flushmode == Z_FINISH) { err = deflateEnd(&(self->zst)); if (err != Z_OK) { - zlib_error(self->zst, err, "from deflateEnd()"); + zlib_error(self->zst, err, "while finishing compression"); Py_DECREF(RetVal); RetVal = NULL; goto error; @@ -765,6 +768,7 @@ Py_XDECREF(retval->unconsumed_tail); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; + retval->eof = self->eof; /* Mark it as being initialized */ retval->is_initialised = 1; @@ -816,6 +820,7 @@ Py_XDECREF(retval->unconsumed_tail); retval->unused_data = self->unused_data; retval->unconsumed_tail = self->unconsumed_tail; + retval->eof = self->eof; /* Mark it as being initialized */ retval->is_initialised = 1; @@ -885,10 +890,11 @@ various data structures. Note we should only get Z_STREAM_END when flushmode is Z_FINISH */ if (err == Z_STREAM_END) { + self->eof = 1; + self->is_initialised = 0; err = inflateEnd(&(self->zst)); - self->is_initialised = 0; if (err != Z_OK) { - zlib_error(self->zst, err, "from inflateEnd()"); + zlib_error(self->zst, err, "while finishing decompression"); Py_DECREF(retval); retval = NULL; goto error; @@ -936,6 +942,7 @@ static PyMemberDef Decomp_members[] = { {"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY}, {"unconsumed_tail", T_OBJECT, COMP_OFF(unconsumed_tail), READONLY}, + {"eof", T_BOOL, COMP_OFF(eof), READONLY}, {NULL}, };