diff -r 1fde0d70f2b8 Doc/library/gzip.rst --- a/Doc/library/gzip.rst Fri Nov 09 18:58:47 2012 -0800 +++ b/Doc/library/gzip.rst Sat Nov 10 17:06:13 2012 +0100 @@ -54,6 +54,10 @@ and the *encoding*, *errors* and *newline* arguments. +.. exception:: BadGzipFile + + Raised when an invalid gzip file is encountered; inherits from :exc:`IOError`. + .. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None) Constructor for the :class:`GzipFile` class, which simulates most of the diff -r 1fde0d70f2b8 Doc/whatsnew/3.4.rst --- a/Doc/whatsnew/3.4.rst Fri Nov 09 18:58:47 2012 -0800 +++ b/Doc/whatsnew/3.4.rst Sat Nov 10 17:06:13 2012 +0100 @@ -149,7 +149,7 @@ Improved Modules ================ -* None yet. +* The :mod:`gzip` module raises a specific :exc:`BadGzipFile` exception in case of invalid or corrupted files. Optimizations diff -r 1fde0d70f2b8 Lib/gzip.py --- a/Lib/gzip.py Fri Nov 09 18:58:47 2012 -0800 +++ b/Lib/gzip.py Sat Nov 10 17:06:13 2012 +0100 @@ -10,7 +10,7 @@ import builtins import io -__all__ = ["GzipFile", "open", "compress", "decompress"] +__all__ = ["GzipFile", "BadGzipFile", "open", "compress", "decompress"] FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 @@ -124,6 +124,12 @@ return getattr(self.file, name) +class BadGzipFile(IOError): + """ + Exception raised if the processed gzip file is not valid. + """ + + class GzipFile(io.BufferedIOBase): """The GzipFile class simulates most of the methods of a file object with the exception of the readinto() and truncate() methods. 
@@ -286,10 +292,10 @@ raise EOFError("Reached EOF") if magic != b'\037\213': - raise IOError('Not a gzipped file') + raise BadGzipFile('Not a gzipped file') method = ord( self.fileobj.read(1) ) if method != 8: - raise IOError('Unknown compression method') + raise BadGzipFile('Unknown compression method') flag = ord( self.fileobj.read(1) ) self.mtime = read32(self.fileobj) # extraflag = self.fileobj.read(1) @@ -486,10 +492,10 @@ crc32 = read32(self.fileobj) isize = read32(self.fileobj) # may exceed 2GB if crc32 != self.crc: - raise IOError("CRC check failed %s != %s" % (hex(crc32), - hex(self.crc))) + raise BadGzipFile("CRC check failed %s != %s" % + (hex(crc32), hex(self.crc))) elif isize != (self.size & 0xffffffff): - raise IOError("Incorrect length of data produced") + raise BadGzipFile("Incorrect length of data produced") # Gzip files can be padded with zeroes and still have archives. # Consume all zero bytes and set the file position to the first diff -r 1fde0d70f2b8 Lib/test/test_gzip.py --- a/Lib/test/test_gzip.py Fri Nov 09 18:58:47 2012 -0800 +++ b/Lib/test/test_gzip.py Sat Nov 10 17:06:13 2012 +0100 @@ -307,6 +307,17 @@ d = f.read() self.assertEqual(d, data1 * 50, "Incorrect data in file") + def test_gzip_exception(self): + self.assertTrue(issubclass(gzip.BadGzipFile, IOError)) + + def test_bad_gzip_file(self): + with open(self.filename, 'wb') as file: + file.write(data1 * 50) + + with gzip.GzipFile(self.filename, 'r') as file: + self.assertRaises(gzip.BadGzipFile, file.readlines) + self.assertRaises(IOError, file.readlines) + def test_non_seekable_file(self): uncompressed = data1 * 50 buf = UnseekableIO()