diff -r 50606131a987 Lib/gzip.py --- a/Lib/gzip.py Sun Jan 13 22:24:27 2013 +0200 +++ b/Lib/gzip.py Mon Jan 14 20:53:39 2013 +0200 @@ -65,9 +65,6 @@ # or unsigned. output.write(struct.pack(" self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: + while size > self.extrasize: + if not self._read(readsize): size = self.extrasize + break + readsize = min(self.max_read_chunk, readsize * 2) offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -385,12 +376,9 @@ if self.extrasize <= 0 and self.fileobj is None: return b'' - try: - # For certain input data, a single call to _read() may not return - # any data. In this case, retry until we get some data or reach EOF. - while self.extrasize <= 0: - self._read() - except EOFError: + # For certain input data, a single call to _read() may not return + # any data. In this case, retry until we get some data or reach EOF. + while self.extrasize <= 0 and self._read(): pass if size < 0 or size > self.extrasize: size = self.extrasize @@ -413,12 +401,9 @@ if self.extrasize == 0: if self.fileobj is None: return b'' - try: - # Ensure that we don't return b"" if we haven't reached EOF. - while self.extrasize == 0: - # 1024 is the same buffering heuristic used in read() - self._read(max(n, 1024)) - except EOFError: + # Ensure that we don't return b"" if we haven't reached EOF. + # 1024 is the same buffering heuristic used in read() + while self.extrasize == 0 and self._read(max(n, 1024)): pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -431,45 +416,52 @@ def _read(self, size=1024): if self.fileobj is None: - raise EOFError("Reached EOF") + return False - if self._new_member: - # If the _new_member flag is set, we have to - # jump to the next member, if there is one. - self._init_read() - self._read_gzip_header() - self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) - self._new_member = False + try: + if self._new_member: + # If the _new_member flag is set, we have to + # jump to the next member, if there is one. + self._init_read() + if not self._read_gzip_header(): + return False + self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) + self._new_member = False - # Read a chunk of data from the file - buf = self.fileobj.read(size) + # Read a chunk of data from the file + buf = self.fileobj.read(size) - # If the EOF has been reached, flush the decompression object - # and mark this object as finished. + # If the EOF has been reached, flush the decompression object + # and mark this object as finished. - if buf == b"": - uncompress = self.decompress.flush() - # Prepend the already read bytes to the fileobj to they can be - # seen by _read_eof() - self.fileobj.prepend(self.decompress.unused_data, True) - self._read_eof() + if buf == b"": + uncompress = self.decompress.flush() + # Prepend the already read bytes to the fileobj to they can be + # seen by _read_eof() + self.fileobj.prepend(self.decompress.unused_data, True) + self._read_eof() + self._add_read_data( uncompress ) + return False + + uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) - raise EOFError('Reached EOF') - uncompress = self.decompress.decompress(buf) - self._add_read_data( uncompress ) - - if self.decompress.unused_data != b"": - # Ending case: we've come to the end of a member in the file, - # so seek back to the start of the unused data, finish up - # this member, and read a new gzip header. - # Prepend the already read bytes to the fileobj to they can be - # seen by _read_eof() and _read_gzip_header() - self.fileobj.prepend(self.decompress.unused_data, True) - # Check the CRC and file size, and set the flag so we read - # a new member on the next call - self._read_eof() - self._new_member = True + if self.decompress.unused_data != b"": + # Ending case: we've come to the end of a member in the file, + # so seek back to the start of the unused data, finish up + # this member, and read a new gzip header. + # Prepend the already read bytes to the fileobj to they can be + # seen by _read_eof() and _read_gzip_header() + self.fileobj.prepend(self.decompress.unused_data, True) + # Check the CRC and file size, and set the flag so we read + # a new member on the next call + self._read_eof() + self._new_member = True + except struct.error: + # _read_gzip_header() and _read_eof() raise struct.error on EOF + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffff @@ -484,8 +476,8 @@ # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB + # This function raises struct.error on incompleted footer. + crc32, isize = struct.unpack("