# Number of bytes moved per read()/write() call while seeking, and the
# initial request size passed to _read() by GzipFile.read().
CHUNK_SIZE = 16 * 8192


class GzipFile(gzip.GzipFile):
    """Version of GzipFile with larger read sizes and thus faster seeks.

    Seeking is emulated: in write mode by writing NUL bytes, in read mode
    by reading (and discarding) data in CHUNK_SIZE pieces.

    NOTE(review): this class relies on the attributes ``offset``,
    ``extrabuf``, ``extrasize``, ``extrastart``, ``max_read_chunk`` and the
    methods ``_read``, ``_check_closed`` and ``rewind``, none of which are
    defined here -- presumably they come from the Python 2 implementation
    of ``gzip.GzipFile``; confirm against the interpreter in use.
    """

    def seek(self, offset, whence=0):
        """Move to uncompressed position *offset* and return the new offset.

        whence=0 treats *offset* as absolute, whence=1 as relative to the
        current position.  Any other *whence* raises ValueError.

        In write mode only forward seeks are possible (the gap is filled
        with NUL bytes); a backward seek raises IOError.  In read mode a
        backward seek rewinds and reads forward again.  A target before the
        start of the stream leaves the position at the start, and a target
        past the end of the data leaves it at the end.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == gzip.WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            whole_chunks, remainder = divmod(offset - self.offset, CHUNK_SIZE)
            if whole_chunks:
                # Build the padding block once instead of once per write.
                zeros = CHUNK_SIZE * '\0'
                while whole_chunks > 0:
                    self.write(zeros)
                    whole_chunks -= 1
            self.write(remainder * '\0')
        elif self.mode == gzip.READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            # Clamp at zero: for a target before the start of the stream the
            # distance is negative, and Python's modulo would otherwise turn
            # it into a large forward read.
            count = max(offset - self.offset, 0)
            whole_chunks, remainder = divmod(count, CHUNK_SIZE)
            # An empty read means there is no more data, so further chunk
            # reads could not advance the position; stop early.
            while whole_chunks > 0 and self.read(CHUNK_SIZE):
                whole_chunks -= 1
            self.read(remainder)
        return self.offset

    def read(self, size=-1):
        """Return up to *size* bytes of data from the internal buffer.

        With a negative *size* (the default) everything up to the end of
        the data is returned.  The buffer is refilled through ``_read``,
        starting with CHUNK_SIZE requests and doubling up to
        ``max_read_chunk``, until enough data is buffered or ``_read``
        raises EOFError.

        Raises IOError(EBADF) when the file is not in read mode.
        """
        self._check_closed()
        if self.mode != gzip.READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        # Nothing buffered and no underlying file object left to read from.
        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = CHUNK_SIZE
        if size < 0:
            # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:
            # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                # Fewer bytes available than requested: return what is left.
                if size > self.extrasize:
                    size = self.extrasize

        # Position of the current offset inside the buffered data.
        start = self.offset - self.extrastart
        chunk = self.extrabuf[start: start + size]
        self.extrasize -= size
        self.offset += size
        return chunk