Index: Lib/gzip.py
===================================================================
--- Lib/gzip.py	(revision 76872)
+++ Lib/gzip.py	(working copy)
@@ -7,6 +7,7 @@
 
 import struct, sys, time, os
 import zlib
+import io
 import __builtin__
 
 __all__ = ["GzipFile","open"]
@@ -32,7 +33,7 @@
     """
     return GzipFile(filename, mode, compresslevel)
 
-class GzipFile:
+class GzipFile(io.BufferedIOBase):
     """The GzipFile class simulates most of the methods of a file object with
     the exception of the readinto() and truncate() methods.
 
@@ -97,8 +98,12 @@
             self.mode = READ
             # Set flag indicating start of a new member
             self._new_member = True
+            # Buffer data read from gzip file. extrastart is offset in
+            # stream where buffer starts. extrasize is number of
+            # bytes remaining in buffer from current stream position.
             self.extrabuf = ""
             self.extrasize = 0
+            self.extrastart = 0
             self.name = filename
             # Starts small, scales exponentially
             self.min_readsize = 100
@@ -196,7 +201,6 @@
         if flag & FHCRC:
             self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 
-
     def write(self,data):
         if self.mode != WRITE:
             import errno
@@ -204,12 +208,19 @@
 
         if self.fileobj is None:
             raise ValueError, "write() on closed GzipFile object"
+
+        # Convert data type if called by io.BufferedWriter.
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
         if len(data) > 0:
             self.size = self.size + len(data)
             self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
             self.fileobj.write( self.compress.compress(data) )
             self.offset += len(data)
 
+        return len(data)
+
     def read(self, size=-1):
         if self.mode != READ:
             import errno
@@ -235,15 +246,14 @@
                 if size > self.extrasize:
                     size = self.extrasize
 
-        chunk = self.extrabuf[:size]
-        self.extrabuf = self.extrabuf[size:]
+        offset = self.offset - self.extrastart
+        chunk = self.extrabuf[offset: offset + size]
         self.extrasize = self.extrasize - size
 
         self.offset += size
         return chunk
 
     def _unread(self, buf):
-        self.extrabuf = buf + self.extrabuf
         self.extrasize = len(buf) + self.extrasize
         self.offset -= len(buf)
 
@@ -299,8 +309,10 @@
 
     def _add_read_data(self, data):
         self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
-        self.extrabuf = self.extrabuf + data
+        offset = self.offset - self.extrastart
+        self.extrabuf = self.extrabuf[offset:] + data
         self.extrasize = self.extrasize + len(data)
+        self.extrastart = self.offset
         self.size = self.size + len(data)
 
     def _read_eof(self):
@@ -318,6 +330,10 @@
         elif isize != (self.size & 0xffffffffL):
             raise IOError, "Incorrect length of data produced"
 
+    @property
+    def closed(self):
+        return self.fileobj is None
+
     def close(self):
         if self.fileobj is None:
             return
@@ -333,15 +349,6 @@
             self.myfileobj.close()
             self.myfileobj = None
 
-    def __del__(self):
-        try:
-            if (self.myfileobj is None and
-                self.fileobj is None):
-                return
-        except AttributeError:
-            return
-        self.close()
-
     def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
         if self.mode == WRITE:
             # Ensure the compressor's buffer is flushed
@@ -356,12 +363,6 @@
         """
         return self.fileobj.fileno()
 
-    def isatty(self):
-        return False
-
-    def tell(self):
-        return self.offset
-
     def rewind(self):
         '''Return the uncompressed stream file position indicator to the
         beginning of the file'''
@@ -371,8 +372,18 @@
         self._new_member = True
         self.extrabuf = ""
         self.extrasize = 0
+        self.extrastart = 0
         self.offset = 0
 
+    def readable(self):
+        return self.mode == READ
+
+    def writable(self):
+        return self.mode == WRITE
+
+    def seekable(self):
+        return True
+
     def seek(self, offset, whence=0):
         if whence:
             if whence == 1:
@@ -395,8 +406,18 @@
                 self.read(1024)
             self.read(count % 1024)
 
+        return self.offset
+
     def readline(self, size=-1):
         if size < 0:
+            # Shortcut common case - newline found in buffer.
+            offset = self.offset - self.extrastart
+            i = self.extrabuf.find('\n', offset) + 1
+            if i > 0:
+                self.extrasize -= i - offset
+                self.offset += i - offset
+                return self.extrabuf[offset: i]
+
             size = sys.maxint
             readsize = self.min_readsize
         else:
@@ -426,43 +447,7 @@
             self.min_readsize = min(readsize, self.min_readsize * 2, 512)
         return ''.join(bufs) # Return resulting line
 
-    def readlines(self, sizehint=0):
-        # Negative numbers result in reading all the lines
-        if sizehint <= 0:
-            sizehint = sys.maxint
-        L = []
-        while sizehint > 0:
-            line = self.readline()
-            if line == "":
-                break
-            L.append(line)
-            sizehint = sizehint - len(line)
-        return L
-
-    def writelines(self, L):
-        for line in L:
-            self.write(line)
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        line = self.readline()
-        if line:
-            return line
-        else:
-            raise StopIteration
-
-    def __enter__(self):
-        if self.fileobj is None:
-            raise ValueError("I/O operation on closed GzipFile object")
-        return self
-
-    def __exit__(self, *args):
-        self.close()
-
-
 
 def _test():
     # Act like gzip; with -d, act like gunzip.
     # The input file is not deleted, however, nor are any other gzip
Index: Lib/test/test_gzip.py
===================================================================
--- Lib/test/test_gzip.py	(revision 76872)
+++ Lib/test/test_gzip.py	(working copy)
@@ -5,6 +5,7 @@
 import unittest
 from test import test_support
 import os
+import io
 import struct
 gzip = test_support.import_module('gzip')
 
@@ -80,7 +81,16 @@
         zgfile.close()
         self.assertEquals(contents, 'a'*201)
 
+    def test_buffered_reader_7471(self):
+        self.test_write()
+        f = gzip.GzipFile(self.filename, 'rb')
+        r = io.BufferedReader(f)
+        lines = [line for line in r]
+        r.close()
+
+        self.assertEqual(lines, 50 * data1.splitlines(True))
+
 
     def test_readline(self):
         self.test_write()
         # Try .readline() with varying line lengths