Index: Lib/gzip.py
===================================================================
--- Lib/gzip.py	(revision 76854)
+++ Lib/gzip.py	(working copy)
@@ -7,6 +7,7 @@
 
 import struct, sys, time, os
 import zlib
+import io
 import __builtin__
 
 __all__ = ["GzipFile","open"]
@@ -32,15 +33,9 @@
     """
     return GzipFile(filename, mode, compresslevel)
 
-class GzipFile:
-    """The GzipFile class simulates most of the methods of a file object with
-    the exception of the readinto() and truncate() methods.
+class GzipFile(io.BufferedRandom):
+    """Read and write gzip files."""
 
-    """
-
-    myfileobj = None
-    max_read_chunk = 10 * 1024 * 1024   # 10Mb
-
     def __init__(self, filename=None, mode=None,
                  compresslevel=9, fileobj=None, mtime=None):
         """Constructor for the GzipFile class.
@@ -80,6 +75,40 @@
 
         """
 
+        self._gzip = _GzipFileRaw(filename, mode, compresslevel, fileobj, mtime)
+        io.BufferedRandom.__init__(self, self._gzip)
+        self._gzip._setmode()
+
+    #
+    # Seek with support of keyword arguments missing from io.BufferedRandom.
+    #
+    def seek(self, offset, whence=io.SEEK_SET):
+        return io.BufferedRandom.seek(self, offset, whence)
+
+    #
+    # Delegate attributes of internal gzip object for compatibility
+    # with existing code, but degrades performance by 15% even if never invoked.
+    # A lookup of '_gzip' itself must fail fast: when that attribute is not
+    # set, reading self._gzip below would re-enter __getattr__ without end.
+    #
+    def __getattr__(self, name):
+        if name == '_gzip':
+            raise AttributeError(name)
+        return getattr(self._gzip, name)
+
+class _GzipFileRaw(object):
+    """The _GzipFileRaw class simulates most of the methods of a file object with
+    the exception of the truncate() method.
+
+    """
+
+    myfileobj = None
+    max_read_chunk = 10 * 1024 * 1024   # 10Mb
+
+    def __init__(self, filename=None, mode=None,
+                 compresslevel=9, fileobj=None, mtime=None):
+        self._modeset = False
+
         # guarantee the file is opened in binary mode on platforms
         # that care about that sort of thing
         if mode and 'b' not in mode:
@@ -198,6 +227,9 @@
 
 
     def write(self,data):
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
         if self.mode != WRITE:
             import errno
             raise IOError(errno.EBADF, "write() on read-only GzipFile object")
@@ -210,6 +242,14 @@
             self.fileobj.write( self.compress.compress(data) )
             self.offset += len(data)
 
+        return len(data)
+
+    def readinto(self, b):
+        """Read up to len(b) bytes into b; return the number of bytes stored."""
+        chunk = self.read(len(b))
+        b[: len(chunk)] = chunk
+        return len(chunk)
+
     def read(self, size=-1):
         if self.mode != READ:
             import errno
@@ -242,11 +282,6 @@
         self.offset += size
         return chunk
 
-    def _unread(self, buf):
-        self.extrabuf = buf + self.extrabuf
-        self.extrasize = len(buf) + self.extrasize
-        self.offset -= len(buf)
-
     def _read(self, size=1024):
         if self.fileobj is None:
             raise EOFError, "Reached EOF"
@@ -318,6 +353,11 @@
         elif isize != (self.size & 0xffffffffL):
             raise IOError, "Incorrect length of data produced"
 
+    @property
+    def closed(self):
+        """True when self.fileobj is None, the state close() treats as closed."""
+        return self.fileobj is None
+
     def close(self):
         if self.fileobj is None:
             return
@@ -373,6 +413,10 @@
         self.extrasize = 0
         self.offset = 0
 
+    def seekable(self):
+        """Always report the stream as seekable."""
+        return True
+
     def seek(self, offset, whence=0):
         if whence:
             if whence == 1:
@@ -395,74 +439,24 @@
                 self.read(1024)
             self.read(count % 1024)
 
-    def readline(self, size=-1):
-        if size < 0:
-            size = sys.maxint
-            readsize = self.min_readsize
-        else:
-            readsize = size
-        bufs = []
-        while size != 0:
-            c = self.read(readsize)
-            i = c.find('\n')
+        return self.offset
 
-            # We set i=size to break out of the loop under two
-            # conditions: 1) there's no newline, and the chunk is
-            # larger than size, or 2) there is a newline, but the
-            # resulting line would be longer than 'size'.
-            if (size <= i) or (i == -1 and len(c) > size):
-                i = size - 1
+    def _setmode(self):
+        """
+        Establish readability and writability.
+        Until invoked, object will appear as readable and writable, a hack
+        that enables using it to initialize io.BufferedRandom objects.
+        """
+        self._modeset = True
 
-            if i >= 0 or c == '':
-                bufs.append(c[:i + 1])    # Add portion of last chunk
-                self._unread(c[i + 1:])   # Push back rest of chunk
-                break
+    def readable(self):
+        """True in READ mode, and unconditionally until _setmode() has run."""
+        return not self._modeset or self.mode == READ
 
-            # Append chunk to list, decrease 'size',
-            bufs.append(c)
-            size = size - len(c)
-            readsize = min(size, readsize * 2)
-        if readsize > self.min_readsize:
-            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
-        return ''.join(bufs) # Return resulting line
+    def writable(self):
+        """True in WRITE mode, and unconditionally until _setmode() has run."""
+        return not self._modeset or self.mode == WRITE
 
-    def readlines(self, sizehint=0):
-        # Negative numbers result in reading all the lines
-        if sizehint <= 0:
-            sizehint = sys.maxint
-        L = []
-        while sizehint > 0:
-            line = self.readline()
-            if line == "":
-                break
-            L.append(line)
-            sizehint = sizehint - len(line)
-
-        return L
-
-    def writelines(self, L):
-        for line in L:
-            self.write(line)
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        line = self.readline()
-        if line:
-            return line
-        else:
-            raise StopIteration
-
-    def __enter__(self):
-        if self.fileobj is None:
-            raise ValueError("I/O operation on closed GzipFile object")
-        return self
-
-    def __exit__(self, *args):
-        self.close()
-
-
 def _test():
     # Act like gzip; with -d, act like gunzip.
     # The input file is not deleted, however, nor are any other gzip