diff -r a14012352f65 Lib/gzip.py --- a/Lib/gzip.py Mon Apr 28 13:08:28 2014 +0200 +++ b/Lib/gzip.py Mon Apr 28 15:05:34 2014 +0200 @@ -134,7 +134,8 @@ max_read_chunk = 10 * 1024 * 1024 # 10Mb def __init__(self, filename=None, mode=None, - compresslevel=9, fileobj=None, mtime=None): + compresslevel=9, fileobj=None, mtime=None, + buffer_size=io.DEFAULT_BUFFER_SIZE): """Constructor for the GzipFile class. At least one of fileobj and filename must be given a @@ -171,6 +172,8 @@ return value of time.time() and of the st_mtime member of the object returned by os.stat(). + If the buffer_size is omitted it defaults to DEFAULT_BUFFER_SIZE. + """ if mode and ('t' in mode or 'U' in mode): @@ -215,6 +218,7 @@ self.fileobj = fileobj self.offset = 0 self.mtime = mtime + self.buffer_size = buffer_size if self.mode == WRITE: self._write_gzip_header() @@ -355,7 +359,7 @@ if self.extrasize <= 0 and self.fileobj is None: return b'' - readsize = 1024 + readsize = self.buffer_size if size < 0: # get the whole thing while self._read(readsize): readsize = min(self.max_read_chunk, readsize * 2) @@ -410,8 +414,7 @@ if self.fileobj is None: return b'' # Ensure that we don't return b"" if we haven't reached EOF. - # 1024 is the same buffering heuristic used in read() - while self.extrasize == 0 and self._read(max(n, 1024)): + while self.extrasize == 0 and self._read(max(n, self.buffer_size)): pass offset = self.offset - self.extrastart remaining = self.extrasize @@ -422,7 +425,7 @@ self.extrasize = len(buf) + self.extrasize self.offset -= len(buf) - def _read(self, size=1024): + def _read(self, size=io.DEFAULT_BUFFER_SIZE): if self.fileobj is None: return False @@ -560,18 +563,18 @@ if offset < self.offset: raise OSError('Negative seek in write mode') count = offset - self.offset - chunk = bytes(1024) - for i in range(count // 1024): + chunk = bytes(self.buffer_size) + for i in range(count // self.buffer_size): self.write(chunk) - self.write(bytes(count % 1024)) + self.write(bytes(count % self.buffer_size)) elif self.mode == READ: if offset < self.offset: # for negative seek, rewind and do positive seek self.rewind() count = offset - self.offset - for i in range(count // 1024): - self.read(1024) - self.read(count % 1024) + for i in range(count // self.buffer_size): + self.read(self.buffer_size) + self.read(count % self.buffer_size) return self.offset @@ -661,7 +664,7 @@ f = builtins.open(arg, "rb") g = open(arg + ".gz", "wb") while True: - chunk = f.read(1024) + chunk = f.read(io.DEFAULT_BUFFER_SIZE) if not chunk: break g.write(chunk)