Index: Doc/library/gzip.rst =================================================================== --- Doc/library/gzip.rst (revision 68905) +++ Doc/library/gzip.rst (working copy) @@ -46,7 +46,9 @@ or ``'wb'``, depending on whether the file will be read or written. The default is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``. If not given, the 'b' flag will be added to the mode to ensure the file is opened - in binary mode for cross-platform portability. + in binary mode for cross-platform portability. ``'U'`` may also be included + when reading; it is stripped from the mode before the underlying file is + opened, and universal newline translation is applied to the data read. The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the level of compression; ``1`` is fastest and produces the least compression, and Index: Lib/gzip.py =================================================================== --- Lib/gzip.py (revision 68905) +++ Lib/gzip.py (working copy) @@ -63,7 +63,9 @@ depending on whether the file will be read or written. The default is the mode of fileobj if discernible; otherwise, the default is 'rb'. Be aware that only the 'rb', 'ab', and 'wb' values should be used - for cross-platform portability. + for cross-platform portability. 'U' may also be included when + reading; it is stripped from the mode before the underlying file is + opened, and universal newline translation is applied to the data read. 
The compresslevel argument is an integer from 1 to 9 controlling the level of compression; 1 is fastest and produces the least compression, @@ -84,8 +86,12 @@ # that care about that sort of thing if mode and 'b' not in mode: mode += 'b' + # Leave a missing mode as None so it can still default to fileobj's mode. + self._universal = bool(mode) and 'U' in mode and 'r' in mode + if mode: + mode = mode.replace('U', '') if fileobj is None: fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb') if filename is None: if hasattr(fileobj, 'name'): filename = fileobj.name else: filename = '' @@ -236,6 +242,15 @@ size = self.extrasize chunk = self.extrabuf[:size] + if self._universal: + if (chunk and + chunk[-1] == "\r" and + len(self.extrabuf) > size and + self.extrabuf[size] == "\n"): + # We split a CRLF pair - put it back together. + size += 1 + chunk += "\n" + chunk = chunk.replace("\r\n", "\n").replace("\r", "\n") self.extrabuf = self.extrabuf[size:] self.extrasize = self.extrasize - size Index: Lib/test/test_gzip.py =================================================================== --- Lib/test/test_gzip.py (revision 68905) +++ Lib/test/test_gzip.py (working copy) @@ -21,7 +21,12 @@ /* See http://www.winimage.com/zLibDll for Windows */ """ +data3 = """/* zlibmodule.c -- gzip-compatible data compression */\r +/* See http://www.gzip.org/zlib/\r\r +/* See http://www.winimage.com/zLibDll for Windows */\r +""" + class TestGzip(unittest.TestCase): filename = test_support.TESTFN @@ -31,9 +36,13 @@ def tearDown(self): test_support.unlink(self.filename) + def _write(self, mode, data): + f = gzip.GzipFile(self.filename, mode) + f.write(data) + return f def test_write(self): - f = gzip.GzipFile(self.filename, 'wb') ; f.write(data1 * 50) + f = self._write('wb', data1*50) # Try flush and fileno. 
f.flush() @@ -54,7 +63,8 @@ def test_append(self): self.test_write() # Append to the previous file - f = gzip.GzipFile(self.filename, 'ab') ; f.write(data2 * 15) ; f.close() + f = self._write('ab', data2 * 15) + f.close() f = gzip.GzipFile(self.filename, 'rb') ; d = f.read() ; f.close() self.assertEqual(d, (data1*50) + (data2*15)) @@ -95,6 +105,21 @@ line_length = (line_length + 1) % 50 f.close() + def test_read_universal(self): + # Close the writer first so all compressed data is on disk before reading. + self._write('wb', data3).close() + f = gzip.GzipFile(self.filename, 'rbU') + udata3 = (data3.replace("\r\n", "\n") + .replace("\r", "\n") + .rstrip("\n") + .split("\n")) + n = 0 + for (fline, dline) in zip(f, udata3): + self.assertEqual(fline, dline + "\n") + n += 1 + self.assertEqual(n, len(udata3)) + f.close() + def test_readlines(self): self.test_write() # Try .readlines()