diff -r 0fadf332f0d4 Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Fri Jun 05 21:03:52 2015 -0500 +++ b/Doc/whatsnew/3.5.rst Tue Jun 09 13:36:56 2015 +0000 @@ -819,6 +819,10 @@ * The UTF-32 encoder is now 3x to 7x faster. (Contributed by Serhiy Storchaka in :issue:`15027`.) +* Iterating over lines of :class:`~gzip.GzipFile`, :class:`~lzma.LZMAFile`, + and :class:`~bz2.BZ2File` objects is about 4 times faster. + (:issue:`18003`) + Build and C API Changes ======================= diff -r 0fadf332f0d4 Lib/_compression.py --- a/Lib/_compression.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/_compression.py Tue Jun 09 13:36:56 2015 +0000 @@ -60,6 +60,11 @@ self._decompressor = None return super().close() + @property + def closed(self): + # This is polled for every operation; default implementation too slow + return self._decompressor is None + def seekable(self): return self._fp.seekable() diff -r 0fadf332f0d4 Lib/bz2.py --- a/Lib/bz2.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/bz2.py Tue Jun 09 13:36:56 2015 +0000 @@ -232,6 +232,12 @@ self._check_can_read() return self._buffer.readlines(size) + def __iter__(self): + # Shortcut to bypass the readline() method above + with self._lock: + self._check_can_read() + return iter(self._buffer) + def write(self, data): """Write a byte string to the file. diff -r 0fadf332f0d4 Lib/gzip.py --- a/Lib/gzip.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/gzip.py Tue Jun 09 13:36:56 2015 +0000 @@ -371,6 +371,11 @@ self._check_not_closed() return self._buffer.readline(size) + def __iter__(self): + # Shortcut to bypass the readline() method above + self._check_not_closed() + return iter(self._buffer) + class _GzipReader(_compression.DecompressReader): def __init__(self, fp): diff -r 0fadf332f0d4 Lib/lzma.py --- a/Lib/lzma.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/lzma.py Tue Jun 09 13:36:56 2015 +0000 @@ -219,6 +219,11 @@ self._check_can_read() return self._buffer.readline(size) + def __iter__(self): + # Shortcut to bypass the readline() method above + self._check_can_read() + return iter(self._buffer) + def write(self, data): """Write a bytes object to the file. diff -r 0fadf332f0d4 Lib/test/test_bz2.py --- a/Lib/test/test_bz2.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/test/test_bz2.py Tue Jun 09 13:36:56 2015 +0000 @@ -230,7 +230,9 @@ def testIterator(self): self.createTempFile() with BZ2File(self.filename) as bz2f: - self.assertEqual(list(iter(bz2f)), self.TEXT_LINES) + lines = iter(bz2f) + self.assertEqual(list(lines), self.TEXT_LINES) + self.assertRaises(ValueError, next, lines) def testIteratorMultiStream(self): self.createTempFile(streams=5) diff -r 0fadf332f0d4 Lib/test/test_gzip.py --- a/Lib/test/test_gzip.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/test/test_gzip.py Tue Jun 09 13:36:56 2015 +0000 @@ -123,6 +123,7 @@ # Write to a file, open it for reading, then close it. self.test_write() f = gzip.GzipFile(self.filename, 'r') + lines = iter(f) fileobj = f.fileobj self.assertFalse(fileobj.closed) f.close() @@ -133,6 +134,8 @@ f.seek(0) with self.assertRaises(ValueError): f.tell() + with self.assertRaises(ValueError): + next(lines) # Open the file for writing, then close it. f = gzip.GzipFile(self.filename, 'w') fileobj = f.fileobj diff -r 0fadf332f0d4 Lib/test/test_lzma.py --- a/Lib/test/test_lzma.py Fri Jun 05 21:03:52 2015 -0500 +++ b/Lib/test/test_lzma.py Tue Jun 09 13:36:56 2015 +0000 @@ -912,6 +912,9 @@ with LZMAFile(BytesIO(COMPRESSED_RAW_2), format=lzma.FORMAT_RAW, filters=FILTERS_RAW_2) as f: self.assertListEqual(list(iter(f)), lines) + with LZMAFile(BytesIO(COMPRESSED_XZ)) as f: + line_iter = iter(f) + self.assertRaises(ValueError, next, line_iter) def test_readline(self): with BytesIO(INPUT) as f: