diff -r 9ceac471bd8c Doc/library/zipfile.rst --- a/Doc/library/zipfile.rst Thu Mar 22 22:40:44 2012 -0400 +++ b/Doc/library/zipfile.rst Fri Mar 23 21:18:34 2012 +0200 @@ -87,7 +87,31 @@ .. data:: ZIP_DEFLATED The numeric constant for the usual ZIP compression method. This requires the - zlib module. No other compression methods are currently supported. + zlib module. + + +.. data:: ZIP_BZIP2 + + The numeric constant for the BZIP2 compression method. This requires the + bz2 module. + + .. versionadded:: 3.3 + + +.. data:: ZIP_LZMA + + The numeric constant for the LZMA compression method. This requires the + lzma module. + + .. versionadded:: 3.3 + + .. note:: + + The ZIP file format specification has included support for bzip2 compression + since 2001, and for LZMA compression since 2006. However, some tools + (including older Python releases) do not support these compression + methods, and may either refuse to process the ZIP file altogether, + or fail to extract individual files. .. seealso:: @@ -118,9 +142,11 @@ adding a ZIP archive to another file (such as :file:`python.exe`). If *mode* is ``a`` and the file does not exist at all, it is created. *compression* is the ZIP compression method to use when writing the archive, - and should be :const:`ZIP_STORED` or :const:`ZIP_DEFLATED`; unrecognized - values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` - is specified but the :mod:`zlib` module is not available, :exc:`RuntimeError` + and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`; or + :const:`ZIP_DEFLATED`; unrecognized + values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED`, + :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified but the corresponded module + (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not available, :exc:`RuntimeError` is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` zipfile will create ZIP files that use the ZIP64 extensions when the zipfile is larger than 2 GB. If it is false (the default) :mod:`zipfile` @@ -143,6 +169,9 @@ .. versionadded:: 3.2 Added the ability to use :class:`ZipFile` as a context manager. + .. versionchanged:: 3.3 + Added support for :mod:`bzip2` and :mod:`lzma` compression. + .. method:: ZipFile.close() diff -r 9ceac471bd8c Lib/test/support.py --- a/Lib/test/support.py Thu Mar 22 22:40:44 2012 -0400 +++ b/Lib/test/support.py Fri Mar 23 21:18:34 2012 +0200 @@ -46,6 +46,16 @@ zlib = None try: + import bz2 +except ImportError: + bz2 = None + +try: + import lzma +except ImportError: + lzma = None + +try: import fcntl except ImportError: fcntl = None @@ -67,7 +77,7 @@ "get_attribute", "swap_item", "swap_attr", "requires_IEEE_754", "TestHandler", "Matcher", "can_symlink", "skip_unless_symlink", "import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast", - "anticipate_failure" + "anticipate_failure", "requires_bz2", "requires_lzma" ] class Error(Exception): @@ -516,6 +526,10 @@ requires_zlib = unittest.skipUnless(zlib, 'requires zlib') +requires_bz2 = unittest.skipUnless(bz2, 'requires bz2') + +requires_lzma = unittest.skipUnless(lzma, 'requires lzma') + is_jython = sys.platform.startswith('java') # Filename used for testing diff -r 9ceac471bd8c Lib/test/test_zipfile.py --- a/Lib/test/test_zipfile.py Thu Mar 22 22:40:44 2012 -0400 +++ b/Lib/test/test_zipfile.py Fri Mar 23 21:18:34 2012 +0200 @@ -13,7 +13,7 @@ from random import randint, random from unittest import skipUnless -from test.support import TESTFN, run_unittest, findfile, unlink, requires_zlib +from test.support import TESTFN, run_unittest, findfile, unlink, requires_zlib, requires_bz2, requires_lzma TESTFN2 = TESTFN + "2" TESTFNDIR = TESTFN + "d" @@ -313,6 +313,103 @@ self.assertEqual(openobj.read(1), b'1') self.assertEqual(openobj.read(1), b'2') + @requires_bz2 + def test_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_open_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_open_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_random_open_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_random_open_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readline_read_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readline_read_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readline_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readline_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readlines_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readlines_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_iterlines_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_iterlines_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_low_compression_bzip2(self): + """Check for cases where compressed data is larger than original.""" + # Create the ZIP archive + with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_BZIP2) as zipfp: + zipfp.writestr("strfile", '12') + + # Get an open object for strfile + with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_BZIP2) as zipfp: + with zipfp.open("strfile") as openobj: + self.assertEqual(openobj.read(1), b'1') + self.assertEqual(openobj.read(1), b'2') + + @requires_lzma + def test_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_open_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_open_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_random_open_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_random_open_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readline_read_lzma(self): + # Issue #7610: calls to readline() interleaved with calls to read(). + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readline_read_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readline_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readline_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readlines_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_readlines_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_iterlines_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_iterlines_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_low_compression_lzma(self): + """Check for cases where compressed data is larger than original.""" + # Create the ZIP archive + with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_LZMA) as zipfp: + zipfp.writestr("strfile", '12') + + # Get an open object for strfile + with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_LZMA) as zipfp: + with zipfp.open("strfile") as openobj: + self.assertEqual(openobj.read(1), b'1') + self.assertEqual(openobj.read(1), b'2') + def test_absolute_arcnames(self): with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp: zipfp.write(TESTFN, "/absolute") @@ -453,6 +550,20 @@ info = zipfp.getinfo('b.txt') self.assertEqual(info.compress_type, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_writestr_compression_bzip2(self): + zipfp = zipfile.ZipFile(TESTFN2, "w") + zipfp.writestr("b.txt", "hello world", compress_type=zipfile.ZIP_BZIP2) + info = zipfp.getinfo('b.txt') + self.assertEqual(info.compress_type, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_writestr_compression_lzma(self): + zipfp = zipfile.ZipFile(TESTFN2, "w") + zipfp.writestr("b.txt", "hello world", compress_type=zipfile.ZIP_LZMA) + info = zipfp.getinfo('b.txt') + self.assertEqual(info.compress_type, zipfile.ZIP_LZMA) + def zip_test_writestr_permissions(self, f, compression): # Make sure that writestr creates files with mode 0600, # when it is passed a name rather than a ZipInfo instance. @@ -626,6 +737,16 @@ for f in (TESTFN2, TemporaryFile(), io.BytesIO()): self.zip_test(f, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_LZMA) + def test_absolute_arcnames(self): with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True) as zipfp: @@ -754,6 +875,28 @@ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01\x00\x00\x00' b'\x00afilePK\x05\x06\x00\x00\x00\x00\x01\x00' b'\x01\x003\x00\x00\x003\x00\x00\x00\x00\x00'), + zipfile.ZIP_BZIP2: ( + b'PK\x03\x04\x14\x03\x00\x00\x0c\x00nu\x0c=FA' + b'KE8\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af' + b'ileBZh91AY&SY\xd4\xa8\xca' + b'\x7f\x00\x00\x0f\x11\x80@\x00\x06D\x90\x80 \x00 \xa5' + b'P\xd9!\x03\x03\x13\x13\x13\x89\xa9\xa9\xc2u5:\x9f' + b'\x8b\xb9"\x9c(HjTe?\x80PK\x01\x02\x14' + b'\x03\x14\x03\x00\x00\x0c\x00nu\x0c=FAKE8' + b'\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00 \x80\x80\x81\x00\x00\x00\x00afilePK' + b'\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00\x00[\x00' + b'\x00\x00\x00\x00'), + zipfile.ZIP_LZMA: ( + b'PK\x03\x04\x14\x03\x00\x00\x0e\x00nu\x0c=FA' + b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af' + b'ile\t\x04\x05\x00]\x00\x00\x00\x04\x004\x19I' + b'\xee\x8d\xe9\x17\x89:3`\tq!.8\x00PK' + b'\x01\x02\x14\x03\x14\x03\x00\x00\x0e\x00nu\x0c=FA' + b'KE\x1b\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00 \x80\x80\x81\x00\x00\x00\x00afil' + b'ePK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00' + b'\x00>\x00\x00\x00\x00\x00'), } def test_unicode_filenames(self): @@ -985,6 +1128,14 @@ def test_testzip_with_bad_crc_deflated(self): self.check_testzip_with_bad_crc(zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_testzip_with_bad_crc_bzip2(self): + self.check_testzip_with_bad_crc(zipfile.ZIP_BZIP2) + + @requires_lzma + def test_testzip_with_bad_crc_lzma(self): + self.check_testzip_with_bad_crc(zipfile.ZIP_LZMA) + def check_read_with_bad_crc(self, compression): """Tests that files with bad CRCs raise a BadZipFile exception when read.""" zipdata = self.zips_with_bad_crc[compression] @@ -1013,6 +1164,14 @@ def test_read_with_bad_crc_deflated(self): self.check_read_with_bad_crc(zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_read_with_bad_crc_bzip2(self): + self.check_read_with_bad_crc(zipfile.ZIP_BZIP2) + + @requires_lzma + def test_read_with_bad_crc_lzma(self): + self.check_read_with_bad_crc(zipfile.ZIP_LZMA) + def check_read_return_size(self, compression): # Issue #9837: ZipExtFile.read() shouldn't return more bytes # than requested. @@ -1033,6 +1192,14 @@ def test_read_return_size_deflated(self): self.check_read_return_size(zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_read_return_size_bzip2(self): + self.check_read_return_size(zipfile.ZIP_BZIP2) + + @requires_lzma + def test_read_return_size_lzma(self): + self.check_read_return_size(zipfile.ZIP_LZMA) + def test_empty_zipfile(self): # Check that creating a file in 'w' or 'a' mode and closing without # adding any files to the archives creates a valid empty ZIP file @@ -1174,6 +1341,16 @@ for f in (TESTFN2, TemporaryFile(), io.BytesIO()): self.zip_test(f, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_test(f, zipfile.ZIP_LZMA) + def zip_open_test(self, f, compression): self.make_test_archive(f, compression) @@ -1214,6 +1391,16 @@ for f in (TESTFN2, TemporaryFile(), io.BytesIO()): self.zip_open_test(f, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_open_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_open_test(f, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_open_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_open_test(f, zipfile.ZIP_LZMA) + def zip_random_open_test(self, f, compression): self.make_test_archive(f, compression) @@ -1242,6 +1429,16 @@ for f in (TESTFN2, TemporaryFile(), io.BytesIO()): self.zip_random_open_test(f, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_random_open_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_random_open_test(f, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_random_open_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.zip_random_open_test(f, zipfile.ZIP_LZMA) + @requires_zlib class TestsWithMultipleOpens(unittest.TestCase): @@ -1461,6 +1658,56 @@ for f in (TESTFN2, TemporaryFile(), io.BytesIO()): self.iterlines_test(f, zipfile.ZIP_DEFLATED) + @requires_bz2 + def test_read_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.read_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readline_read_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readline_read_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readline_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readline_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_readlines_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readlines_test(f, zipfile.ZIP_BZIP2) + + @requires_bz2 + def test_iterlines_bzip2(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.iterlines_test(f, zipfile.ZIP_BZIP2) + + @requires_lzma + def test_read_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.read_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readline_read_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readline_read_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readline_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readline_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_readlines_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.readlines_test(f, zipfile.ZIP_LZMA) + + @requires_lzma + def test_iterlines_lzma(self): + for f in (TESTFN2, TemporaryFile(), io.BytesIO()): + self.iterlines_test(f, zipfile.ZIP_LZMA) + def tearDown(self): for sep, fn in self.arcfiles.items(): os.remove(fn) diff -r 9ceac471bd8c Lib/zipfile.py --- a/Lib/zipfile.py Thu Mar 22 22:40:44 2012 -0400 +++ b/Lib/zipfile.py Fri Mar 23 21:18:34 2012 +0200 @@ -22,7 +22,18 @@ zlib = None crc32 = binascii.crc32 -__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", +try: + import bz2 # We may need its compression method +except ImportError: + bz2 = None + +try: + import lzma # We may need its compression method +except ImportError: + lzma = None + +__all__ = ["BadZipFile", "BadZipfile", "error", + "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"] class BadZipFile(Exception): @@ -45,8 +56,15 @@ # constants for Zip file compression methods ZIP_STORED = 0 ZIP_DEFLATED = 8 +ZIP_BZIP2 = 12 +ZIP_LZMA = 14 # Other ZIP compression methods not supported +DEFAULT_VERSION = 20 +ZIP64_VERSION = 45 +BZIP2_VERSION = 46 +LZMA_VERSION = 63 + # Below are some formats and associated data for reading/writing headers using # the struct module. The names and structures of headers/records are those used # in the PKWARE description of the ZIP file format: @@ -313,8 +331,8 @@ else: # Assume everything else is unix-y self.create_system = 3 # System which created ZIP archive - self.create_version = 20 # Version which created ZIP archive - self.extract_version = 20 # Version needed to extract archive + self.create_version = DEFAULT_VERSION # Version which created ZIP archive + self.extract_version = DEFAULT_VERSION # Version needed to extract archive self.reserved = 0 # Must be zero self.flag_bits = 0 # ZIP flag bits self.volume = 0 # Volume number of file header @@ -341,6 +359,7 @@ extra = self.extra + min_version = 0 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: # File is larger than what fits into a 4 byte integer, # fall back to the ZIP64 extension @@ -349,9 +368,15 @@ 1, struct.calcsize(fmt)-4, file_size, compress_size) file_size = 0xffffffff compress_size = 0xffffffff - self.extract_version = max(45, self.extract_version) - self.create_version = max(45, self.extract_version) + min_version = ZIP64_VERSION + if self.compress_type == ZIP_BZIP2: + min_version = max(BZIP2_VERSION, min_version) + elif self.compress_type == ZIP_LZMA: + min_version = max(LZMA_VERSION, min_version) + + self.extract_version = max(min_version, self.extract_version) + self.create_version = max(min_version, self.create_version) filename, flag_bits = self._encodeFilenameFlags() header = struct.pack(structFileHeader, stringFileHeader, self.extract_version, self.reserved, flag_bits, @@ -461,6 +486,122 @@ self._UpdateKeys(c) return c + +def lzma_props_encode(specs): + assert specs['id'] == lzma.FILTER_LZMA1 + prop = (specs['pb'] * 5 + specs['lp']) * 9 + specs['lc'] + return struct.pack(' len(self._readbuffer) - self._offset: chunk = self.read(n) - self._offset -= len(chunk) + if len(chunk) > self._offset: + self._readbuffer = chunk + self._readbuffer[self._offset:] + self._offset = 0 + else: + self._offset -= len(chunk) # Return up to 512 bytes to reduce allocation overhead for tight loops. return self._readbuffer[self._offset: self._offset + 512] @@ -575,80 +719,121 @@ """Read and return up to n bytes. If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. """ - buf = b'' - if n is None: - n = -1 - while True: - if n < 0: - data = self.read1(n) - elif n > len(buf): - data = self.read1(n - len(buf)) - else: - return buf - if len(data) == 0: - return buf + if n is None or n < 0: + buf = self._readbuffer[self._offset:] + self._readbuffer = b'' + self._offset = 0 + while not self._eof: + buf += self._read1(self.MAX_N) + return buf + + n -= len(self._readbuffer) - self._offset + if n < 0: + buf = self._readbuffer[self._offset:n] + self._offset += len(buf) + return buf + + buf = self._readbuffer[self._offset:] + self._readbuffer = b'' + self._offset = 0 + while n > 0 and not self._eof: + data = self._read1(n) + if n < len(data): + self._readbuffer = data + self._offset = n + buf += data[:n] + break buf += data + n -= len(data) + return buf - def _update_crc(self, newdata, eof): + def _update_crc(self, newdata): # Update the CRC using the given data. if self._expected_crc is None: # No need to compute the CRC if we don't have a reference value return self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff # Check the CRC if we're at the end of the file - if eof and self._running_crc != self._expected_crc: + if self._eof and self._running_crc != self._expected_crc: raise BadZipFile("Bad CRC-32 for file %r" % self.name) def read1(self, n): """Read up to n bytes with at most one read() system call.""" - # Simplify algorithm (branching) by transforming negative n to large n. - if n < 0 or n is None: - n = self.MAX_N + if n is None or n < 0: + buf = self._readbuffer[self._offset:] + self._readbuffer = b'' + self._offset = 0 + data = self._read1(self.MAX_N) + buf += data + return buf - # Bytes available in read buffer. - len_readbuffer = len(self._readbuffer) - self._offset + n -= len(self._readbuffer) - self._offset + if n < 0: + buf = self._readbuffer[self._offset:n] + self._offset += len(buf) + return buf + + buf = self._readbuffer[self._offset:] + self._readbuffer = b'' + self._offset = 0 + if n > 0: + data = self._read1(n) + if n < len(data): + self._readbuffer = data + self._offset = n + data = data[:n] + buf += data + return buf + + def _read1(self, n): + # Read up to n compressed bytes with at most one read() system call, + # decrypt and decompress them. + if self._eof or n <= 0: + return b'' # Read from file. - if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): - nbytes = n - len_readbuffer - len(self._unconsumed) - nbytes = max(nbytes, self.MIN_READ_SIZE) - nbytes = min(nbytes, self._compress_left) + if self._compress_type == ZIP_DEFLATED: + ## Handle unconsumed data. + data = self._decompressor.unconsumed_tail + if n > len(data): + data += self._read2(n - len(data)) + else: + data = self._read2(n) - data = self._fileobj.read(nbytes) - self._compress_left -= len(data) + if self._compress_type == ZIP_STORED: + self._eof = self._compress_left <= 0 + elif self._compress_type == ZIP_DEFLATED: + n = max(n, self.MIN_READ_SIZE) + data = self._decompressor.decompress(data, n) + self._eof = (self._decompressor.eof or + self._compress_left <= 0 and + not self._decompressor.unconsumed_tail) + if self._eof: + data += self._decompressor.flush() + else: + data = self._decompressor.decompress(data) + self._eof = self._decompressor.eof or self._compress_left <= 0 - if data and self._decrypter is not None: - data = bytes(map(self._decrypter, data)) + data = data[:self._left] + self._left -= len(data) + if self._left <= 0: + self._eof = True + self._update_crc(data) + return data - if self._compress_type == ZIP_STORED: - self._update_crc(data, eof=(self._compress_left==0)) - self._readbuffer = self._readbuffer[self._offset:] + data - self._offset = 0 - else: - # Prepare deflated bytes for decompression. - self._unconsumed += data + def _read2(self, n): + if self._compress_left <= 0: + return b'' - # Handle unconsumed data. - if (len(self._unconsumed) > 0 and n > len_readbuffer and - self._compress_type == ZIP_DEFLATED): - data = self._decompressor.decompress( - self._unconsumed, - max(n - len_readbuffer, self.MIN_READ_SIZE) - ) + n = max(n, self.MIN_READ_SIZE) + n = min(n, self._compress_left) - self._unconsumed = self._decompressor.unconsumed_tail - eof = len(self._unconsumed) == 0 and self._compress_left == 0 - if eof: - data += self._decompressor.flush() + data = self._fileobj.read(n) + self._compress_left -= len(data) - self._update_crc(data, eof=eof) - self._readbuffer = self._readbuffer[self._offset:] + data - self._offset = 0 - - # Read from buffer. - data = self._readbuffer[self._offset: self._offset + n] - self._offset += len(data) + if self._decrypter is not None: + data = bytes(map(self._decrypter, data)) return data def close(self): @@ -667,7 +852,8 @@ file: Either the path to the file, or a file-like object. If it is a path, the file will be opened and closed by ZipFile. mode: The mode can be either read "r", write "w" or append "a". - compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), + ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). allowZip64: if True ZipFile will create files with ZIP64 extensions when needed, otherwise it will raise an exception when this would be necessary. @@ -681,14 +867,7 @@ if mode not in ("r", "w", "a"): raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') - if compression == ZIP_STORED: - pass - elif compression == ZIP_DEFLATED: - if not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - else: - raise RuntimeError("That compression method is not supported") + _check_compression(compression) self._allowZip64 = allowZip64 self._didModify = False @@ -1052,11 +1231,7 @@ if not self.fp: raise RuntimeError( "Attempt to write ZIP archive that was already closed") - if zinfo.compress_type == ZIP_DEFLATED and not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): - raise RuntimeError("That compression method is not supported") + _check_compression(zinfo.compress_type) if zinfo.file_size > ZIP64_LIMIT: if not self._allowZip64: raise LargeZipFile("Filesize would require ZIP64 extensions") @@ -1094,6 +1269,8 @@ zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + zinfo.flag_bits |= 0x02 self._writecheck(zinfo) self._didModify = True @@ -1107,17 +1284,13 @@ self.fp.write(zinfo.FileHeader()) return + cmpr = _get_compressor(zinfo.compress_type) with open(filename, "rb") as fp: # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 zinfo.file_size = file_size = 0 self.fp.write(zinfo.FileHeader()) - if zinfo.compress_type == ZIP_DEFLATED: - cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - else: - cmpr = None while 1: buf = fp.read(1024 * 8) if not buf: @@ -1170,13 +1343,14 @@ zinfo.header_offset = self.fp.tell() # Start of header data if compress_type is not None: zinfo.compress_type = compress_type + if zinfo.compress_type == ZIP_LZMA: + zinfo.flag_bits |= 0x02 self._writecheck(zinfo) self._didModify = True zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum - if zinfo.compress_type == ZIP_DEFLATED: - co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) + co = _get_compressor(zinfo.compress_type) + if co: data = co.compress(data) + co.flush() zinfo.compress_size = len(data) # Compressed size else: @@ -1228,18 +1402,22 @@ header_offset = zinfo.header_offset extra_data = zinfo.extra + min_version = 0 if extra: # Append a ZIP64 field to the extra's extra_data = struct.pack( '