diff -r 32a4e7b337c9 Lib/zipfile.py --- a/Lib/zipfile.py Fri Jan 15 02:37:21 2016 +0000 +++ b/Lib/zipfile.py Fri Jan 15 17:43:11 2016 +0000 @@ -958,6 +958,75 @@ super().close() +class _ZipWriteFile: + def __init__(self, zf, zinfo, zip64): + self.zinfo = zinfo + self.zip64 = zip64 + self.zipfile = zf + self.compressor = _get_compressor(zinfo.compress_type) + self.file_size = 0 + self.compress_size = 0 + self.crc = 0 + + @property + def fileobj(self): + return self.zipfile.fp + + def write(self, data): + self.file_size += len(data) + self.crc = crc32(data, self.crc) + if self.compressor: + data = self.compressor.compress(data) + self.compress_size += len(data) + self.fileobj.write(data) + + def close(self): + # Flush any data from the compressor, and update header info + if self.compressor: + buf = self.compressor.flush() + self.compress_size += len(buf) + with self.zipfile._lock: + self.fileobj.write(buf) + self.zinfo.compress_size = self.compress_size + else: + self.zinfo.compress_size = self.file_size + self.zinfo.CRC = self.crc + self.zinfo.file_size = self.file_size + + # Write updated header info + if self.zinfo.flag_bits & 0x08: + # Write CRC and file sizes after the file data + fmt = '<LQQ' if self.zip64 else '<LLL' + with self.zipfile._lock: + self.fileobj.write(struct.pack(fmt, self.zinfo.CRC, + self.zinfo.compress_size, self.zinfo.file_size)) + self.zipfile.start_dir = self.fileobj.tell() + else: + if not self.zip64: + if self.file_size > ZIP64_LIMIT: + raise RuntimeError('File size unexpectedly exceeded ZIP64 limit') + if self.compress_size > ZIP64_LIMIT: + raise RuntimeError('Compressed size unexpectedly exceeded ZIP64 limit') + # Seek backwards and write file header (which will now include + # correct CRC and file sizes) + with self.zipfile._lock: + self.zipfile.start_dir = self.fileobj.tell() # Preserve current position in file + self.fileobj.seek(self.zinfo.header_offset) + self.fileobj.write(self.zinfo.FileHeader(self.zip64)) + self.fileobj.seek(self.zipfile.start_dir) + + self.zipfile._fpclose(self.fileobj) + + self.zipfile._open_writer = False + + # Context manager protocol: with zf.open(spam, 'w') as f: ...
+ def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + class ZipFile: """ Class with methods to open, read, write, close, list zip files. @@ -1020,6 +1089,7 @@ self._fileRefCnt = 1 self._lock = threading.RLock() self._seekable = True + self._open_writer = False try: if mode == 'r': @@ -1232,28 +1302,97 @@ with self.open(name, "r", pwd) as fp: return fp.read() - def open(self, name, mode="r", pwd=None): - """Return file-like object for 'name'.""" - if mode not in ("r", "U", "rU"): - raise RuntimeError('open() requires mode "r", "U", or "rU"') + def _open_to_write(self, zinfo, force_zip64=False): + if force_zip64 and not self._allowZip64: + raise ValueError( + "force_zip64 is True, but self._allowZip64 is False" + ) + + # Sizes and CRC are overwritten with correct data after processing the file + if not hasattr(zinfo, 'file_size'): + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + + zinfo.flag_bits = 0x00 + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + if not self._seekable: + zinfo.flag_bits |= 0x08 + + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT) + + with self._lock: + if self._open_writer: + raise RuntimeError("A ZipFile instance can only have one " + "open writing file handle at once. " + "Close the first handle before opening another.") + self._open_writer = True + + self._writecheck(zinfo) + self._didModify = True + + if self._seekable: + self.fp.seek(self.start_dir) + zinfo.header_offset = self.fp.tell() + + self.fp.write(zinfo.FileHeader(zip64)) + + # Add file to our caches + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + + self._fileRefCnt += 1 + return _ZipWriteFile(self, zinfo, zip64) + + def open(self, name, mode="r", pwd=None, force_zip64=False): + """Return file-like object for 'name'. 
+ + name is a string for the file name within the zip file, or a ZipInfo + object. + + mode should be 'r' to read a file already in the zip file, or 'w' to + write to a file newly added to the archive. + + pwd is the password to decrypt files (on read only) + + When writing, if the file size is not known in advance but may exceed + 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large + files. If the size is known in advance, it is best to pass a ZipInfo + instance for name, with zinfo.file_size set. + """ + if mode not in {"r", "w", "U", "rU"}: + raise RuntimeError('open() requires mode "r", "w", "U", or "rU"') if 'U' in mode: import warnings warnings.warn("'U' mode is deprecated", DeprecationWarning, 2) if pwd and not isinstance(pwd, bytes): raise TypeError("pwd: expected bytes, got %s" % type(pwd)) + if pwd and (mode == "w"): + raise ValueError("pwd is only supported for reading files") if not self.fp: raise RuntimeError( - "Attempt to read ZIP archive that was already closed") + "Attempt to use ZIP archive that was already closed") # Make sure we have an info object if isinstance(name, ZipInfo): # 'name' is already an info object zinfo = name + elif mode == 'w': + zinfo = ZipInfo(name) + zinfo.compress_type = self.compression else: # Get info object for name zinfo = self.getinfo(name) + if mode == 'w': + return self._open_to_write(zinfo, force_zip64=force_zip64) + + # Open for reading: self._fileRefCnt += 1 zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock) try: @@ -1429,6 +1568,10 @@ if not self.fp: raise RuntimeError( "Attempt to write to ZIP archive that was already closed") + if self._open_writer: + raise RuntimeError( + "Can't write to ZIP archive while an open writing handle exists." 
+ ) st = os.stat(filename) isdir = stat.S_ISDIR(st.st_mode) @@ -1453,18 +1596,19 @@ zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 - with self._lock: - if self._seekable: - self.fp.seek(self.start_dir) - zinfo.header_offset = self.fp.tell() # Start of header bytes - if zinfo.compress_type == ZIP_LZMA: - # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 - self._writecheck(zinfo) - self._didModify = True + if isdir: + with self._lock: + if self._seekable: + self.fp.seek(self.start_dir) + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 - if isdir: + self._writecheck(zinfo) + self._didModify = True + zinfo.file_size = 0 zinfo.compress_size = 0 zinfo.CRC = 0 @@ -1473,59 +1617,13 @@ self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False)) self.start_dir = self.fp.tell() - return - - cmpr = _get_compressor(zinfo.compress_type) - if not self._seekable: - zinfo.flag_bits |= 0x08 - with open(filename, "rb") as fp: - # Must overwrite CRC and sizes with correct data later - zinfo.CRC = CRC = 0 - zinfo.compress_size = compress_size = 0 - # Compressed size can be larger than uncompressed size - zip64 = self._allowZip64 and \ - zinfo.file_size * 1.05 > ZIP64_LIMIT - self.fp.write(zinfo.FileHeader(zip64)) - file_size = 0 - while 1: - buf = fp.read(1024 * 8) + else: + with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: + while True: + buf = src.read(1024 * 8) if not buf: break - file_size = file_size + len(buf) - CRC = crc32(buf, CRC) - if cmpr: - buf = cmpr.compress(buf) - compress_size = compress_size + len(buf) - self.fp.write(buf) - if cmpr: - buf = cmpr.flush() - compress_size = compress_size + len(buf) - self.fp.write(buf) - zinfo.compress_size = compress_size - else: - zinfo.compress_size = file_size - zinfo.CRC = CRC - zinfo.file_size = file_size - if zinfo.flag_bits & 
0x08: - # Write CRC and file sizes after the file data - fmt = '<LQQ' if zip64 else '<LLL' - self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, - zinfo.file_size)) - self.start_dir = self.fp.tell() - else: - if not zip64 and self._allowZip64: - if file_size > ZIP64_LIMIT: - raise RuntimeError('File size has increased during compressing') - if compress_size > ZIP64_LIMIT: - raise RuntimeError('Compressed size larger than uncompressed size') - # Seek backwards and write file header (which will now include - # correct CRC and file sizes) - self.start_dir = self.fp.tell() # Preserve current position in file - self.fp.seek(zinfo.header_offset) - self.fp.write(zinfo.FileHeader(zip64)) - self.fp.seek(self.start_dir) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo + dest.write(buf) def writestr(self, zinfo_or_arcname, data, compress_type=None): """Write a file into the archive. The contents is 'data', which