Index: zipfile.py =================================================================== --- zipfile.py (revision 42911) +++ zipfile.py (working copy) @@ -15,6 +15,8 @@ pass error = BadZipfile # The exception raised by this module +ZIP64_LIMIT= 1 << 30 + # constants for Zip file compression methods ZIP_STORED = 0 ZIP_DEFLATED = 8 @@ -27,7 +29,12 @@ stringCentralDir = "PK\001\002" # magic number for central directory structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes stringFileHeader = "PK\003\004" # magic number for file header +structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes +stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header +structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes +stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header + # indexes of entries in the central directory structure _CD_SIGNATURE = 0 _CD_CREATE_VERSION = 1 @@ -75,6 +82,40 @@ pass return False +def _EndRecData64(fpin, offset, endrec): + """ + Read the ZIP64 end-of-archive records and use that to update endrec + """ + locatorSize = struct.calcsize(structEndArchive64Locator) + fpin.seek(offset - locatorSize, 2) + data = fpin.read(locatorSize) + sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) + if sig != stringEndArchive64Locator: + return endrec + + if diskno != 0 or disks != 1: + raise BadZipfile("zipfiles that span multiple disks are not supported") + + # Assume no 'zip64 extensible data' + endArchiveSize = struct.calcsize(structEndArchive64) + fpin.seek(offset - locatorSize - endArchiveSize, 2) + data = fpin.read(endArchiveSize) + sig, sz, create_version, read_version, disk_num, disk_dir, \ + dircount, dircount2, dirsize, diroffset = \ + struct.unpack(structEndArchive64, data) + if sig != stringEndArchive64: + return endrec + + # Update the original endrec using data from the ZIP64 record + endrec[1] = disk_num + endrec[2] = disk_dir + endrec[3] = dircount + endrec[4] = dircount2 + endrec[5] = dirsize + endrec[6] = 
diroffset + return endrec + + def _EndRecData(fpin): """Return data from the "End of Central Directory" record, or None. @@ -88,6 +129,8 @@ endrec = list(endrec) endrec.append("") # Append the archive comment endrec.append(filesize - 22) # Append the record start offset + if endrec[-4] == -1: + return _EndRecData64(fpin, -22, endrec) return endrec # Search the last END_BLOCK bytes of the file for the record signature. # The comment is appended to the ZIP file and has a 16 bit length. @@ -106,6 +149,8 @@ # Append the archive comment and start offset endrec.append(comment) endrec.append(filesize - END_BLOCK + start) + if endrec[-4] == -1: + return _EndRecData64(fpin, - END_BLOCK + start, endrec) return endrec return # Error, return None @@ -162,14 +207,51 @@ CRC = self.CRC compress_size = self.compress_size file_size = self.file_size + + extra = self.extra + + if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + fmt = ' ZIP64_LIMIT: + x = endrec[9] - size_cd - 56 - 20 + else: + x = endrec[9] - size_cd # "concat" is zero, unless zip was concatenated to another file concat = x - offset_cd if self.debug > 2: @@ -275,7 +360,7 @@ total = (total + centdir[_CD_FILENAME_LENGTH] + centdir[_CD_EXTRA_FIELD_LENGTH] + centdir[_CD_COMMENT_LENGTH]) - x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat + x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] # file_offset must be computed below... 
(x.create_version, x.create_system, x.extract_version, x.reserved, x.flag_bits, x.compress_type, t, d, @@ -284,6 +369,10 @@ # Convert date/time code to (year, month, day, hour, min, sec) x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) + + x._decodeExtra() + x.header_offset = x.header_offset + concat + self.filelist.append(x) self.NameToInfo[x.filename] = x if self.debug > 2: @@ -491,6 +580,7 @@ records.""" if self.fp is None: return + if self.mode in ("w", "a"): # write ending records count = 0 pos1 = self.fp.tell() @@ -499,23 +589,72 @@ dt = zinfo.date_time dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = -1 + compress_size = -1 + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = -1 + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + ' ZIP64_LIMIT: + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1) + self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + self.fp.write(zip64locrec) + + pos3 = self.fp.tell() + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, -1, 0) + self.fp.write(endrec) + + else: + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, count, count, pos2 - pos1, pos1, 0) + self.fp.write(endrec) self.fp.flush() if not self._filePassed: self.fp.close()