diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 0646994..aaaaad6 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -250,13 +250,16 @@ ZipFile Objects :meth:`testzip` on a closed ZipFile will raise a :exc:`RuntimeError`. -.. method:: ZipFile.write(filename, arcname=None, compress_type=None) +.. method:: ZipFile.write(filename, arcname=None, compress_type=None, unicode=None) Write the file named *filename* to the archive, giving it the archive name *arcname* (by default, this will be the same as *filename*, but without a drive letter and with leading path separators removed). If given, *compress_type* overrides the value given for the *compression* parameter to the constructor for - the new entry. The archive must be open with mode ``'w'`` or ``'a'`` -- calling + the new entry. *unicode* defines how the filename is encoded: see + :attr:`ZipInfo.unicode`. + + The archive must be open with mode ``'w'`` or ``'a'`` -- calling :meth:`write` on a ZipFile created with mode ``'r'`` will raise a :exc:`RuntimeError`. Calling :meth:`write` on a closed ZipFile will raise a :exc:`RuntimeError`. @@ -284,7 +287,10 @@ ZipFile Objects Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file name it will be given in the archive, or a :class:`ZipInfo` instance. If it's an instance, at least the filename, date, and time must be given. If it's a - name, the date and time is set to the current date and time. The archive must be + name, the date and time is set to the current date and time, and *unicode* + defines how the filename is encoded (see :attr:`ZipInfo.unicode`). + + The archive must be opened with mode ``'w'`` or ``'a'`` -- calling :meth:`writestr` on a ZipFile created with mode ``'r'`` will raise a :exc:`RuntimeError`. Calling :meth:`writestr` on a closed ZipFile will raise a :exc:`RuntimeError`. @@ -407,6 +413,13 @@ Instances have the following attributes: +-------+--------------------------+ +.. 
attribute:: ZipInfo.unicode + + If True, encode the filename to ``'UTF-8'`` (set unicode flag). If False, + encode the filename to ``'ascii'`` (unset unicode flag). Otherwise (default + case), try to encode to ``'ascii'`` or use ``'UTF-8'`` on error. + + .. attribute:: ZipInfo.compress_type Type of compression for the archive member. diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 50f4848..46823fc 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -268,6 +268,7 @@ class ZipInfo (object): 'orig_filename', 'filename', 'date_time', + 'unicode', 'compress_type', 'comment', 'extra', @@ -286,7 +287,8 @@ class ZipInfo (object): '_raw_time', ) - def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0), + unicode=None): self.orig_filename = filename # Original file name in archive # Terminate the file name at the first null byte. Null bytes in file @@ -302,6 +304,7 @@ class ZipInfo (object): self.filename = filename # Normalized file name self.date_time = date_time # year, month, day, hour, min, sec + self.unicode = unicode # Standard values: self.compress_type = ZIP_STORED # Type of compression for the file self.comment = b"" # Comment for each file @@ -359,10 +362,15 @@ class ZipInfo (object): return header + filename + extra def _encodeFilenameFlags(self): - try: - return self.filename.encode('ascii'), self.flag_bits - except UnicodeEncodeError: + if self.unicode is None: + try: + return self.filename.encode('ascii'), self.flag_bits + except UnicodeEncodeError: + return self.filename.encode('utf-8'), self.flag_bits | 0x800 + elif self.unicode: return self.filename.encode('utf-8'), self.flag_bits | 0x800 + else: + return self.filename.encode('ascii'), self.flag_bits def _decodeExtra(self): # Try to decode the extra field. 
@@ -801,11 +809,13 @@ class ZipFile: if flags & 0x800: # UTF-8 file names extension filename = filename.decode('utf-8') + unicode = True else: # Historical ZIP filename encoding filename = filename.decode('cp437') + unicode = False # Create ZipInfo instance to store file information - x = ZipInfo(filename) + x = ZipInfo(filename, unicode=unicode) x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] @@ -1063,7 +1073,7 @@ class ZipFile: raise LargeZipFile( "Zipfile size would require ZIP64 extensions") - def write(self, filename, arcname=None, compress_type=None): + def write(self, filename, arcname=None, compress_type=None, unicode=None): """Put the bytes from filename into the archive under the name arcname.""" if not self.fp: @@ -1082,7 +1092,7 @@ class ZipFile: arcname = arcname[1:] if isdir: arcname += '/' - zinfo = ZipInfo(arcname, date_time) + zinfo = ZipInfo(arcname, date_time, unicode=unicode) zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes if compress_type is None: zinfo.compress_type = self.compression @@ -1144,7 +1154,8 @@ class ZipFile: self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo - def writestr(self, zinfo_or_arcname, data, compress_type=None): + def writestr(self, zinfo_or_arcname, data, compress_type=None, + unicode=None): """Write a file into the archive. The contents is 'data', which may be either a 'str' or a 'bytes' instance; if it is a 'str', it is encoded as UTF-8 first. @@ -1154,7 +1165,8 @@ class ZipFile: data = data.encode("utf-8") if not isinstance(zinfo_or_arcname, ZipInfo): zinfo = ZipInfo(filename=zinfo_or_arcname, - date_time=time.localtime(time.time())[:6]) + date_time=time.localtime(time.time())[:6], + unicode=unicode) zinfo.compress_type = self.compression zinfo.external_attr = 0o600 << 16 else: