zipfile: when decoding archive member names, try Japanese encodings
(ISO-2022-JP, EUC-JP, Shift_JIS) before falling back to cp437.

NOTE(review): the indentation of the unchanged context lines below was
reconstructed; if a hunk is rejected, retry with `patch -l`.

--- /home/umedoblock/src/Python-3.2.2-orig/Lib/zipfile.py	2011-09-04 01:16:44.000000000 +0900
+++ /home/umedoblock/local/lib/python3.2/zipfile.py	2012-07-13 12:47:25.549546588 +0900
@@ -757,6 +757,46 @@
                 self.fp = None
             raise
 
+    def _decode_filename(self, bytes_filename, flags, default_encoding=None):
+        """Decode a raw archive member name to str.
+
+        Candidate encodings are tried in order: default_encoding (when
+        given), UTF-8 (when bit 0x800 of flags is set), ISO-2022-JP,
+        EUC-JP, Shift_JIS and finally cp437.  The first one that decodes
+        bytes_filename wins.  A UnicodeDecodeError whose reason is not a
+        plain "wrong guess" is propagated to the caller.
+        """
+        encodings = []
+        if default_encoding:
+            encodings.append(default_encoding)
+        if flags & 0x800:
+            # UTF-8 file names extension
+            encodings.append('utf-8')
+        # Japanese encoding
+        encodings.extend(['iso-2022-jp', 'euc-jp', 'sjis'])
+        # Historical ZIP filename encoding
+        encodings.append('cp437')
+
+        # Reasons that only mean "wrong guess, try the next candidate".
+        # The CJK codecs report a name that ends in the middle of a
+        # character as "incomplete" rather than "illegal".
+        recoverable = ('illegal multibyte sequence',
+                       'incomplete multibyte sequence')
+
+        for encoding in encodings:
+            try:
+                filename = bytes_filename.decode(encoding)
+                break
+            except UnicodeDecodeError as err:
+                if err.reason not in recoverable:
+                    # Bare raise keeps the original exception and traceback.
+                    raise
+        if self.debug:
+            # Reported once, after a candidate has succeeded.
+            print('encoding: {}, filename: {}'.format(encoding, filename))
+
+        return filename
+
     def _RealGetContents(self):
         """Read in the table of contents for the ZIP file."""
         fp = self.fp
@@ -796,12 +836,7 @@
                 print(centdir)
             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
             flags = centdir[5]
-            if flags & 0x800:
-                # UTF-8 file names extension
-                filename = filename.decode('utf-8')
-            else:
-                # Historical ZIP filename encoding
-                filename = filename.decode('cp437')
+            filename = self._decode_filename(filename, flags)
             # Create ZipInfo instance to store file information
             x = ZipInfo(filename)
             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
@@ -928,11 +963,7 @@
         if fheader[_FH_EXTRA_FIELD_LENGTH]:
             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 
-        if zinfo.flag_bits & 0x800:
-            # UTF-8 filename
-            fname_str = fname.decode("utf-8")
-        else:
-            fname_str = fname.decode("cp437")
+        fname_str = self._decode_filename(fname, zinfo.flag_bits)
 
         if fname_str != zinfo.orig_filename:
             if not self._filePassed: