Index: Modules/zipimport.c =================================================================== --- Modules/zipimport.c (revision 82847) +++ Modules/zipimport.c (working copy) @@ -45,6 +45,7 @@ /* forward decls */ static PyObject *read_directory(char *archive); +static int find_endof_central_dir(FILE* fp,char *eocd,long *header_pos); static PyObject *get_data(char *archive, PyObject *toc_entry); static PyObject *get_module_code(ZipImporter *self, char *fullname, int *p_ispackage, char **p_modpath); @@ -718,11 +719,14 @@ return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { - /* Bad: End of Central Dir signature */ - fclose(fp); - PyErr_Format(ZipImportError, "not a Zip file: " - "'%.200s'", archive); - return NULL; + /* Bad End of Central Dir signature */ + /* Perhaps it has an appended comment? */ + if(find_endof_central_dir(fp,endof_central_dir,&header_position) != 0) { + fclose(fp); + PyErr_Format(ZipImportError, "not a Zip file: " + "'%.200s'", archive); + return NULL; + } } header_size = get_long((unsigned char *)endof_central_dir + 12); @@ -796,6 +800,77 @@ return NULL; } + +/* Find the end-of-central-directory record for a zipfile with comment. + This reads data from the end of the zipfile in EOCD_CHUNK_SIZE chunks, + up to the maximum supported comment size, and searches backwards for + the end-of-central-directory marker. + + If the EOCD signature is not found, this function returns -1. If it + is found then 0 is returned, the 22-byte EOCD record is copied into + eocd_out, and the position at which it was found is written into + header_pos. 
*/
+
+/* Farthest distance from end-of-file at which the search starts a chunk:
+   one 22-byte record plus a 64 KiB allowance for the archive comment. */
+#define EOCD_MAX_SIZE ((1 << 16) + 22)
+#define EOCD_CHUNK_SIZE (1024 * 16)
+/* Number of bytes copied into eocd_out for a matching record. */
+#define EOCD_RECORD_SIZE 22
+/* Bytes of the previously read chunk that are read again, so that a record
+   straddling two chunks is seen whole.  Must be >= EOCD_RECORD_SIZE - 1. */
+#define EOCD_OVERLAP 32
+
+static int find_endof_central_dir(FILE* fp,char *eocd_out,long *header_pos) {
+    long pos, size, count, end_pos;
+    char *eocd;
+    char chunk[EOCD_CHUNK_SIZE + EOCD_OVERLAP];
+    struct stat st;
+
+    if (fstat(fileno(fp), &st) != 0) {
+        return -1;
+    }
+
+    eocd = NULL;
+    count = 1; pos = 0; size = 0;
+    while (eocd == NULL && pos < EOCD_MAX_SIZE && pos < st.st_size) {
+        /* pos: distance from end-of-file at which this chunk starts */
+        pos = count * EOCD_CHUNK_SIZE;
+        /* Overlap previous chunk, in case the EOCD record is in the gap */
+        size = (count > 1) ? EOCD_CHUNK_SIZE + EOCD_OVERLAP : EOCD_CHUNK_SIZE;
+        /* Don't try to search past beginning of file */
+        if (pos > st.st_size) {
+            pos = st.st_size;
+            if (size > st.st_size) {
+                size = st.st_size;
+            }
+        }
+        /* Don't try to search past max comment size */
+        if (pos > EOCD_MAX_SIZE) {
+            pos = EOCD_MAX_SIZE;
+        }
+        /* A chunk too short to hold one record cannot contain a match, and
+           would put the search start below before the buffer. */
+        if (size < EOCD_RECORD_SIZE) {
+            return -1;
+        }
+        /* Read the next chunk */
+        if (fseek(fp, -pos, SEEK_END) != 0) {
+            return -1;
+        }
+        if (fread(chunk, 1, (size_t)size, fp) != (size_t)size) {
+            return -1;
+        }
+        /* Search backwards for the EOCD signature, starting at the last
+           offset where a whole record still fits inside the bytes read.
+           A signature any later could not be a complete record, and
+           copying it out would read bytes never stored in chunk. */
+        eocd = chunk + size - EOCD_RECORD_SIZE;
+        while (get_long((unsigned char *)eocd) != 0x06054B50) {
+            if (eocd == chunk) {
+                eocd = NULL;
+                break;
+            }
+            eocd -= 1;
+        }
+        count++;
+    }
+
+    if (eocd == NULL) {
+        return -1;
+    }
+
+    /* fp now sits just past the last chunk read, i.e. at chunk + size */
+    end_pos = ftell(fp);
+    if (end_pos < 0) {
+        return -1;
+    }
+
+    memcpy(eocd_out, eocd, EOCD_RECORD_SIZE);
+    *header_pos = end_pos - size + (long)(eocd - chunk);
+    return 0;
+}
+
/* Return the zlib.decompress function object, or NULL if zlib couldn't be imported. The function is cached when found, so subsequent calls don't import zlib again. Returns a *borrowed* reference. Index: Doc/library/zipimport.rst =================================================================== --- Doc/library/zipimport.rst (revision 82847) +++ Doc/library/zipimport.rst (working copy) @@ -33,8 +33,6 @@ loaded from a ZIP archive; it is unlikely that :func:`reload` would be needed, since this would imply that the ZIP has been altered during runtime. -ZIP archives with an archive comment are currently not supported. - .. 
seealso:: `PKZIP Application Note <http://www.pkware.com/documents/casestudies/APPNOTE.TXT>`_ Index: Doc/library/zipfile.rst =================================================================== --- Doc/library/zipfile.rst (revision 82847) +++ Doc/library/zipfile.rst (working copy) @@ -15,10 +15,8 @@ defined in `PKZIP Application Note <http://www.pkware.com/documents/casestudies/APPNOTE.TXT>`_. -This module does not currently handle multi-disk ZIP files, or ZIP files -which have appended comments (although it correctly handles comments -added to individual archive members---for which see the :ref:`zipinfo-objects` -documentation). It can handle ZIP files that use the ZIP64 extensions +This module does not currently handle multi-disk ZIP files. +It can handle ZIP files that use the ZIP64 extensions (that is ZIP files that are more than 4 GByte in size). It supports decryption of encrypted files in ZIP archives, but it currently cannot create an encrypted file. Decryption is extremely slow as it is @@ -67,7 +65,6 @@ Returns ``True`` if *filename* is a valid ZIP file based on its magic number, otherwise returns ``False``. *filename* may be a file or file-like object too. - This module does not currently handle ZIP files which have appended comments. .. versionchanged:: 2.7 Support for file and file-like objects. Index: Lib/test/test_zipimport.py =================================================================== --- Lib/test/test_zipimport.py (revision 82847) +++ Lib/test/test_zipimport.py (working copy) @@ -92,6 +92,16 @@ f.write(data) f.close() + comment = kw.get("comment", None) + if comment is not None: + # Append 'comment' to the end of the zipfile. + # Technically we should adjust the "comment size" field at the + # end of the zipfile, but zipimport doesn't check it anyway. 
+ f = open(TEMP_ZIP, "ab") + f.seek(0,os.SEEK_END) + f.write(comment) + f.close() + sys.path.insert(0, TEMP_ZIP) mod = __import__(".".join(modules), globals(), locals(), @@ -320,6 +330,30 @@ self.doTest(".py", files, TESTMOD, stuff="Some Stuff"*31)
+    def testImport_WithComment(self):
+        # Import from a zipfile that has an archive comment appended
+        # after the end of the zip data.
+        src_name = TESTMOD + ".py"
+        self.doTest(".py", {src_name: (NOW, test_src)}, TESTMOD,
+                    comment="Some Comment" * 31)
+
+    def testImport_WithLargeComment(self):
+        # Import from a zipfile whose appended comment is sized so that
+        # the EOCD signature falls on a chunk boundary of the backwards
+        # search.  Chunk size == 1024 * 16.
+        chunk_size = 1024 * 16
+        self.doTest(".py", {TESTMOD + ".py": (NOW, test_src)}, TESTMOD,
+                    stuff="X" * chunk_size,
+                    comment="X" * (chunk_size - 20))
+
+    def testImport_WithStuffAndComment(self):
+        # Import from a zipfile that has both extra data prepended to the
+        # start of the file and an archive comment appended to the end.
+        extras = {"stuff": "Some Stuff" * 31,
+                  "comment": "Some Comment" * 31}
+        files = {TESTMOD + ".py": (NOW, test_src)}
+        self.doTest(".py", files, TESTMOD, **extras)
+
def assertModuleSource(self, module): self.assertEqual(inspect.getsource(module), test_src)