diff -r f27af2243e2a Lib/test/test_zipimport.py --- a/Lib/test/test_zipimport.py Fri Sep 20 21:25:53 2013 +0300 +++ b/Lib/test/test_zipimport.py Tue Sep 24 08:33:00 2013 -0700 @@ -1,3 +1,4 @@ +import io import sys import os import marshal @@ -55,6 +56,27 @@ TEMP_ZIP = os.path.abspath("junk95142" + os.extsep + "zip") +def _write_zip_package(zipname, files, + data_to_prepend=b"", compression=ZIP_STORED): + z = ZipFile(zipname, "w") + try: + for name, (mtime, data) in files.items(): + zinfo = ZipInfo(name, time.localtime(mtime)) + zinfo.compress_type = compression + z.writestr(zinfo, data) + finally: + z.close() + + if data_to_prepend: + # Prepend data to the start of the zipfile + with open(zipname, "rb") as f: + zip_data = f.read() + + with open(zipname, "wb") as f: + f.write(data_to_prepend) + f.write(zip_data) + + class UncompressedZipImportTestCase(ImportHooksBaseTestCase): compression = ZIP_STORED @@ -67,26 +89,9 @@ ImportHooksBaseTestCase.setUp(self) def doTest(self, expected_ext, files, *modules, **kw): - z = ZipFile(TEMP_ZIP, "w") + _write_zip_package(TEMP_ZIP, files, data_to_prepend=kw.get("stuff"), + compression=self.compression) try: - for name, (mtime, data) in files.items(): - zinfo = ZipInfo(name, time.localtime(mtime)) - zinfo.compress_type = self.compression - z.writestr(zinfo, data) - z.close() - - stuff = kw.get("stuff", None) - if stuff is not None: - # Prepend 'stuff' to the start of the zipfile - f = open(TEMP_ZIP, "rb") - data = f.read() - f.close() - - f = open(TEMP_ZIP, "wb") - f.write(stuff) - f.write(data) - f.close() - sys.path.insert(0, TEMP_ZIP) mod = __import__(".".join(modules), globals(), locals(), @@ -101,7 +106,6 @@ self.assertEqual(file, os.path.join(TEMP_ZIP, *modules) + expected_ext) finally: - z.close() os.remove(TEMP_ZIP) def testAFakeZlib(self): @@ -387,6 +391,62 @@ compression = ZIP_DEFLATED +class ZipFileModifiedAfterImportTestCase(ImportHooksBaseTestCase): + def setUp(self): + zipimport._zip_directory_cache.clear() + ImportHooksBaseTestCase.setUp(self) + + def tearDown(self): + if os.path.exists(TEMP_ZIP): + os.remove(TEMP_ZIP) + + def testZipFileChangesAfterFirstImport(self): + """Alter the zip file after caching its index and try an import.""" + packdir = TESTPACK + os.sep + files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc), + packdir + TESTMOD + ".py": (NOW, "test_value = 38\n"), + "ziptest_a.py": (NOW, "test_value = 23\n"), + "ziptest_b.py": (NOW, "test_value = 42\n"), + "ziptest_c.py": (NOW, "test_value = 1337\n")} + zipfile_path = TEMP_ZIP + _write_zip_package(zipfile_path, files) + self.assertTrue(os.path.exists(zipfile_path)) + sys.path.insert(0, zipfile_path) + + # Import something out of the zipfile and confirm it is correct. + testmod = __import__(TESTPACK + "." + TESTMOD, + globals(), locals(), ["__dummy__"]) + self.assertEqual(testmod.test_value, 38) + # Import something else out of the zipfile and confirm it is correct. + ziptest_b = __import__("ziptest_b", globals(), locals(), ["test_value"]) + self.assertEqual(ziptest_b.test_value, 42) + + # Truncate and fill the zip file with non-zip garbage. + with io.open(zipfile_path, "rb") as orig_zip_file: + orig_zip_file_contents = orig_zip_file.read() + with io.open(zipfile_path, "wb") as byebye_valid_zip_file: + byebye_valid_zip_file.write(b"Tear down this wall!\n"*1987) + # Now that the zipfile has been replaced, import something else from it + # which should fail as the file contents are now garbage. + with self.assertRaises(ImportError): + ziptest_a = __import__("ziptest_a", globals(), locals(), + ["test_value"]) + + # Now lets make it a valid zipfile that has some garbage at the start. + # This alters all of the offsets within the file + with io.open(zipfile_path, "wb") as new_zip_file: + new_zip_file.write(b"X"*1991) # The year Python was created. + new_zip_file.write(orig_zip_file_contents) + + # Now that the zip file has been "restored" to a valid but different + # zipfile the zipimporter should *successfully* re-read the new zip + # file's end of file central index and be able to import from it again. + ziptest_a = __import__("ziptest_a", globals(), locals(), ["test_value"]) + self.assertEqual(ziptest_a.test_value, 23) + ziptest_c = __import__("ziptest_c", globals(), locals(), ["test_value"]) + self.assertEqual(ziptest_c.test_value, 1337) + + class BadFileZipImportTestCase(unittest.TestCase): def assertZipFailure(self, filename): self.assertRaises(zipimport.ZipImportError, @@ -464,6 +524,7 @@ UncompressedZipImportTestCase, CompressedZipImportTestCase, BadFileZipImportTestCase, + ZipFileModifiedAfterImportTestCase, ) finally: test_support.unlink(TESTMOD) diff -r f27af2243e2a Modules/zipimport.c --- a/Modules/zipimport.c Fri Sep 20 21:25:53 2013 +0300 +++ b/Modules/zipimport.c Tue Sep 24 08:33:00 2013 -0700 @@ -42,10 +42,16 @@ static PyObject *ZipImportError; static PyObject *zip_directory_cache = NULL; +static PyObject *zip_stat_cache = NULL; +/* posix.fstat or nt.fstat function. Used due to posixmodule.c's + * superior fstat implementation over libc's on Windows. */ +static PyObject *fstat_function = NULL; /* posix.fstat() or nt.fstat() */ /* forward decls */ -static PyObject *read_directory(char *archive); -static PyObject *get_data(char *archive, PyObject *toc_entry); +static FILE * fopen_rb_and_stat(char *path, PyObject **py_stat_p); +static FILE * safely_reopen_archive(ZipImporter *self, char **archive_p); +static PyObject *read_directory(char *archive, FILE *fp); +static PyObject *get_data(char *archive, FILE *fp, PyObject *toc_entry); static PyObject *get_module_code(ZipImporter *self, char *fullname, int *p_ispackage, char **p_modpath); @@ -123,15 +129,39 @@ prefix = p; } if (path != NULL) { - PyObject *files; + PyObject *files, *zip_stat = NULL; files = PyDict_GetItemString(zip_directory_cache, path); if (files == NULL) { - files = read_directory(buf); - if (files == NULL) + FILE *fp = fopen_rb_and_stat(buf, &zip_stat); + if (fp == NULL) { + PyErr_Format(ZipImportError, "can't open Zip file: " + "'%.200s'", buf); + Py_XDECREF(zip_stat); return -1; + } + + if (Py_VerboseFlag) + PySys_WriteStderr("# zipimport: %s not cached, " + "reading TOC.\n", path); + + files = read_directory(buf, fp); + fclose(fp); + if (files == NULL) { + Py_XDECREF(zip_stat); + return -1; + } if (PyDict_SetItemString(zip_directory_cache, path, - files) != 0) + files) != 0) { + Py_DECREF(files); + Py_XDECREF(zip_stat); return -1; + } + if (zip_stat && PyDict_SetItemString(zip_stat_cache, path, + zip_stat) != 0) { + Py_DECREF(files); + Py_DECREF(zip_stat); + return -1; + } } else Py_INCREF(files); @@ -419,7 +449,8 @@ zipimporter_get_data(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; - char *path; + char *path, *archive; + FILE *fp; #ifdef ALTSEP char *p, buf[MAXPATHLEN + 1]; #endif @@ -448,12 +479,21 @@ path = path + len + 1; } + fp = safely_reopen_archive(self, &archive); + if (fp == NULL) + return NULL; + toc_entry = PyDict_GetItemString(self->files, path); if (toc_entry == NULL) { PyErr_SetFromErrnoWithFilename(PyExc_IOError, path); + fclose(fp); return NULL; } - return get_data(PyString_AsString(self->archive), toc_entry); + { + PyObject *data = get_data(archive, fp, toc_entry); + fclose(fp); + return data; + } } static PyObject * @@ -473,7 +513,8 @@ { ZipImporter *self = (ZipImporter *)obj; PyObject *toc_entry; - char *fullname, *subname, path[MAXPATHLEN+1]; + FILE *fp; + char *fullname, *subname, path[MAXPATHLEN+1], *archive; int len; enum zi_module_info mi; @@ -501,13 +542,20 @@ else strcpy(path + len, ".py"); + fp = safely_reopen_archive(self, &archive); + if (fp == NULL) + return NULL; + toc_entry = PyDict_GetItemString(self->files, path); - if (toc_entry != NULL) - return get_data(PyString_AsString(self->archive), toc_entry); + if (toc_entry != NULL) { + PyObject *data = get_data(archive, fp, toc_entry); + fclose(fp); + return data; + } + fclose(fp); /* we have the module, but no source */ - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } PyDoc_STRVAR(doc_find_module, @@ -662,7 +710,142 @@ } /* - read_directory(archive) -> files dict (new reference) + fopen_rb_and_stat(path, &py_stat) -> FILE * + + Opens path in "rb" mode and populates the Python py_stat stat_result + with information about the opened file. *py_stat may be filled with + NULL if there is no fstat_function or if it fails. + + Returns NULL and sets *py_stat to NULL if the open failed. +*/ +static FILE * +fopen_rb_and_stat(char *path, PyObject **py_stat_p) +{ + FILE *fp; + assert(py_stat_p != NULL); + + fp = fopen(path, "rb"); + if (fp == NULL) { + *py_stat_p = NULL; + return NULL; + } + + if (fstat_function) { + *py_stat_p = PyObject_CallFunction(fstat_function, + "i", fileno(fp)); + if (*py_stat_p == NULL) { + PyErr_Clear(); /* We can function without it. */ + } + + } else { + *py_stat_p = NULL; + } + + return fp; +} + +/* Return 1 if objects a and b fail a Py_EQ test for an attr. */ +static int +compare_obj_attr_strings(PyObject *obj_a, PyObject *obj_b, char *attr_name) +{ + int problem = 0; + PyObject *attr_a = PyObject_GetAttrString(obj_a, attr_name); + PyObject *attr_b = PyObject_GetAttrString(obj_b, attr_name); + if (attr_a == NULL) + problem = 1; + else if (attr_b == NULL) + problem = 1; + else + problem = (PyObject_RichCompareBool(attr_a, attr_b, Py_EQ) != 1); + Py_XDECREF(attr_a); + Py_XDECREF(attr_b); + return problem; +} + +/* + * Returns an open FILE * on success and sets *archive_p to point to + * a read only C string representation of the archive name (as a + * convenience for use in error messages). + * + * Returns NULL on error with the Python error context set. + */ +static FILE * +safely_reopen_archive(ZipImporter *self, char **archive_p) +{ + FILE *fp; + PyObject *stat_now = NULL; + char *archive; + + assert(archive_p != NULL); + *archive_p = PyString_AsString(self->archive); + if (*archive_p == NULL) + return NULL; + archive = *archive_p; + + fp = fopen_rb_and_stat(archive, &stat_now); + if (!fp) { + PyErr_Format(PyExc_IOError, + "zipimport: can not open file %s", archive); + Py_XDECREF(stat_now); + return NULL; + } + + if (stat_now != NULL) { + int problem = 0; + PyObject *files; + PyObject *prev_stat = PyDict_GetItemString(zip_stat_cache, archive); + /* Test stat_now vs the old cached stat on some key attributes. */ + if (prev_stat != NULL) { + problem = compare_obj_attr_strings(prev_stat, stat_now, + "st_ino"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_size"); + problem |= compare_obj_attr_strings(prev_stat, stat_now, + "st_mtime"); + } else { + if (Py_VerboseFlag) + PySys_WriteStderr("# zipimport: no stat data for %s!\n", + archive); + problem = 1; + } + + if (problem) { + if (Py_VerboseFlag) + PySys_WriteStderr("# zipimport: %s modified since last" + " import, rereading TOC.\n", archive); + files = read_directory(archive, fp); + if (files == NULL) { + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (PyDict_SetItem(zip_directory_cache, self->archive, + files) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + if (stat_now && PyDict_SetItem(zip_stat_cache, self->archive, + stat_now) != 0) { + Py_DECREF(files); + Py_DECREF(stat_now); + fclose(fp); + return NULL; + } + Py_XDECREF(self->files); /* free the old value. */ + self->files = files; + } else { + /* No problem, discard the new stat data. */ + Py_DECREF(stat_now); + } + } /* stat succeeded */ + + return fp; +} + +/* + read_directory(archive, fp) -> files dict (new reference) Given a path to a Zip archive, build a dict, mapping file names (local to the archive, using SEP as a separator) to toc entries. @@ -683,10 +866,9 @@ data_size and file_offset are 0. */ static PyObject * -read_directory(char *archive) +read_directory(char *archive, FILE *fp) { PyObject *files = NULL; - FILE *fp; long compress, crc, data_size, file_size, file_offset, date, time; long header_offset, name_size, header_size, header_position; long i, l, count; @@ -696,6 +878,7 @@ char *p, endof_central_dir[22]; long arc_offset; /* offset from beginning of file to start of zip-archive */ + assert(fp != NULL); if (strlen(archive) > MAXPATHLEN) { PyErr_SetString(PyExc_OverflowError, "Zip path name is too long"); @@ -703,28 +886,18 @@ } strcpy(path, archive); - fp = fopen(archive, "rb"); - if (fp == NULL) { - PyErr_Format(ZipImportError, "can't open Zip file: " - "'%.200s'", archive); - return NULL; - } - if (fseek(fp, -22, SEEK_END) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %s", archive); return NULL; } header_position = ftell(fp); if (fread(endof_central_dir, 1, 22, fp) != 22) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: " "'%.200s'", archive); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ - fclose(fp); PyErr_Format(ZipImportError, "not a Zip file: " "'%.200s'", archive); return NULL; @@ -793,18 +966,15 @@ goto error; count++; } - fclose(fp); if (Py_VerboseFlag) PySys_WriteStderr("# zipimport: found %ld names in %s\n", count, archive); return files; fseek_error: - fclose(fp); Py_XDECREF(files); PyErr_Format(ZipImportError, "can't read Zip file: %s", archive); return NULL; error: - fclose(fp); Py_XDECREF(files); return NULL; } @@ -841,14 +1011,13 @@ return decompress; } -/* Given a path to a Zip file and a toc_entry, return the (uncompressed) +/* Given a FILE* to a Zip file and a toc_entry, return the (uncompressed) data as a new reference. */ static PyObject * -get_data(char *archive, PyObject *toc_entry) +get_data(char *archive, FILE *fp, PyObject *toc_entry) { PyObject *raw_data, *data = NULL, *decompress; char *buf; - FILE *fp; int err; Py_ssize_t bytes_read = 0; long l; @@ -862,16 +1031,8 @@ return NULL; } - fp = fopen(archive, "rb"); - if (!fp) { - PyErr_Format(PyExc_IOError, - "zipimport: can not open file %s", archive); - return NULL; - } - /* Check to make sure the local file header is correct */ if (fseek(fp, file_offset, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %s", archive); return NULL; } @@ -882,11 +1043,9 @@ PyErr_Format(ZipImportError, "bad local file header in %s", archive); - fclose(fp); return NULL; } if (fseek(fp, file_offset + 26, 0) == -1) { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %s", archive); return NULL; } @@ -898,7 +1057,6 @@ raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ? data_size : data_size + 1); if (raw_data == NULL) { - fclose(fp); return NULL; } buf = PyString_AsString(raw_data); @@ -907,11 +1065,9 @@ if (err == 0) { bytes_read = fread(buf, 1, data_size, fp); } else { - fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: %s", archive); return NULL; } - fclose(fp); if (err || bytes_read != data_size) { PyErr_SetString(PyExc_IOError, "zipimport: can't read data"); @@ -1107,17 +1263,13 @@ /* Return the code object for the module named by 'fullname' from the Zip archive as a new reference. */ static PyObject * -get_code_from_data(ZipImporter *self, int ispackage, int isbytecode, - time_t mtime, PyObject *toc_entry) +get_code_from_data(char *archive, FILE *fp, int ispackage, + int isbytecode, time_t mtime, PyObject *toc_entry) { PyObject *data, *code; char *modpath; - char *archive = PyString_AsString(self->archive); - if (archive == NULL) - return NULL; - - data = get_data(archive, toc_entry); + data = get_data(archive, fp, toc_entry); if (data == NULL) return NULL; @@ -1152,12 +1304,19 @@ for (zso = zip_searchorder; *zso->suffix; zso++) { PyObject *code = NULL; + FILE *fp; + char *archive; strcpy(path + len, zso->suffix); if (Py_VerboseFlag > 1) PySys_WriteStderr("# trying %s%c%s\n", PyString_AsString(self->archive), SEP, path); + + fp = safely_reopen_archive(self, &archive); + if (fp == NULL) + return NULL; + toc_entry = PyDict_GetItemString(self->files, path); if (toc_entry != NULL) { time_t mtime = 0; @@ -1168,7 +1327,7 @@ mtime = get_mtime_of_source(self, path); if (p_ispackage != NULL) *p_ispackage = ispackage; - code = get_code_from_data(self, ispackage, + code = get_code_from_data(archive, fp, ispackage, isbytecode, mtime, toc_entry); if (code == Py_None) { @@ -1180,8 +1339,10 @@ if (code != NULL && p_modpath != NULL) *p_modpath = PyString_AsString( PyTuple_GetItem(toc_entry, 0)); + fclose(fp); return code; } + fclose(fp); } PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname); return NULL; @@ -1199,6 +1360,8 @@ subclass of ImportError, so it can be caught as ImportError, too.\n\ - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\ info dicts, as used in zipimporter._files.\n\ +- _zip_stat_cache: a dict, mapping archive paths to stat_result\n\ + info for the .zip the last time anything was imported from it.\n\ \n\ It is usually not needed to use the zipimport module explicitly; it is\n\ used by the builtin import mechanism for sys.path items that are paths\n\ @@ -1254,4 +1417,29 @@ if (PyModule_AddObject(mod, "_zip_directory_cache", zip_directory_cache) < 0) return; + + zip_stat_cache = PyDict_New(); + if (zip_stat_cache == NULL) + return; + Py_INCREF(zip_stat_cache); + if (PyModule_AddObject(mod, "_zip_stat_cache", zip_stat_cache) < 0) + return; + + { + /* We cannot import "os" here as that is a .py/.pyc file that could + * live within a zipped up standard library. Import the posix or nt + * builtin that provides the fstat() function we want instead. */ + PyObject *os_like_module; + os_like_module = PyImport_ImportModule("posix"); + if (os_like_module == NULL) { + os_like_module = PyImport_ImportModule("nt"); + } + if (os_like_module != NULL) { + fstat_function = PyObject_GetAttrString(os_like_module, "fstat"); + Py_DECREF(os_like_module); + } + if (fstat_function == NULL) { + PyErr_Clear(); /* non-fatal, we'll go on without it. */ + } + } }