diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -331,7 +331,7 @@ class _LoaderBasics: filename = self.get_filename(fullname).rpartition(path_sep)[2] return filename.rsplit('.', 1)[0] == '__init__' - def _bytes_from_bytecode(self, fullname, data, source_mtime): + def _bytes_from_bytecode(self, fullname, data, source_stat): """Return the marshalled bytes from bytecode, verifying the magic number and timestamp along the way. @@ -340,16 +340,30 @@ class _LoaderBasics: """ magic = data[:4] raw_timestamp = data[4:8] + raw_size = data[8:12] if len(magic) != 4 or magic != imp.get_magic(): raise ImportError("bad magic number in {}".format(fullname)) elif len(raw_timestamp) != 4: raise EOFError("bad timestamp in {}".format(fullname)) - elif source_mtime is not None: + elif len(raw_size) != 4: + raise EOFError("bad size in {}".format(fullname)) + try: + source_mtime = int(source_stat.st_mtime) + except AttributeError: + pass + else: if marshal._r_long(raw_timestamp) != source_mtime: raise ImportError("bytecode is stale for {}".format(fullname)) + try: + source_size = source_stat.st_size & 0xFFFFFFFF + except AttributeError: + pass + else: + if marshal._r_long(raw_size) != source_size: + raise ImportError("bytecode is stale for {}".format(fullname)) # Can't return the code object as errors from marshal loading need to # propagate even when source is available. - return data[8:] + return data[12:] @module_for_loader def _load_module(self, module, *, sourceless=False): @@ -374,8 +388,8 @@ class _LoaderBasics: class SourceLoader(_LoaderBasics): - def path_mtime(self, path): - """Optional method that returns the modification time (an int) for the + def path_stats(self, path): + """Optional method that returns the stat() results for the specified path, where path is a str. Implementing this method allows the loader to read bytecode files. @@ -407,7 +421,7 @@ class SourceLoader(_LoaderBasics): def get_code(self, fullname): """Concrete implementation of InspectLoader.get_code. - Reading of bytecode requires path_mtime to be implemented. To write + Reading of bytecode requires path_stats to be implemented. To write bytecode, set_data must also be implemented. """ @@ -416,10 +430,11 @@ class SourceLoader(_LoaderBasics): source_mtime = None if bytecode_path is not None: try: - source_mtime = self.path_mtime(source_path) + st = self.path_stats(source_path) except NotImplementedError: pass else: + source_mtime = int(st.st_mtime) try: data = self.get_data(bytecode_path) except IOError: @@ -427,7 +442,7 @@ class SourceLoader(_LoaderBasics): else: try: bytes_data = self._bytes_from_bytecode(fullname, data, - source_mtime) + st) except (ImportError, EOFError): pass else: @@ -448,6 +463,7 @@ class SourceLoader(_LoaderBasics): # throw an exception. data = bytearray(imp.get_magic()) data.extend(marshal._w_long(source_mtime)) + data.extend(marshal._w_long(len(source_bytes))) data.extend(marshal.dumps(code_object)) try: self.set_data(bytecode_path, data) @@ -492,9 +508,9 @@ class _SourceFileLoader(_FileLoader, Sou """Concrete implementation of SourceLoader using the file system.""" - def path_mtime(self, path): + def path_stats(self, path): """Return the modification time for the path.""" - return int(_os.stat(path).st_mtime) + return _os.stat(path) def set_data(self, path, data): """Write bytes data to a file.""" diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -121,8 +121,9 @@ class SourceLoader(_bootstrap.SourceLoad """ - def path_mtime(self, path): - """Return the (int) modification time for the path (str).""" + def path_stats(self, path): + """Return a os.stat-like structure for the path (str). + At least st_mtime and st_size should be filled.""" raise NotImplementedError def set_data(self, path, data): diff --git a/Lib/importlib/test/source/test_abc_loader.py b/Lib/importlib/test/source/test_abc_loader.py --- a/Lib/importlib/test/source/test_abc_loader.py +++ b/Lib/importlib/test/source/test_abc_loader.py @@ -5,6 +5,7 @@ from .. import abc as testing_abc from .. import util from . import util as source_util +import collections import imp import inspect import io @@ -16,6 +17,9 @@ import unittest import warnings +MockStatResult = collections.namedtuple('MockStatResult', 'st_mtime st_size') + + class SourceOnlyLoaderMock(abc.SourceLoader): # Globals that should be defined for all modules. @@ -40,8 +44,10 @@ class SourceLoaderMock(SourceOnlyLoaderM def __init__(self, path, magic=imp.get_magic()): super().__init__(path) self.bytecode_path = imp.cache_from_source(self.path) + self.source_size = len(self.source) data = bytearray(magic) data.extend(marshal._w_long(self.source_mtime)) + data.extend(marshal._w_long(self.source_size)) code_object = compile(self.source, self.path, 'exec', dont_inherit=True) data.extend(marshal.dumps(code_object)) @@ -56,9 +62,9 @@ class SourceLoaderMock(SourceOnlyLoaderM else: raise IOError - def path_mtime(self, path): + def path_stats(self, path): assert path == self.path - return self.source_mtime + return MockStatResult(st_mtime=self.source_mtime, st_size=self.source_size) def set_data(self, path, data): self.written[path] = bytes(data) @@ -657,6 +663,7 @@ class SourceLoaderBytecodeTests(SourceLo self.assertIn(self.cached, self.loader.written) data = bytearray(imp.get_magic()) data.extend(marshal._w_long(self.loader.source_mtime)) + data.extend(marshal._w_long(self.loader.source_size)) data.extend(marshal.dumps(code_object)) self.assertEqual(self.loader.written[self.cached], bytes(data)) @@ -847,7 +854,7 @@ class AbstractMethodImplTests(unittest.T # Required abstractmethods. self.raises_NotImplementedError(ins, 'get_filename', 'get_data') # Optional abstractmethods. - self.raises_NotImplementedError(ins,'path_mtime', 'set_data') + self.raises_NotImplementedError(ins,'path_stats', 'set_data') def test_PyLoader(self): self.raises_NotImplementedError(self.PyLoader(), 'source_path', diff --git a/Lib/importlib/test/source/test_file_loader.py b/Lib/importlib/test/source/test_file_loader.py --- a/Lib/importlib/test/source/test_file_loader.py +++ b/Lib/importlib/test/source/test_file_loader.py @@ -70,11 +70,6 @@ class SimpleTest(unittest.TestCase): module_dict_id = id(module.__dict__) with open(mapping['_temp'], 'w') as file: file.write("testing_var = 42\n") - # For filesystems where the mtime is only to a second granularity, - # everything that has happened above can be too fast; - # force an mtime on the source that is guaranteed to be different - # than the original mtime. - loader.path_mtime = self.fake_mtime(loader.path_mtime) module = loader.load_module('_temp') self.assertTrue('testing_var' in module.__dict__, "'testing_var' not in " @@ -190,10 +185,17 @@ class BadBytecodeTest(unittest.TestCase) del_source=del_source) test('_temp', mapping, bc_path) + def _test_partial_size(self, test, *, del_source=False): + with source_util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:11], + del_source=del_source) + test('_temp', mapping, bc_path) + def _test_no_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bc_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8], + lambda bc: bc[:12], del_source=del_source) file_path = mapping['_temp'] if not del_source else bc_path with self.assertRaises(EOFError): @@ -202,7 +204,7 @@ class BadBytecodeTest(unittest.TestCase) def _test_non_code_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8] + marshal.dumps(b'abcd'), + lambda bc: bc[:12] + marshal.dumps(b'abcd'), del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(ImportError): @@ -211,7 +213,7 @@ class BadBytecodeTest(unittest.TestCase) def _test_bad_marshal(self, *, del_source=False): with source_util.create_modules('_temp') as mapping: bytecode_path = self.manipulate_bytecode('_temp', mapping, - lambda bc: bc[:8] + b'', + lambda bc: bc[:12] + b'', del_source=del_source) file_path = mapping['_temp'] if not del_source else bytecode_path with self.assertRaises(EOFError): @@ -235,7 +237,7 @@ class SourceLoaderBadBytecodeTest(BadByt def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_empty_file(test) @@ -243,7 +245,7 @@ class SourceLoaderBadBytecodeTest(BadByt def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) self._test_partial_magic(test) @@ -254,7 +256,9 @@ class SourceLoaderBadBytecodeTest(BadByt def test(name, mapping, bytecode_path): self.import_(mapping[name], name) with open(bytecode_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) + + self._test_magic_only(test) @source_util.writes_bytecode_files def test_bad_magic(self): @@ -274,7 +278,20 @@ class SourceLoaderBadBytecodeTest(BadByt def test(name, mapping, bc_path): self.import_(mapping[name], name) with open(bc_path, 'rb') as file: - self.assertGreater(len(file.read()), 8) + self.assertGreater(len(file.read()), 12) + + self._test_partial_timestamp(test) + + @source_util.writes_bytecode_files + def test_partial_size(self): + # When the size is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 12) + + self._test_partial_size(test) @source_util.writes_bytecode_files def test_no_marshal(self): @@ -371,6 +388,13 @@ class SourcelessLoaderBadBytecodeTest(Ba self._test_partial_timestamp(test, del_source=True) + def test_partial_size(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_size(test, del_source=True) + def test_no_marshal(self): self._test_no_marshal(del_source=True) diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py --- a/Lib/pkgutil.py +++ b/Lib/pkgutil.py @@ -21,7 +21,7 @@ def read_code(stream): if magic != imp.get_magic(): return None - stream.read(4) # Skip timestamp + stream.read(8) # Skip timestamp and size return marshal.load(stream) diff --git a/Lib/py_compile.py b/Lib/py_compile.py --- a/Lib/py_compile.py +++ b/Lib/py_compile.py @@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None """ with tokenize.open(file) as f: try: - timestamp = int(os.fstat(f.fileno()).st_mtime) + st = os.fstat(f.fileno()) except AttributeError: - timestamp = int(os.stat(file).st_mtime) + st = os.stat(file) + timestamp = int(st.st_mtime) + size = st.st_size & 0xFFFFFFFF codestring = f.read() try: codeobject = builtins.compile(codestring, dfile or file, 'exec', @@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None with open(cfile, 'wb') as fc: fc.write(b'\0\0\0\0') wr_long(fc, timestamp) + wr_long(fc, size) marshal.dump(codeobject, fc) fc.flush() fc.seek(0, 0) diff --git a/Lib/test/test_import.py b/Lib/test/test_import.py --- a/Lib/test/test_import.py +++ b/Lib/test/test_import.py @@ -380,7 +380,7 @@ func_filename = func.__code__.co_filenam def test_foreign_code(self): py_compile.compile(self.file_name) with open(self.compiled_name, "rb") as f: - header = f.read(8) + header = f.read(12) code = marshal.load(f) constants = list(code.co_consts) foreign_code = test_main.__code__ @@ -644,6 +644,16 @@ class PycacheTests(unittest.TestCase): self.assertEqual(sys.modules['pep3147.foo'].__cached__, os.path.join(os.curdir, foo_pyc)) + def test_recompute_pyc_same_second(self): + # Even when the source file doesn't change timestamp, a change in + # source size is enough to trigger recomputation of the pyc file. + __import__(TESTFN) + unload(TESTFN) + with open(self.source, 'a') as fp: + print("x = 5", file=fp) + m = __import__(TESTFN) + self.assertEqual(m.x, 5) + class RelativeImportFromImportlibTests(test_relative_imports.RelativeImports): diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -19,7 +19,7 @@ import io from traceback import extract_tb, extract_stack, print_tb raise_src = 'def do_raise(): raise TypeError\n' -def make_pyc(co, mtime): +def make_pyc(co, mtime, size): data = marshal.dumps(co) if type(mtime) is type(0.0): # Mac mtimes need a bit of special casing @@ -27,14 +27,14 @@ def make_pyc(co, mtime): mtime = int(mtime) else: mtime = int(-0x100000000 + int(mtime)) - pyc = imp.get_magic() + struct.pack("", "exec"), NOW) + pyc = make_pyc(compile(src, "", "exec"), NOW, len(src)) files = {TESTMOD + pyc_ext: (NOW, pyc), "some.data": (NOW, "some data")} self.doTest(pyc_ext, files, TESTMOD) @@ -313,7 +313,7 @@ class UncompressedZipImportTestCase(Impo self.doTest(".py", files, TESTMOD, call=self.assertModuleSource) def testGetCompiledSource(self): - pyc = make_pyc(compile(test_src, "", "exec"), NOW) + pyc = make_pyc(compile(test_src, "", "exec"), NOW, len(test_src)) files = {TESTMOD + ".py": (NOW, test_src), TESTMOD + pyc_ext: (NOW, pyc)} self.doTest(pyc_ext, files, TESTMOD, call=self.assertModuleSource) diff --git a/Modules/zipimport.c b/Modules/zipimport.c --- a/Modules/zipimport.c +++ b/Modules/zipimport.c @@ -1033,7 +1033,9 @@ unmarshal_code(PyObject *pathname, PyObj return Py_None; /* signal caller to try alternative */ } - code = PyMarshal_ReadObjectFromString(buf + 8, size - 8); + /* XXX the pyc's size field is ignored; timestamp collisions are probably + unimportant with zip files. */ + code = PyMarshal_ReadObjectFromString(buf + 12, size - 12); if (code == NULL) return NULL; if (!PyCode_Check(code)) { diff --git a/Python/import.c b/Python/import.c --- a/Python/import.c +++ b/Python/import.c @@ -104,6 +104,7 @@ typedef unsigned short mode_t; Python 3.2a2 3180 (add DELETE_DEREF) Python 3.3a0 3190 __class__ super closure changed Python 3.3a0 3200 (__qualname__ added) + 3210 (added size modulo 2**32 to the pyc header) */ /* MAGIC must change whenever the bytecode emitted by the compiler may no @@ -116,7 +117,7 @@ typedef unsigned short mode_t; #define STRIFY(name) QUOTE(name) #define MAJOR STRIFY(PY_MAJOR_VERSION) #define MINOR STRIFY(PY_MINOR_VERSION) -#define MAGIC (3200 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (3210 | ((long)'\r'<<16) | ((long)'\n'<<24)) #define TAG "cpython-" MAJOR MINOR; #define CACHEDIR "__pycache__" /* Current magic word and string tag as globals. */ @@ -1071,11 +1072,12 @@ make_source_pathname(PyObject *path) Doesn't set an exception. */ static FILE * -check_compiled_module(PyObject *pathname, time_t mtime, PyObject *cpathname) +check_compiled_module(PyObject *pathname, struct stat *srcstat, PyObject *cpathname) { FILE *fp; long magic; long pyc_mtime; + long pyc_size; fp = _Py_fopen(cpathname, "rb"); if (fp == NULL) @@ -1088,12 +1090,19 @@ check_compiled_module(PyObject *pathname return NULL; } pyc_mtime = PyMarshal_ReadLongFromFile(fp); - if (pyc_mtime != mtime) { + if (pyc_mtime != srcstat->st_mtime) { if (Py_VerboseFlag) PySys_FormatStderr("# %R has bad mtime\n", cpathname); fclose(fp); return NULL; } + pyc_size = PyMarshal_ReadLongFromFile(fp); + if (pyc_size != (srcstat->st_size & 0xFFFFFFFF)) { + if (Py_VerboseFlag) + PySys_FormatStderr("# %R has bad size\n", cpathname); + fclose(fp); + return NULL; + } if (Py_VerboseFlag) PySys_FormatStderr("# %R matches %R\n", cpathname, pathname); return fp; @@ -1136,6 +1145,8 @@ load_compiled_module(PyObject *name, PyO "Bad magic number in %R", cpathname); return NULL; } + /* Skip mtime and size */ + (void) PyMarshal_ReadLongFromFile(fp); (void) PyMarshal_ReadLongFromFile(fp); co = read_compiled_module(cpathname, fp); if (co == NULL) @@ -1196,6 +1207,7 @@ write_compiled_module(PyCodeObject *co, Py_UCS4 *cpathname_ucs4; FILE *fp; time_t mtime = srcstat->st_mtime; + long size = srcstat->st_size & 0xFFFFFFFF; PyObject *cpathname_tmp; #ifdef MS_WINDOWS /* since Windows uses different permissions */ mode_t mode = srcstat->st_mode & ~S_IEXEC; @@ -1326,14 +1338,16 @@ write_compiled_module(PyCodeObject *co, return; } PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); - /* First write a 0 for mtime */ + /* First write a 0 for mtime and size */ + PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); fflush(fp); - /* Now write the true mtime */ + /* Now write the true mtime and size */ fseek(fp, 4L, 0); assert(mtime < LONG_MAX); PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION); + PyMarshal_WriteLongToFile(size, fp, Py_MARSHAL_VERSION); if (fflush(fp) != 0 || ferror(fp)) { if (Py_VerboseFlag) PySys_FormatStderr("# can't write %R\n", cpathname); @@ -1478,7 +1492,7 @@ load_source_module(PyObject *name, PyObj cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag); if (cpathname != NULL) - fpc = check_compiled_module(pathname, st.st_mtime, cpathname); + fpc = check_compiled_module(pathname, &st, cpathname); else fpc = NULL; diff --git a/Python/pythonrun.c b/Python/pythonrun.c --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1844,6 +1844,8 @@ run_pyc_file(FILE *fp, const char *filen "Bad magic number in .pyc file"); return NULL; } + /* Skip mtime and size */ + (void) PyMarshal_ReadLongFromFile(fp); (void) PyMarshal_ReadLongFromFile(fp); v = PyMarshal_ReadLastObjectFromFile(fp); fclose(fp);