Factor bytecode-container parsing out of the importlib loader classes.

* _get_sourcefile(): fix the misspelled name ``bytcode_path``, strip the
  trailing character of the bytecode path with ``[:-1]`` (``[-1:]`` would keep
  only the last character), and test the computed ``source_path`` instead of
  the undefined name ``source_stats``.
* Move ``_bytes_from_bytecode`` from a loader method to a module-level helper
  and add ``_read_marshalled_code``, ``BytecodeHeaderError`` and
  ``parse_bytecode_container``.
* The source loader's get_code() now ignores only header errors
  (``BytecodeHeaderError``) when reading cached bytecode; the sourceless
  loader's get_code() reuses the same helpers.

diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -478,7 +478,7 @@
     try:
         source_path = source_from_cache(bytecode_path)
     except (NotImplementedError, ValueError):
-        source_path = bytcode_path[-1:]
+        source_path = bytecode_path[:-1]
 
-    return source_path if _path_isfile(source_stats) else bytecode_path
+    return source_path if _path_isfile(source_path) else bytecode_path
 
@@ -622,7 +622,83 @@
         _warnings.warn(msg.format(portions[0]), ImportWarning)
     return loader
 
 
+# Helper functions for reading bytecode
+def _bytes_from_bytecode(fullname, data, bytecode_path, source_stats=None):
+    """Return the marshalled bytes from bytecode, verifying the magic
+    number, timestamp and source size along the way.
+    If source_stats is None then skip the timestamp check.
+
+    """
+    magic = data[:4]
+    raw_timestamp = data[4:8]
+    raw_size = data[8:12]
+    if magic != _MAGIC_BYTES:
+        msg = 'bad magic number in {!r}: {!r}'.format(fullname, magic)
+        raise ImportError(msg, name=fullname, path=bytecode_path)
+    elif len(raw_timestamp) != 4:
+        message = 'bad timestamp in {}'.format(fullname)
+        _verbose_message(message)
+        raise EOFError(message)
+    elif len(raw_size) != 4:
+        message = 'bad size in {}'.format(fullname)
+        _verbose_message(message)
+        raise EOFError(message)
+    if source_stats is not None:
+        try:
+            source_mtime = int(source_stats['mtime'])
+        except KeyError:
+            pass
+        else:
+            if _r_long(raw_timestamp) != source_mtime:
+                message = 'bytecode is stale for {}'.format(fullname)
+                _verbose_message(message)
+                raise ImportError(message, name=fullname,
+                                  path=bytecode_path)
+        try:
+            source_size = source_stats['size'] & 0xFFFFFFFF
+        except KeyError:
+            pass
+        else:
+            if _r_long(raw_size) != source_size:
+                raise ImportError(
+                    "bytecode is stale for {}".format(fullname),
+                    name=fullname, path=bytecode_path)
+    # Can't read the code object yet as errors from marshal loading need to
+    # propagate even when source is available.
+    return data[12:]
+
+def _read_marshalled_code(fullname, data, bytecode_path):
+    """Read a marshalled code object from binary data"""
+    codeobj = marshal.loads(data)
+    if isinstance(codeobj, _code_type):
+        _verbose_message('code object from {}',
+                         bytecode_path)
+        return codeobj
+    raise ImportError("Non-code object in {}".format(bytecode_path),
+                      name=fullname, path=bytecode_path)
+
+class BytecodeHeaderError(Exception):
+    """Report an error in parsing the headers in a bytecode container
+
+    Inspect __cause__ for details of the error
+    """
+
+def parse_bytecode_container(fullname, data, bytecode_path=None,
+                             source_path=None, source_stats=None):
+    """Process data from a bytecode container to produce a code object"""
+    try:
+        code_data = _bytes_from_bytecode(fullname, data,
+                                         bytecode_path,
+                                         source_stats)
+    except (ImportError, EOFError) as exc:
+        # We change the exception type so SourceLoader.get_code can ignore
+        # header processing errors when reading from the cache
+        raise BytecodeHeaderError from exc
+    codeobj = _read_marshalled_code(fullname, code_data, bytecode_path)
+    if source_path is not None:
+        _imp._fix_co_filename(codeobj, source_path)
+    return codeobj
 
 # Loaders #####################################################################
 
@@ -801,51 +877,6 @@
         tail_name = fullname.rpartition('.')[2]
         return filename_base == '__init__' and tail_name != '__init__'
 
-    def _bytes_from_bytecode(self, fullname, data, bytecode_path, source_stats):
-        """Return the marshalled bytes from bytecode, verifying the magic
-        number, timestamp and source size along the way.
-
-        If source_stats is None then skip the timestamp check.
-
-        """
-        magic = data[:4]
-        raw_timestamp = data[4:8]
-        raw_size = data[8:12]
-        if magic != _MAGIC_BYTES:
-            msg = 'bad magic number in {!r}: {!r}'.format(fullname, magic)
-            raise ImportError(msg, name=fullname, path=bytecode_path)
-        elif len(raw_timestamp) != 4:
-            message = 'bad timestamp in {}'.format(fullname)
-            _verbose_message(message)
-            raise EOFError(message)
-        elif len(raw_size) != 4:
-            message = 'bad size in {}'.format(fullname)
-            _verbose_message(message)
-            raise EOFError(message)
-        if source_stats is not None:
-            try:
-                source_mtime = int(source_stats['mtime'])
-            except KeyError:
-                pass
-            else:
-                if _r_long(raw_timestamp) != source_mtime:
-                    message = 'bytecode is stale for {}'.format(fullname)
-                    _verbose_message(message)
-                    raise ImportError(message, name=fullname,
-                                      path=bytecode_path)
-            try:
-                source_size = source_stats['size'] & 0xFFFFFFFF
-            except KeyError:
-                pass
-            else:
-                if _r_long(raw_size) != source_size:
-                    raise ImportError(
-                        "bytecode is stale for {}".format(fullname),
-                        name=fullname, path=bytecode_path)
-        # Can't return the code object as errors from marshal loading need to
-        # propagate even when source is available.
-        return data[12:]
-
     @module_for_loader
     def _load_module(self, module, *, sourceless=False):
         """Helper for load_module able to handle either source or sourceless
@@ -957,24 +988,16 @@
                     pass
                 else:
                     try:
-                        bytes_data = self._bytes_from_bytecode(fullname, data,
-                                                               bytecode_path,
-                                                               st)
-                    except (ImportError, EOFError):
+                        found = parse_bytecode_container(fullname, data,
+                                                         bytecode_path,
+                                                         source_path, st)
+                    except BytecodeHeaderError:
                         pass
                     else:
-                        _verbose_message('{} matches {}', bytecode_path,
-                                        source_path)
-                        found = marshal.loads(bytes_data)
-                        if isinstance(found, _code_type):
-                            _imp._fix_co_filename(found, source_path)
-                            _verbose_message('code object from {}',
-                                            bytecode_path)
-                            return found
-                        else:
-                            msg = "Non-code object in {}"
-                            raise ImportError(msg.format(bytecode_path),
-                                          name=fullname, path=bytecode_path)
+                        _verbose_message('{} matches {}',
+                                         bytecode_path, source_path)
+                        return found
+
         source_bytes = self.get_data(source_path)
         code_object = _call_with_frames_removed(compile,
                           source_bytes, source_path, 'exec',
@@ -1092,14 +1115,8 @@
     def get_code(self, fullname):
         path = self.get_filename(fullname)
         data = self.get_data(path)
-        bytes_data = self._bytes_from_bytecode(fullname, data, path, None)
-        found = marshal.loads(bytes_data)
-        if isinstance(found, _code_type):
-            _verbose_message('code object from {!r}', path)
-            return found
-        else:
-            raise ImportError("Non-code object in {}".format(path),
-                              name=fullname, path=path)
+        code_data = _bytes_from_bytecode(fullname, data, path)
+        return _read_marshalled_code(fullname, code_data, path)
 
     def get_source(self, fullname):
         """Return None as there is no source code."""