# HG changeset patch # Parent cb911e1fb3dc936553da29917f9ce0eb1a20bb7c Implement PEP 441 - Improving Python ZIP Application Support diff -r cb911e1fb3dc Doc/library/distribution.rst --- a/Doc/library/distribution.rst Wed Feb 25 18:14:47 2015 +0200 +++ b/Doc/library/distribution.rst Thu Feb 26 16:38:09 2015 +0000 @@ -12,3 +12,4 @@ distutils.rst ensurepip.rst venv.rst + zipapp.rst diff -r cb911e1fb3dc Doc/library/zipapp.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Doc/library/zipapp.rst Thu Feb 26 16:38:09 2015 +0000 @@ -0,0 +1,255 @@ +:mod:`zipapp` --- Manage executable Python zip archives +======================================================= + +.. module:: zipapp + :synopsis: Manage executable Python zip archives + + +.. index:: + single: Executable Zip Files + +.. versionadded:: 3.5 + +**Source code:** :source:`Lib/zipapp.py` + +-------------- + +This module provides tools to manage the creation of zip files containing +Python code, which can be executed directly by the Python interpreter (see +:ref:`using-on-interface-options`). The module provides both a +:ref:`zipapp-command-line-interface` and a :ref:`zipapp-python-api`. + + +Basic Example +------------- + +The following example shows how the :ref:`zipapp-command-line-interface` +can be used to create an executable archive from a directory containing +Python code. When run, the archive will execute the "main" function from +the module "myapp" in the archive. + +.. code-block:: sh + + $ python -m zipapp myapp -m "myapp:main" + $ python myapp.pyz + + + +.. _zipapp-command-line-interface: + +Command-Line Interface +---------------------- + +When called as a program from the command line, the following form is used:: + + python -m zipapp source [options] + +If SOURCE is a directory, this will create an archive from the contents of +SOURCE. 
If SOURCE is a file, it should be an archive, and it will be copied to +the target archive (or the contents of its shebang line will be displayed if the +--info option is specified). + +The following options are understood: + +.. program:: zipapp + +.. cmdoption:: -o OUTPUT, --output=OUTPUT + + Write the output to a file named OUTPUT. If this option is not specified, + the output filename will be the same as the input SOURCE, with the extension + ".pyz" added. If an explicit filename is given, it is used as is (so a + ".pyz" extension should be included if required). + + An output filename must be specified if the SOURCE is an archive (and in that + case, OUTPUT must not be the same as SOURCE). + +.. cmdoption:: -p INTERPRETER, --python=INTERPRETER + + Add a ``#!`` line to the archive specifying INTERPRETER as the command + to run. Also, on POSIX, make the archive executable. The default is to + write no ``#!`` line, and not make the file executable. + +.. cmdoption:: -m MAINFN, --main=MAINFN + + Write a ``__main__.py`` file to the archive that executes MAINFN. The + MAINFN argument should have the form "pkg.mod:fn", where "pkg.mod" is a + package/module in the archive, and "fn" is a callable in the given + module. The ``__main__.py`` file will execute that callable. + + --main cannot be specified when copying an archive. + +.. cmdoption:: --info + + Display the interpreter embedded in the archive, for diagnostic purposes. In + this case, any other options are ignored and SOURCE must be an archive, not a + directory. + +.. cmdoption:: -h, --help + + Print a short usage message and exit. + + +.. _zipapp-python-api: + +Python API +---------- + +The module defines two convenience functions: + + +.. function:: create_archive(source, target=None, interpreter=None, main=None) + + Create an application archive from *source*. 
The source can be any + of the following: + + * The name of a directory, in which case a new application archive + will be created from the content of that directory. + * The name of an existing application archive file, in which case the file is + copied to the target (modifying it to reflect the value given for the + *interpreter* argument). The file name should include the ``.pyz`` + extension, if required. + * A file object open for reading in bytes mode. The content of the + file should be an application archive, and the file object is + assumed to be positioned at the start of the archive. + + The *target* argument determines where the resulting archive will be + written: + + * If it is the name of a file, the archive will be written to that + file. + * If it is an open file object, the archive will be written to that + file object, which must be open for writing in bytes mode. + * If the target is omitted (or None), the source must be a directory + and the target will be a file with the same name as the source, with + a ``.pyz`` extension added. + + The *interpreter* argument specifies the name of the Python + interpreter with which the archive will be executed. It is written as + a "shebang" line at the start of the archive. On POSIX, this will be + interpreted by the OS, and on Windows it will be handled by the Python + launcher. Omitting the *interpreter* results in no shebang line being + written. If an interpreter is specified, and the target is a + filename, the executable bit of the target file will be set. + + The *main* argument specifies the name of a callable which will be + used as the main program for the archive. It can only be specified if + the source is a directory, and the source does not already contain a + ``__main__.py`` file. The *main* argument should take the form + "pkg.module:callable" and the archive will be run by importing + "pkg.module" and executing the given callable with no arguments. 
It + is an error to omit *main* if the source is a directory and does not + contain a ``__main__.py`` file, as otherwise the resulting archive + would not be executable. + + If a file object is specified for *source* or *target*, it is the + caller's responsibility to close it after calling create_archive. + + When copying an existing archive, file objects supplied only need + ``read`` and ``readline``, or ``write`` methods. When creating an + archive from a directory, if the target is a file object it will be + passed to the ``zipfile.ZipFile`` class, and must supply the methods + needed by that class. + +.. function:: get_interpreter(archive) + + Return the interpreter specified in the ``#!`` line at the start of the + archive. If there is no ``#!`` line, return :const:`None`. + The *archive* argument can be a filename or a file-like object open + for reading in bytes mode. It is assumed to be at the start of the archive. + + +.. _zipapp-examples: + +Examples +-------- + +Pack up a directory into an archive, and run it. + +.. code-block:: sh + + $ python -m zipapp myapp + $ python myapp.pyz + + +The same can be done using the :func:`create_archive` function:: + + >>> import zipapp + >>> zipapp.create_archive('myapp', 'myapp.pyz') + +To make the application directly executable on POSIX, specify an interpreter +to use. + +.. code-block:: sh + + $ python -m zipapp myapp -p "/usr/bin/env python" + $ ./myapp.pyz + + +To replace the shebang line on an existing archive, create a modified archive +using the :func:`create_archive` function:: + + >>> import zipapp + >>> zipapp.create_archive('old_archive.pyz', 'new_archive.pyz', '/usr/bin/python3') + +To update the file in place, do the replacement in memory using a :class:`BytesIO` +object, and then overwrite the source afterwards. Note that there is a risk +when overwriting a file in place that an error will result in the loss of +the original file. 
This code does not protect against such errors, but +production code should do so. Also, this method will only work if the archive +fits in memory:: + + >>> import zipapp + >>> import io + >>> temp = io.BytesIO() + >>> zipapp.create_archive('myapp.pyz', temp, '/usr/bin/python2') + >>> with open('myapp.pyz', 'wb') as f: + ... f.write(temp.getvalue()) + +Note that if you specify an interpreter and then distribute your application +archive, you need to ensure that the interpreter used is portable. The Python +launcher for Windows supports most common forms of POSIX ``#!`` line, but there +are other issues to consider: + +* If you use "/usr/bin/env python" (or other forms of the "python" command, + such as "/usr/bin/python"), you need to consider that your users may have + either Python 2 or Python 3 as their default, and write your code to work + under both versions. +* If you use an explicit version, for example "/usr/bin/env python3" your + application will not work for users who do not have that version. (This + may be what you want if you have not made your code Python 2 compatible). +* There is no way to say "python X.Y or later", so be careful of using an + exact version like "/usr/bin/env python3.4" as you will need to change your + shebang line for users of Python 3.5, for example. + +The Python Zip Application Archive Format +----------------------------------------- + +Python has been able to execute zip files which contain a ``__main__.py`` file +since version 2.6. In order to be executed by Python, an application archive +simply has to be a standard zip file containing a ``__main__.py`` file which +will be run as the entry point for the application. As usual for any Python +script, the parent of the script (in this case the zip file) will be placed on +:data:`sys.path` and thus further modules can be imported from the zip file. + +The zip file format allows arbitrary data to be prepended to a zip file. 
The +zip application format uses this ability to prepend a standard POSIX "shebang" +line to the file (``#!/path/to/interpreter``). + +Formally, the Python zip application format is therefore: + +1. An optional shebang line, containing the characters ``b'#!'`` followed by an + interpreter name, and then a newline (``b'\n'``) character. The interpreter + name can be anything acceptable to the OS "shebang" processing, or the Python + launcher on Windows. The interpreter should be encoded in UTF-8 on Windows, + and in :func:`sys.getfilesystemencoding()` on POSIX. +2. Standard zipfile data, as generated by the :mod:`zipfile` module. The + zipfile content *must* include a file called ``__main__.py`` (which must be + in the "root" of the zipfile - i.e., it cannot be in a subdirectory). The + zipfile data can be compressed or uncompressed. + +If an application archive has a shebang line, it may have the executable bit set +on POSIX systems, to allow it to be executed directly. + +There is no requirement that the tools in this module are used to create +application archives - the module is a convenience, but archives in the above +format created by any means are acceptable to Python. diff -r cb911e1fb3dc Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst Wed Feb 25 18:14:47 2015 +0200 +++ b/Doc/whatsnew/3.5.rst Thu Feb 26 16:38:09 2015 +0000 @@ -71,7 +71,8 @@ New library modules: -* None yet. +* :mod:`zipapp`: :ref:`Improving Python ZIP Application Support + <whatsnew-zipapp>` (:pep:`441`). New built-in features: @@ -137,10 +138,22 @@ New Modules =========== -.. module name -.. ----------- +.. _whatsnew-zipapp: -* None yet. +zipapp +------ + +The new :mod:`zipapp` module (specified in :pep:`441`) provides an API and +command line tool for creating executable Python Zip Applications, which +were introduced in Python 2.6 in :issue:`1739468` but which were not well +publicised, either at the time or since. 
+ +With the new module, bundling your application is as simple as putting all +the files, including a ``__main__.py`` file, into a directory ``myapp`` +and running:: + + $ python -m zipapp myapp + $ python myapp.pyz Improved Modules diff -r cb911e1fb3dc Lib/test/test_zipapp.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_zipapp.py Thu Feb 26 16:38:09 2015 +0000 @@ -0,0 +1,241 @@ +""" +Test harness for the zipapp module. +""" + +import io +import sys +import stat +import pathlib +import tempfile +from test.support import (run_unittest, rmtree) +import unittest +import zipfile +import zipapp + + +class ZipAppTest(unittest.TestCase): + """Test zipapp module functionality.""" + def setUp(self): + self.tmpdir = pathlib.Path(tempfile.mkdtemp()) + + def tearDown(self): + rmtree(str(self.tmpdir)) + + def test_create_archive(self): + """ + Test packing a directory + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + self.assertTrue(target.is_file()) + + def test_create_archive_default_target(self): + """ + Test packing a directory to the default name + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + zipapp.create_archive(str(source)) + expected_target = self.tmpdir / 'source.pyz' + self.assertTrue(expected_target.is_file()) + + def test_no_main(self): + """ + Test that packing a directory with no __main__.py fails + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / 'foo.py').touch() + target = self.tmpdir / 'source.pyz' + with self.assertRaises(zipapp.ZipAppError): + zipapp.create_archive(str(source), str(target)) + + def test_main_and_main_py(self): + """ + Test that supplying a main argument with __main__.py fails + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + with 
self.assertRaises(zipapp.ZipAppError): + zipapp.create_archive(str(source), str(target), main='pkg.mod:fn') + + def test_main_written(self): + """ + Test that the __main__.py is written correctly + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / 'foo.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), main='pkg.mod:fn') + with zipfile.ZipFile(str(target), 'r') as z: + self.assertIn('__main__.py', z.namelist()) + self.assertIn(b'pkg.mod.fn()', z.read('__main__.py')) + + def test_default_no_shebang(self): + """ + Test that no shebang line is written to the target by default + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + with target.open('rb') as f: + self.assertNotEqual(f.read(2), b'#!') + + def test_custom_interpreter(self): + """ + Test that a shebang line with a custom interpreter is written correctly + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + with target.open('rb') as f: + self.assertEqual(f.read(2), b'#!') + self.assertEqual(b'python\n', f.readline()) + + def test_pack_to_fileobj(self): + """ + Test that we can pack to a file object + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = io.BytesIO() + zipapp.create_archive(str(source), target, interpreter='python') + self.assertTrue(target.getvalue().startswith(b'#!python\n')) + + def test_read_shebang(self): + """ + Test that we can read the shebang line correctly + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + 
self.assertEqual(zipapp.get_interpreter(str(target)), 'python') + + def test_read_missing_shebang(self): + """ + Test that we reading the shebang line of a file without one returns None + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target)) + self.assertEqual(zipapp.get_interpreter(str(target)), None) + + def test_modify_shebang(self): + """ + Test that we can change the shebang of a file + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = self.tmpdir / 'changed.pyz' + zipapp.create_archive(str(target), str(new_target), interpreter='python2.7') + self.assertEqual(zipapp.get_interpreter(str(new_target)), 'python2.7') + + def test_write_shebang_to_fileobj(self): + """ + Test that we can change the shebang of a file, writing the result to a file object + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = io.BytesIO() + zipapp.create_archive(str(target), new_target, interpreter='python2.7') + self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + + def test_read_from_fileobj(self): + """ + Test that we can copy an archive using an open file object + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + temp_archive = io.BytesIO() + zipapp.create_archive(str(source), temp_archive, interpreter='python') + new_target = io.BytesIO() + temp_archive.seek(0) + zipapp.create_archive(temp_archive, new_target, interpreter='python2.7') + self.assertTrue(new_target.getvalue().startswith(b'#!python2.7\n')) + + def test_remove_shebang(self): + """ 
+ Test that we can remove the shebang from a file + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + new_target = self.tmpdir / 'changed.pyz' + zipapp.create_archive(str(target), str(new_target), interpreter=None) + self.assertEqual(zipapp.get_interpreter(str(new_target)), None) + + def test_content_of_copied_archive(self): + """ + Test that copying an archive doesn't corrupt it + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = io.BytesIO() + zipapp.create_archive(str(source), target, interpreter='python') + new_target = io.BytesIO() + target.seek(0) + zipapp.create_archive(target, new_target, interpreter=None) + new_target.seek(0) + with zipfile.ZipFile(new_target, 'r') as z: + self.assertEqual(set(z.namelist()), {'__main__.py'}) + + # (Unix only) tests that archives with shebang lines are made executable + @unittest.skipIf(sys.platform == 'win32', + 'Windows does not support an executable bit') + def test_shebang_is_executable(self): + """ + Test that an archive written with a shebang line is made executable + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter='python') + self.assertNotEqual((target.stat().st_mode & stat.S_IEXEC), 0) + + @unittest.skipIf(sys.platform == 'win32', + 'Windows does not support an executable bit') + def test_no_shebang_is_not_executable(self): + """ + Test that an archive written without a shebang line is not made executable + """ + source = self.tmpdir / 'source' + source.mkdir() + (source / '__main__.py').touch() + target = self.tmpdir / 'source.pyz' + zipapp.create_archive(str(source), str(target), interpreter=None) + self.assertEqual((target.stat().st_mode & stat.S_IEXEC), 0) + + +def 
test_main(): + run_unittest(ZipAppTest) + +if __name__ == "__main__": + test_main() diff -r cb911e1fb3dc Lib/zipapp.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/zipapp.py Thu Feb 26 16:38:09 2015 +0000 @@ -0,0 +1,183 @@ +from zipfile import ZipFile +import pathlib +import argparse +import os +import sys +import stat +import shutil + +# The __main__.py used if the users specifies "-m module:fn". +# Note that this will always be written as UTF-8 (module and +# function names can be non-ASCII in Python 3). +MAIN_TEMPLATE = '''\ +# -*- coding: utf-8 -*- +import {module} +{module}.{fn}() +''' + +# The Windows launcher defaults to UTF-8 when parsing shebang lines if the +# file has no BOM. So use UTF-8 on Windows. +# On Unix, use the filesystem encoding. +if sys.platform.startswith('win'): + shebang_encoding = 'utf-8' +else: + shebang_encoding = sys.getfilesystemencoding() + +class ZipAppError(ValueError): + pass + +def _write_file_prefix(f, interpreter): + """Write a shebang line""" + if interpreter: + shebang = b'#!' 
+ interpreter.encode(shebang_encoding) + b'\n' + f.write(shebang) + +def _copy_archive(archive, new_archive, interpreter=None): + """Copy an application archive, modifying the shebang line""" + + src_filename_supplied = isinstance(archive, str) + dst_filename_supplied = isinstance(new_archive, str) + + if src_filename_supplied: + src = open(archive, 'rb') + else: + src = archive + + try: + if dst_filename_supplied: + dst = open(new_archive, 'wb') + else: + dst = new_archive + + # Skip the shebang line from the source + magic = src.read(2) + if magic == b'#!': + src.readline() + magic = b'' + + try: + _write_file_prefix(dst, interpreter) + dst.write(magic) + shutil.copyfileobj(src, dst) + finally: + if dst_filename_supplied: + dst.close() + finally: + if src_filename_supplied: + src.close() + + if dst_filename_supplied and interpreter: + os.chmod(new_archive, os.stat(new_archive).st_mode | stat.S_IEXEC) + + +def create_archive(source, target=None, interpreter=None, main=None): + """Create an application archive from SOURCE. + + The SOURCE can be the name of a directory, or a filename or a file-like + object referring to an existing archive. + + The content of SOURCE is packed into an application archive in TARGET, + which can be a filename or a file-like object. If SOURCE is a directory, + TARGET can be omitted and will default to the name of SOURCE with .pyz + appended. + + The created application archive will have a shebang line specifying + that it should run with INTERPRETER (there will be no shebang line if + INTERPRETER is None), and a __main__.py which runs MAIN (if MAIN is + not specified, an existing __main__.py will be used). It is an error to specify + MAIN for anything other than a directory source with no __main__.py, and it + is an error to omit MAIN if the directory has no __main__.py. + """ + + # Are we copying an existing archive? 
+ if not (isinstance(source, str) and os.path.isdir(source)): + _copy_archive(source, target, interpreter) + return + + # We are creating a new archive from a directory + has_main = os.path.exists(os.path.join(source, '__main__.py')) + if main and has_main: + raise ZipAppError("Cannot specify entry point if the source has __main__.py") + if not (main or has_main): + raise ZipAppError("Archive has no entry point") + + if target is None: + target = source + '.pyz' + + filename_supplied = isinstance(target, str) + if filename_supplied: + fd = open(target, 'wb') + else: + fd = target + + try: + _write_file_prefix(fd, interpreter) + with ZipFile(fd, 'w') as z: + root = pathlib.Path(source) + for child in root.rglob('*'): + if child.is_file(): + arcname = str(child.relative_to(root)) + z.write(str(child), arcname) + if main: + module, sep, fn = main.partition(':') + main = MAIN_TEMPLATE.format(module=module, fn=fn) + z.writestr('__main__.py', main.encode('utf-8')) + finally: + if filename_supplied: + fd.close() + + if filename_supplied and interpreter: + os.chmod(target, os.stat(target).st_mode | stat.S_IEXEC) + +def get_interpreter(archive): + filename_supplied = isinstance(archive, str) + + if filename_supplied: + f = open(archive, 'rb') + else: + f = archive + + try: + if f.read(2) == b'#!': + return f.readline().strip().decode(shebang_encoding) + finally: + if filename_supplied: + f.close() + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--output', '-o', default=None, + help="The name of the output archive. 
Required if SOURCE is an archive.") + parser.add_argument('--python', '-p', default=None, + help="The name of the Python interpreter to use (default: no shebang line).") + parser.add_argument('--main', '-m', default=None, + help="The main function of the application (default: use an existing __main__.py).") + parser.add_argument('--info', default=False, action='store_true', + help="Display the interpreter from the archive.") + parser.add_argument('source', + help="Source directory (or existing archive).") + + args = parser.parse_args() + + # Handle python -m zipapp archive.pyz --info + if args.info: + if not os.path.isfile(args.source): + print("Can only get info for an archive file", file=sys.stderr) + sys.exit(1) + interpreter = get_interpreter(args.source) + print("Interpreter: {}".format("" if interpreter is None else interpreter)) + sys.exit(0) + + if os.path.isfile(args.source): + if args.output is None or os.path.samefile(args.source, args.output): + print("In-place editing of archives is not supported", file=sys.stderr) + sys.exit(1) + if args.main: + print("Cannot change the main function when copying", file=sys.stderr) + sys.exit(1) + + create_archive(args.source, args.output, interpreter=args.python, main=args.main) + + +if __name__ == '__main__': + main()