diff --git a/Lib/packaging/command/install_distinfo.py b/Lib/packaging/command/install_distinfo.py
--- a/Lib/packaging/command/install_distinfo.py
+++ b/Lib/packaging/command/install_distinfo.py
@@ -8,6 +8,7 @@ import hashlib
 from shutil import rmtree
 
 from packaging import logger
+from packaging.util import write_record_file
 from packaging.command.cmd import Command
 
 
@@ -98,7 +99,8 @@ class install_distinfo(Command):
 
         if not self.no_resources:
             install_data = self.get_finalized_command('install_data')
-            if install_data.get_resources_out() != []:
+            resources = install_data.get_resources_out()
+            if resources:
                 resources_path = os.path.join(self.install_dir,
                                               'RESOURCES')
                 logger.info('creating %s', resources_path)
@@ -107,8 +109,7 @@ class install_distinfo(Command):
                         writer = csv.writer(f, delimiter=',',
                                             lineterminator='\n',
                                             quotechar='"')
-                        for row in install_data.get_resources_out():
-                            writer.writerow(row)
+                        writer.writerows(resources)
 
                 self.outfiles.append(resources_path)
 
@@ -116,27 +117,15 @@ class install_distinfo(Command):
             record_path = os.path.join(self.install_dir, 'RECORD')
             logger.info('creating %s', record_path)
             if not self.dry_run:
-                with open(record_path, 'w', encoding='utf-8') as f:
-                    writer = csv.writer(f, delimiter=',',
-                                        lineterminator='\n',
-                                        quotechar='"')
+                install = self.get_finalized_command('install_dist')
+                installed_files = install.get_outputs()
+                write_record_file(record_path, installed_files)
 
-                    install = self.get_finalized_command('install_dist')
-
-                    for fpath in install.get_outputs():
-                        if fpath.endswith('.pyc') or fpath.endswith('.pyo'):
-                            # do not put size and md5 hash, as in PEP-376
-                            writer.writerow((fpath, '', ''))
-                        else:
-                            size = os.path.getsize(fpath)
-                            with open(fpath, 'rb') as fp:
-                                hash = hashlib.md5()
-                                hash.update(fp.read())
-                            md5sum = hash.hexdigest()
-                            writer.writerow((fpath, md5sum, size))
-
-                    # add the RECORD file itself
-                    writer.writerow((record_path, '', ''))
+            # the write_record_file function used above already adds the RECORD
+            # file path to the RECORD file itself, so it's okay that we add it
+            # to the list of install_distinfo's outputs now, *after*
+            # install_dist.get_outputs (which delegates to the subcommands) has
+            # been called
             self.outfiles.append(record_path)
 
     def get_outputs(self):
diff --git a/Lib/packaging/tests/test_command_install_distinfo.py b/Lib/packaging/tests/test_command_install_distinfo.py
--- a/Lib/packaging/tests/test_command_install_distinfo.py
+++ b/Lib/packaging/tests/test_command_install_distinfo.py
@@ -157,6 +157,7 @@ class InstallDistinfoTestCase(support.Te
 
         # checksum and size are not hard-coded for METADATA as it is
         # platform-dependent (line endings)
+        # FIXME line endings are not specified in any metadata PEP!
         metadata = os.path.join(modules_dest, 'Spamlib-0.1.dist-info',
                                 'METADATA')
         with open(metadata, 'rb') as fp:
@@ -187,6 +188,7 @@ class InstallDistinfoTestCase(support.Te
         self.assertEqual(found, expected)
 
     def test_record(self):
+        # XXX this test is too confusing, remove
         pkg_dir, dist = self.create_dist(name='foo',
                                          version='1.0')
         install_dir = self.mkdtemp()
diff --git a/Lib/packaging/tests/test_util.py b/Lib/packaging/tests/test_util.py
--- a/Lib/packaging/tests/test_util.py
+++ b/Lib/packaging/tests/test_util.py
@@ -2,6 +2,7 @@
 import os
 import sys
 import time
+import hashlib
 import logging
 import tempfile
 import textwrap
@@ -19,7 +20,8 @@ from packaging.util import (
     get_compiler_versions, _MAC_OS_X_LD_VERSION, byte_compile, find_packages,
     spawn, get_pypirc_path, generate_pypirc, read_pypirc, resolve_name, iglob,
     RICH_GLOB, egginfo_to_distinfo, is_setuptools, is_distutils, is_packaging,
-    get_install_method, cfg_to_args, generate_setup_py, encode_multipart)
+    get_install_method, cfg_to_args, generate_setup_py, encode_multipart,
+    write_record_file)
 
 from packaging.tests import support, unittest
 from packaging.tests.test_config import SETUP_CFG
@@ -111,6 +113,7 @@ class UtilTestCase(support.EnvironRestor
                    support.LoggingCatcher,
                    unittest.TestCase):
 
+    maxDiff = None
     restore_environ = ['HOME', 'PLAT']
 
     def setUp(self):
@@ -567,6 +570,50 @@ class UtilTestCase(support.EnvironRestor
         self.assertEqual(b'multipart/form-data; boundary=-x', content_type)
         self.assertEqual(EXPECTED_MULTIPART_OUTPUT, body.split(b'\r\n'))
 
+    def test_write_record_file(self):
+        tempdir = self.mkdtemp()
+        record = self.mktempfile().name
+        os.chdir(tempdir)
+        os.mkdir('spam')
+        os.makedirs(os.path.join('ham', '__pycache__'))
+        files = [(('shrubbery.py',), '1'),
+                 (('knights.csv',), '12'),
+                 # one test with a multi-byte character, to make sure the size
+                 # in bytes, not characters, is recorded
+                 (('spam', '__init__.py'), '123'),
+                 (('ham', '__init__.py'), '12\u00E9'),
+                 (('ham', '__pycache__', '__init__.pyc'), '12345'),
+                 (('ham', '__init__.pyo'), '123456'),
+                 # make sure only pyc and pyo get no size and checksum
+                 (('ham', '__init__.pye'), '1234567'),
+                 ]
+
+        # list of paths to be passed to the function
+        paths = []
+        # list of [path, checksum, size] rows to check against the CSV file
+        wanted = []
+
+        for file, contents in files:
+            self.write_file(file, contents, encoding='utf-8')
+            path = os.path.join(tempdir, *file)
+            paths.append(path)
+            if not path.endswith(('.pyc', '.pyo')):
+                bcontents = contents.encode('utf-8')
+                checksum = hashlib.md5(bcontents).hexdigest()
+                size = str(len(bcontents))
+            else:
+                checksum, size = '', ''
+            wanted.append([path, checksum, size])
+        wanted.append([record, '', ''])
+
+        # now call the function and check its results
+        write_record_file(record, paths)
+
+        with open(record) as file:
+            contents = file.read()
+        found = [line.split(',') for line in contents.splitlines()]
+        self.assertEqual(found, wanted)
+
 
 class GlobTestCaseBase(support.TempdirManager,
                        support.LoggingCatcher,
diff --git a/Lib/packaging/util.py b/Lib/packaging/util.py
--- a/Lib/packaging/util.py
+++ b/Lib/packaging/util.py
@@ -36,7 +36,7 @@ from packaging.errors import (PackagingP
     'Mixin2to3', 'run_2to3',
     # packaging compatibility helpers
     'cfg_to_args', 'generate_setup_py',
-    'egginfo_to_distinfo',
+    'egginfo_to_distinfo', 'write_record_file',
     'get_install_method',
     # misc
     'ask', 'check_environ', 'encode_multipart', 'resolve_name',
@@ -253,7 +253,6 @@ def split_quoted(s):
 
 def split_multiline(value):
     """Split a multiline string into a list, excluding blank lines."""
-
     return [element for element in (line.strip() for line in value.split('\n'))
             if element]
 
@@ -1130,25 +1129,36 @@ def _parse_record_file(record_file):
     return distinfo
 
 
-def _write_record_file(record_path, installed_files):
+def write_record_file(record_path, installed_files):
+    """Write a PEP 376-compliant RECORD file.
+
+    This creates a CSV file with rows containing for each file its path, its
+    MD5 checksum and its size. *record_path* is the full path to the CSV
+    file; *installed_files* is an iterable of file paths. Files must exist.
+    A row for record_path itself will be added automatically.
+
+    Files ending in .pyc or .pyo as well as the RECORD file itself get empty
+    cells for checksum and size (see the PEP).
+    """
+    rows = []
+    for fpath in installed_files:
+        if fpath.endswith(('.pyc', '.pyo')):
+            rows.append((fpath, '', ''))
+        else:
+            hash = hashlib.md5()
+            with open(fpath, 'rb') as fp:
+                hash.update(fp.read())
+            md5sum = hash.hexdigest()
+            size = os.path.getsize(fpath)
+            rows.append((fpath, md5sum, size))
+
+    rows.append((record_path, '', ''))
+
     with open(record_path, 'w', encoding='utf-8') as f:
-        writer = csv.writer(f, delimiter=',', lineterminator=os.linesep,
+        writer = csv.writer(f, delimiter=',', lineterminator='\n',
                             quotechar='"')
+        writer.writerows(rows)
 
-        for fpath in installed_files:
-            if fpath.endswith('.pyc') or fpath.endswith('.pyo'):
-                # do not put size and md5 hash, as in PEP-376
-                writer.writerow((fpath, '', ''))
-            else:
-                hash = hashlib.md5()
-                with open(fpath, 'rb') as fp:
-                    hash.update(fp.read())
-                md5sum = hash.hexdigest()
-                size = os.path.getsize(fpath)
-                writer.writerow((fpath, md5sum, size))
-
-        # add the RECORD file itself
-        writer.writerow((record_path, '', ''))
     return record_path
 
 
@@ -1194,7 +1204,7 @@ def egginfo_to_distinfo(record_file, ins
 
     record_path = distinfo['record_path']
     logger.info('creating %s', record_path)
-    _write_record_file(record_path, distinfo['installed'])
+    write_record_file(record_path, distinfo['installed'])
 
     if remove_egginfo:
         egginfo = distinfo['egginfo']