diff -r 3d6b67361749 Doc/library/hashlib.rst --- a/Doc/library/hashlib.rst Fri Apr 01 06:55:55 2016 +0000 +++ b/Doc/library/hashlib.rst Sat Apr 02 11:36:35 2016 +0300 @@ -176,6 +176,43 @@ compute the digests of data sharing a common initial substring. +.. _hashlib-commandline: + +Command Line Interface +---------------------- + +.. versionadded:: 3.6 + +The :mod:`hashlib` module can also be invoked directly using the :option:`-m` +switch of the interpreter with an ``algorithm`` argument. Available algorithms +are :func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, +and :func:`sha512`. Additional algorithms may also be available depending upon +the OpenSSL library that Python uses on your platform. + +When a ``file`` argument is given, the hash is calculated on the file:: + + $ python -m hashlib sha256 /bin/sh + 2934648ffdb7b77f507a6dbd3b2b3fbffdbf56c39e29e21849edd2cd5f390b76 /bin/sh + +Multiple files can also be specified:: + + $ python -m hashlib sha256 /bin/sh /bin/ls + 2934648ffdb7b77f507a6dbd3b2b3fbffdbf56c39e29e21849edd2cd5f390b76 /bin/sh + 1959304caf1c2b4abe1546056ad71223e75027b02411dd95a5e6969a84419c27 /bin/ls + +With no ``file``, or when ``file`` is ``"-"``, read standard input:: + + $ cat /bin/sh | python -m hashlib sha256 + 2934648ffdb7b77f507a6dbd3b2b3fbffdbf56c39e29e21849edd2cd5f390b76 - + +If you wish to check a set of files against an existing list of file +hashes, you may use the :option:`!-c` option:: + + $ python -m hashlib sha256 /bin/sh /bin/ls > /tmp/check + $ python -m hashlib -c sha256 /tmp/check + /bin/sh: OK + /bin/ls: OK + Key derivation -------------- diff -r 3d6b67361749 Lib/hashlib.py --- a/Lib/hashlib.py Fri Apr 01 06:55:55 2016 +0000 +++ b/Lib/hashlib.py Sat Apr 02 11:36:35 2016 +0300 @@ -1,7 +1,8 @@ +#! /usr/bin/env python3 #. Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org) # Licensed to PSF under a Contributor Agreement. 
import sys


def _calculate_file_hash(algorithm, file_obj):
    """Return the hex digest of all data readable from *file_obj*.

    *algorithm* is a hash name accepted by new().  *file_obj* is a binary
    file-like object; it is consumed in fixed-size chunks so that large
    inputs are never held in memory in one piece.
    """
    hash_obj = new(algorithm)
    block_size = 262144  # 256 KB
    while True:
        data = file_obj.read(block_size)
        # Only an empty read marks end-of-file.  A read that returns fewer
        # bytes than requested is not a reliable EOF signal for every
        # binary stream, so keep reading until nothing comes back.
        if not data:
            break
        hash_obj.update(data)
    return hash_obj.hexdigest()


def _convert_to_stdin(file_name):
    """Map a command-line file argument to arguments for open().

    Returns a ``(target, closefd)`` pair.  For ``'-'`` the target is the
    file descriptor of sys.stdin and *closefd* is False, so that closing
    the wrapper leaves standard input open.  Any other name is returned
    unchanged with *closefd* set to True.
    """
    if file_name == '-':
        return sys.stdin.fileno(), False
    else:
        return file_name, True


def _sanitize_file_name(file_name):
    """Return a printable form of an open() target.

    String names are passed through repr() with the surrounding quotes
    removed, which escapes control characters.  Anything else (the stdin
    file descriptor) is shown as ``'-'``.
    """
    return repr(file_name)[1:-1] if isinstance(file_name, str) else '-'


def _check_file(file_name, algorithm, expected_hash):
    """Hash *file_name* and report whether it matches *expected_hash*.

    Prints ``<name>: OK`` or ``<name>: FAILED`` to standard output.  An
    OSError is reported on standard error instead of being raised.
    """
    try:
        target, closefd = _convert_to_stdin(file_name)
        with open(target, 'rb', closefd=closefd) as checked_file:
            actual_hash = _calculate_file_hash(algorithm, checked_file)
        # Sanitize once so that both outcomes print the same name (and
        # never the raw stdin file descriptor).
        display_name = _sanitize_file_name(target)
        if expected_hash == actual_hash:
            print(f'{display_name}: OK')
        else:
            print(f'{display_name}: FAILED')
    except OSError as e:
        print("'{}': {}".format(e.filename, e.strerror), file=sys.stderr)


# Usable as a script...
def main(argv=None):
    """Command-line entry point.

    *argv* is the list of command-line arguments to parse; when it is
    None (the default) argparse reads them from sys.argv.

    Without ``-c`` the digest of every FILE is printed as
    ``<hexdigest> <name>``.  With ``-c`` every FILE is read as a list of
    such lines and each listed file is verified.  An OSError is reported
    on standard error and processing continues with the next FILE.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm', action='store',
                        # Sorted so help and error messages are stable.
                        choices=sorted(algorithms_available),
                        help='Specify algorithm to use for hash calculation.')
    parser.add_argument('file', metavar='FILE',
                        # A list, to match what nargs='*' produces when
                        # arguments are actually given.
                        default=['-'], nargs='*',
                        help="FILE's list. With no FILE, or when FILE is -, read standard input.")
    parser.add_argument('-c', '--check', action="store_true",
                        help="Read hash sums from the FILE's list and check them")
    args = parser.parse_args(argv)
    for file_name in args.file:
        target, closefd = _convert_to_stdin(file_name)
        try:
            if args.check:
                with open(target, 'r', closefd=closefd) as f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line:
                            # A blank line names no file; skip it.
                            continue
                        expected_hash, _, checked_file_name = line.partition(' ')
                        _check_file(checked_file_name, args.algorithm, expected_hash)
            else:
                with open(target, 'rb', closefd=closefd) as f:
                    hash_result = _calculate_file_hash(args.algorithm, f)
                display_name = _sanitize_file_name(target)
                print(f'{hash_result} {display_name}')
        except OSError as e:
            print("'{}': {}".format(e.filename, e.strerror), file=sys.stderr)


if __name__ == '__main__':
    main()
import hashlib
import os
import sys
import tempfile
import unittest
from collections import OrderedDict

# Were we compiled --with-pydebug or with #define Py_DEBUG?
COMPILED_WITH_PYDEBUG = hasattr(sys, 'gettotalrefcount')


class TestMain(unittest.TestCase):
    """Tests for the ``python -m hashlib`` command-line interface.

    Each test runs the interpreter in a subprocess against small files
    created in a per-test temporary directory.
    """

    supported_algorithms = hashlib.algorithms_available
    file_data = [("test_a", b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"),
                 ("test_b", b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')]

    def setUp(self):
        self.temp_dir_obj = tempfile.TemporaryDirectory()
        # Registered immediately so the directory is removed even when
        # the rest of setUp fails (tearDown is not run in that case).
        self.addCleanup(self.temp_dir_obj.cleanup)
        self.temp_dir = self.temp_dir_obj.name
        # Maps full path -> file contents, in creation order.
        self.file_dict = OrderedDict()
        for file_name, file_data in self.file_data:
            full_path = os.path.join(self.temp_dir, file_name)
            self.file_dict[full_path] = file_data
            with open(full_path, 'wb') as f:
                f.write(file_data)

    def get_output(self, *args):
        """Run ``python -m hashlib *args`` and return its result tuple."""
        from test.support import script_helper
        return script_helper.assert_python_ok('-m', 'hashlib', *args)

    def expected_output(self, algorithm, file_name_data_list):
        """Return the bytes the CLI should print for (name, data) pairs."""
        output = []
        for file_name, data in file_name_data_list:
            output.append("{} {}".format(
                hashlib.new(algorithm, data).hexdigest(), file_name))
        return "\n".join(output).encode("ascii")

    def test_calculate_file_hash(self):
        for file_name, data in self.file_dict.items():
            for algorithm in self.supported_algorithms:
                with self.subTest(algorithm=algorithm, file_name=file_name):
                    self.assertEqual(
                        self.get_output(algorithm, file_name).out.rstrip(),
                        self.expected_output(algorithm, [(file_name, data)]))

    def test_calculate_hash_of_multiple_files(self):
        for algorithm in self.supported_algorithms:
            with self.subTest(algorithm=algorithm):
                self.assertEqual(
                    self.get_output(algorithm, *self.file_dict).out.rstrip(),
                    self.expected_output(algorithm, self.file_dict.items()))

    def test_calculate_hash_on_non_existing_file(self):
        self.assertEqual(
            self.get_output("md5", "none_existing_path").err.rstrip(),
            b"'none_existing_path': No such file or directory")

    def test_calculate_hash_on_directory(self):
        self.assertEqual(
            self.get_output("md5", self.temp_dir).err.rstrip(),
            "'{}': Is a directory".format(self.temp_dir).encode("ascii"))

    def test_calculate_hash_from_stdin(self):
        from test.support import script_helper
        data = next(iter(self.file_dict.values()))
        for algorithm in self.supported_algorithms:
            with self.subTest(algorithm=algorithm):
                with script_helper.spawn_python('-m', 'hashlib', algorithm) as proc:
                    out, err = proc.communicate(data)
                self.assertEqual(out.rstrip(),
                                 self.expected_output(algorithm, [("-", data)]))
                self.assertIsNone(err)

    def test_check_hash(self):
        # creating check file with one correct hash and one bad hash:
        # both entries carry the digest of the first file's contents.
        check_file = os.path.join(self.temp_dir, "check")
        first_data = next(iter(self.file_dict.values()))
        for algorithm in self.supported_algorithms:
            with self.subTest(algorithm=algorithm):
                check_data = zip(self.file_dict.keys(), [first_data] * 2)
                with open(check_file, "wb") as cf:
                    cf.write(self.expected_output(algorithm, check_data))
                self.assertEqual(
                    self.get_output("-c", algorithm, check_file).out.rstrip(),
                    "{}: OK\n{}: FAILED".format(*self.file_dict.keys()).encode("ascii"))


if __name__ == "__main__":
    unittest.main()