diff -r 1bc0a8310b9f Lib/hashlib.py
--- a/Lib/hashlib.py	Thu Apr 17 20:11:19 2014 +0200
+++ b/Lib/hashlib.py	Fri Apr 18 17:48:47 2014 -0700
@@ -60,7 +60,7 @@
 
 algorithms = __always_supported
 
-__all__ = __always_supported + ('new', 'algorithms')
+__all__ = __always_supported + ('new', 'algorithms', 'pbkdf2_hmac')
 
 
 def __get_builtin_constructor(name):
@@ -141,6 +141,101 @@
         import logging
         logging.exception('code for hash %s was not found.', __func_name)
 
+
+try:
+    # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
+    from _hashlib import pbkdf2_hmac
+except ImportError:
+    import binascii
+    import struct
+
+    _trans_5C = b"".join(chr(x ^ 0x5C) for x in range(256))
+    _trans_36 = b"".join(chr(x ^ 0x36) for x in range(256))
+
+    def _bin_to_long(x):
+        """
+        Convert a binary string into a long integer.
+
+        This is a clever optimization for fast xor vector math, equivalent to
+        Python 3's ``int.from_bytes(x, 'big')``.
+        """
+        return int(binascii.hexlify(x), 16)
+
+    def _long_to_bin(x, hex_format_string):
+        """
+        Convert a long integer into a binary string.
+        hex_format_string is like "%020x" for zero-padding to 20 hex digits
+        (10 bytes).
+
+        Similar to Python 3's ``int.to_bytes``.
+        """
+        return binascii.unhexlify((hex_format_string % x).encode('ascii'))
+
+    def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
+        """Password based key derivation function 2 (PKCS #5 v2.0)
+
+        This Python implementation, based on the hmac module, is about as
+        fast as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much
+        faster for long passwords.
+
+        hash_name is the digest name passed to new(); password and salt
+        are byte strings or objects supporting the buffer interface;
+        iterations is the number of rounds and dklen the length of the
+        derived key in bytes (default: the digest size of hash_name).
+
+        Raises TypeError if hash_name is not a str and ValueError if
+        iterations or dklen is smaller than 1.
+        """
+        if not isinstance(hash_name, str):
+            raise TypeError(hash_name)
+
+        if not isinstance(password, (bytes, bytearray)):
+            password = bytes(buffer(password))
+        if not isinstance(salt, (bytes, bytearray)):
+            salt = bytes(buffer(salt))
+
+        # Fast inline HMAC implementation
+        inner = new(hash_name)
+        outer = new(hash_name)
+        blocksize = getattr(inner, 'block_size', 64)
+        if len(password) > blocksize:
+            password = new(hash_name, password).digest()
+        password = password + b'\x00' * (blocksize - len(password))
+        inner.update(password.translate(_trans_36))
+        outer.update(password.translate(_trans_5C))
+
+        def prf(msg, inner=inner, outer=outer):
+            # PBKDF2_HMAC uses the password as key. We can re-use the same
+            # digest objects and just update copies to skip initialization.
+            icpy = inner.copy()
+            ocpy = outer.copy()
+            icpy.update(msg)
+            ocpy.update(icpy.digest())
+            return ocpy.digest()
+
+        if iterations < 1:
+            raise ValueError(iterations)
+        if dklen is None:
+            dklen = outer.digest_size
+        if dklen < 1:
+            raise ValueError(dklen)
+
+        hex_format_string = "%%0%ix" % (inner.digest_size * 2)
+
+        dkey = b''
+        loop = 1
+        while len(dkey) < dklen:
+            prev = prf(salt + struct.pack(b'>I', loop))
+            rkey = _bin_to_long(prev)
+            # xrange avoids building a list of `iterations` ints (Python 2).
+            for i in xrange(iterations - 1):
+                prev = prf(prev)
+                rkey ^= _bin_to_long(prev)
+            loop += 1
+            dkey += _long_to_bin(rkey, hex_format_string)
+
+        return dkey[:dklen]
+
 # Cleanup locals()
 del __always_supported, __func_name, __get_hash
 del __py_new, __hash_new, __get_openssl_constructor
diff -r 1bc0a8310b9f Lib/test/test_hashlib.py
--- a/Lib/test/test_hashlib.py	Thu Apr 17 20:11:19 2014 +0200
+++ b/Lib/test/test_hashlib.py	Fri Apr 18 17:48:47 2014 -0700
@@ -16,6 +16,8 @@
     threading = None
 import unittest
 import warnings
+from binascii import unhexlify
+
 from test import test_support
 from test.test_support import _4G, precisionbigmemtest
 
@@ -382,8 +384,78 @@
 
         self.assertEqual(expected_hash, hasher.hexdigest())
 
+
+class KDFTests(unittest.TestCase):
+    """Known-answer tests for hashlib.pbkdf2_hmac()."""
+
+    # (password, salt, iterations, dklen); dklen None selects the default
+    # length, -1 is a placeholder overridden per digest in pbkdf2_results.
+    pbkdf2_test_vectors = [
+        (b'password', b'salt', 1, None),
+        (b'password', b'salt', 2, None),
+        (b'password', b'salt', 4096, None),
+        # too slow, it takes over a minute on a fast CPU.
+        #(b'password', b'salt', 16777216, None),
+        (b'passwordPASSWORDpassword', b'saltSALTsaltSALTsaltSALTsaltSALTsalt',
+         4096, -1),
+        (b'pass\0word', b'sa\0lt', 4096, 16),
+    ]
+
+    # digest name -> [(expected key, dklen override or None), ...] in the
+    # same order as pbkdf2_test_vectors.
+    pbkdf2_results = {
+        "sha1": [
+            # official test vectors from RFC 6070
+            (unhexlify('0c60c80f961f0e71f3a9b524af6012062fe037a6'), None),
+            (unhexlify('ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957'), None),
+            (unhexlify('4b007901b765489abead49d926f721d065a429c1'), None),
+            #(unhexlify('eefe3d61cd4da4e4e9945b3d6ba2158c2634e984'), None),
+            (unhexlify('3d2eec4fe41c849b80c8d83662c0e44a8b291a964c'
+                       'f2f07038'), 25),
+            (unhexlify('56fa6aa75548099dcc37d7f03425e0c3'), None),],
+        "sha256": [
+            (unhexlify('120fb6cffcf8b32c43e7225256c4f837'
+                       'a86548c92ccc35480805987cb70be17b'), None),
+            (unhexlify('ae4d0c95af6b46d32d0adff928f06dd0'
+                       '2a303f8ef3c251dfd6e2d85a95474c43'), None),
+            (unhexlify('c5e478d59288c841aa530db6845c4c8d'
+                       '962893a001ce4e11a4963873aa98134a'), None),
+            #(unhexlify('cf81c66fe8cfc04d1f31ecb65dab4089'
+            #           'f7f179e89b3b0bcb17ad10e3ac6eba46'), None),
+            (unhexlify('348c89dbcbd32b2f32d814b8116e84cf2b17'
+                       '347ebc1800181c4e2a1fb8dd53e1c635518c7dac47e9'), 40),
+            (unhexlify('89b69d0516f829893c696226650a8687'), None),],
+        "sha512": [
+            (unhexlify('867f70cf1ade02cff3752599a3a53dc4af34c7a669815ae5'
+                       'd513554e1c8cf252c02d470a285a0501bad999bfe943c08f'
+                       '050235d7d68b1da55e63f73b60a57fce'), None),
+            (unhexlify('e1d9c16aa681708a45f5c7c4e215ceb66e011a2e9f004071'
+                       '3f18aefdb866d53cf76cab2868a39b9f7840edce4fef5a82'
+                       'be67335c77a6068e04112754f27ccf4e'), None),
+            (unhexlify('d197b1b33db0143e018b12f3d1d1479e6cdebdcc97c5c0f8'
+                       '7f6902e072f457b5143f30602641b3d55cd335988cb36b84'
+                       '376060ecd532e039b742a239434af2d5'), None),
+            (unhexlify('8c0511f4c6e597c6ac6315d8f0362e225f3c501495ba23b8'
+                       '68c005174dc4ee71115b59f9e60cd9532fa33e0f75aefe30'
+                       '225c583a186cd82bd4daea9724a3d3b8'), 64),
+            (unhexlify('9d9e9c4cd21fe4be24d5b8244c759665'), None),],
+    }
+
+    def test_pbkdf2_hmac(self):
+        for digest_name, results in self.pbkdf2_results.items():
+            for i, vector in enumerate(self.pbkdf2_test_vectors):
+                password, salt, rounds, dklen = vector
+                expected, overwrite_dklen = results[i]
+                if overwrite_dklen:
+                    dklen = overwrite_dklen
+                out = hashlib.pbkdf2_hmac(
+                    digest_name, password, salt, rounds, dklen)
+                self.assertEqual(out, expected,
+                                 (digest_name, password, salt, rounds, dklen))
+
+
 def test_main():
-    test_support.run_unittest(HashLibTestCase)
+    test_support.run_unittest(HashLibTestCase, KDFTests)
 
 if __name__ == "__main__":
     test_main()
diff -r 1bc0a8310b9f Modules/_hashopenssl.c
--- a/Modules/_hashopenssl.c	Thu Apr 17 20:11:19 2014 +0200
+++ b/Modules/_hashopenssl.c	Fri Apr 18 17:48:47 2014 -0700
@@ -37,6 +37,8 @@
 
 /* EVP is the preferred interface to hashing in OpenSSL */
 #include <openssl/evp.h>
+#include <openssl/hmac.h>
+#include <openssl/err.h>
 
 #define MUNCH_SIZE INT_MAX
 
@@ -491,6 +493,226 @@
     return ret_obj;
 }
 
+
+#if (OPENSSL_VERSION_NUMBER >= 0x10000000 && !defined(OPENSSL_NO_HMAC) \
+     && !defined(OPENSSL_NO_SHA))
+
+#define PY_PBKDF2_HMAC 1
+
+/* Improved implementation of PKCS5_PBKDF2_HMAC()
+ *
+ * PKCS5_PBKDF2_HMAC_fast() hashes the password exactly one time instead of
+ * `iter` times. Today (2013) the iteration count is typically 100,000 or
+ * more. The improved algorithm is not subject to a Denial-of-Service
+ * vulnerability with overly large passwords.
+ *
+ * Also OpenSSL < 1.0 doesn't provide PKCS5_PBKDF2_HMAC(), only
+ * PKCS5_PBKDF2_SHA1.
+ */
+static int
+PKCS5_PBKDF2_HMAC_fast(const char *pass, int passlen,
+                       const unsigned char *salt, int saltlen,
+                       int iter, const EVP_MD *digest,
+                       int keylen, unsigned char *out)
+{
+    unsigned char digtmp[EVP_MAX_MD_SIZE], *p, itmp[4];
+    int cplen, j, k, tkeylen, mdlen;
+    unsigned long i = 1;
+    HMAC_CTX hctx_tpl, hctx;
+
+    mdlen = EVP_MD_size(digest);
+    if (mdlen < 0)
+        return 0;
+
+    HMAC_CTX_init(&hctx_tpl);
+    HMAC_CTX_init(&hctx);
+    p = out;
+    tkeylen = keylen;
+    if (!HMAC_Init_ex(&hctx_tpl, pass, passlen, digest, NULL)) {
+        HMAC_CTX_cleanup(&hctx_tpl);
+        return 0;
+    }
+    while(tkeylen) {
+        if(tkeylen > mdlen)
+            cplen = mdlen;
+        else
+            cplen = tkeylen;
+        /* We are unlikely to ever use more than 256 blocks (5120 bits!)
+         * but just in case...
+         */
+        itmp[0] = (unsigned char)((i >> 24) & 0xff);
+        itmp[1] = (unsigned char)((i >> 16) & 0xff);
+        itmp[2] = (unsigned char)((i >> 8) & 0xff);
+        itmp[3] = (unsigned char)(i & 0xff);
+        if (!HMAC_CTX_copy(&hctx, &hctx_tpl)) {
+            HMAC_CTX_cleanup(&hctx_tpl);
+            return 0;
+        }
+        if (!HMAC_Update(&hctx, salt, saltlen)
+                || !HMAC_Update(&hctx, itmp, 4)
+                || !HMAC_Final(&hctx, digtmp, NULL)) {
+            HMAC_CTX_cleanup(&hctx_tpl);
+            HMAC_CTX_cleanup(&hctx);
+            return 0;
+        }
+        HMAC_CTX_cleanup(&hctx);
+        memcpy(p, digtmp, cplen);
+        for (j = 1; j < iter; j++) {
+            if (!HMAC_CTX_copy(&hctx, &hctx_tpl)) {
+                HMAC_CTX_cleanup(&hctx_tpl);
+                return 0;
+            }
+            if (!HMAC_Update(&hctx, digtmp, mdlen)
+                    || !HMAC_Final(&hctx, digtmp, NULL)) {
+                HMAC_CTX_cleanup(&hctx_tpl);
+                HMAC_CTX_cleanup(&hctx);
+                return 0;
+            }
+            HMAC_CTX_cleanup(&hctx);
+            for (k = 0; k < cplen; k++) {
+                p[k] ^= digtmp[k];
+            }
+        }
+        tkeylen-= cplen;
+        i++;
+        p+= cplen;
+    }
+    HMAC_CTX_cleanup(&hctx_tpl);
+    return 1;
+}
+
+/* LCOV_EXCL_START */
+/* Set exception `exc` from the most recent OpenSSL error; returns NULL. */
+static PyObject *
+_setException(PyObject *exc)
+{
+    unsigned long errcode;
+    const char *lib, *func, *reason;
+
+    errcode = ERR_peek_last_error();
+    if (!errcode) {
+        PyErr_SetString(exc, "unknown reasons");
+        return NULL;
+    }
+    ERR_clear_error();
+
+    lib = ERR_lib_error_string(errcode);
+    func = ERR_func_error_string(errcode);
+    reason = ERR_reason_error_string(errcode);
+
+    if (lib && func) {
+        PyErr_Format(exc, "[%s: %s] %s", lib, func, reason);
+    }
+    else if (lib) {
+        PyErr_Format(exc, "[%s] %s", lib, reason);
+    }
+    else {
+        PyErr_SetString(exc, reason);
+    }
+    return NULL;
+}
+/* LCOV_EXCL_STOP */
+
+PyDoc_STRVAR(pbkdf2_hmac__doc__,
+"pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None) -> key\n\
+\n\
+Password based key derivation function 2 (PKCS #5 v2.0) with HMAC as\n\
+pseudorandom function.");
+
+static PyObject *
+pbkdf2_hmac(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    static char *kwlist[] = {"hash_name", "password", "salt", "iterations",
+                             "dklen", NULL};
+    PyObject *key_obj = NULL, *dklen_obj = Py_None;
+    char *name, *key;
+    Py_buffer password, salt;
+    long iterations, dklen;
+    int retval;
+    const EVP_MD *digest;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "ss*s*l|O:pbkdf2_hmac",
+                                     kwlist, &name, &password, &salt,
+                                     &iterations, &dklen_obj)) {
+        return NULL;
+    }
+
+    digest = EVP_get_digestbyname(name);
+    if (digest == NULL) {
+        PyErr_SetString(PyExc_ValueError, "unsupported hash type");
+        goto end;
+    }
+
+    if (password.len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "password is too long.");
+        goto end;
+    }
+
+    if (salt.len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "salt is too long.");
+        goto end;
+    }
+
+    if (iterations < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "iteration value must be greater than 0.");
+        goto end;
+    }
+    if (iterations > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "iteration value is too great.");
+        goto end;
+    }
+
+    if (dklen_obj == Py_None) {
+        dklen = EVP_MD_size(digest);
+    } else {
+        dklen = PyLong_AsLong(dklen_obj);
+        if ((dklen == -1) && PyErr_Occurred()) {
+            goto end;
+        }
+    }
+    if (dklen < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "key length must be greater than 0.");
+        goto end;
+    }
+    if (dklen > INT_MAX) {
+        /* INT_MAX is always smaller than dkLen max (2^32 - 1) * hLen */
+        PyErr_SetString(PyExc_OverflowError,
+                        "key length is too great.");
+        goto end;
+    }
+
+    key_obj = PyBytes_FromStringAndSize(NULL, dklen);
+    if (key_obj == NULL) {
+        goto end;
+    }
+    key = PyBytes_AS_STRING(key_obj);
+
+    Py_BEGIN_ALLOW_THREADS
+    retval = PKCS5_PBKDF2_HMAC_fast((char*)password.buf, (int)password.len,
+                                    (unsigned char *)salt.buf, (int)salt.len,
+                                    iterations, digest, dklen,
+                                    (unsigned char *)key);
+    Py_END_ALLOW_THREADS
+
+    if (!retval) {
+        Py_CLEAR(key_obj);
+        _setException(PyExc_ValueError);
+        goto end;
+    }
+
+  end:
+    PyBuffer_Release(&password);
+    PyBuffer_Release(&salt);
+    return key_obj;
+}
+
+#endif
+
 /*
  * This macro generates constructor function definitions for specific
  * hash algorithms. These constructors are much faster than calling
@@ -558,6 +780,10 @@
     CONSTRUCTOR_METH_DEF(sha384),
     CONSTRUCTOR_METH_DEF(sha512),
 #endif
+#ifdef PY_PBKDF2_HMAC
+    {"pbkdf2_hmac", (PyCFunction)pbkdf2_hmac, METH_VARARGS|METH_KEYWORDS,
+        pbkdf2_hmac__doc__},
+#endif
     {NULL, NULL} /* Sentinel */
 };
 