diff -r 8962d1c442a6 Doc/library/sys.rst --- a/Doc/library/sys.rst Mon Oct 28 08:08:09 2013 +0100 +++ b/Doc/library/sys.rst Mon Oct 28 23:09:06 2013 +0200 @@ -594,9 +594,20 @@ | :const:`imag` | multiplier used for the imaginary part of a | | | complex number | +---------------------+--------------------------------------------------+ + | :const:`algorithm` | name of the algorithm for hashing of str, bytes, | + | | and memoryview | + +---------------------+--------------------------------------------------+ + | :const:`hash_bits` | internal output size of the hash algorithm | + +---------------------+--------------------------------------------------+ + | :const:`seed_bits` | size of the seed key of the hash algorithm | + +---------------------+--------------------------------------------------+ + .. versionadded:: 3.2 + .. versionchanged:: 3.4 + Added *algorithm*, *hash_bits* and *seed_bits* + .. data:: hexversion diff -r 8962d1c442a6 Include/Python.h --- a/Include/Python.h Mon Oct 28 08:08:09 2013 +0100 +++ b/Include/Python.h Mon Oct 28 23:09:06 2013 +0200 @@ -68,6 +68,7 @@ #include "object.h" #include "objimpl.h" #include "typeslots.h" +#include "pyhash.h" #include "pydebug.h" diff -r 8962d1c442a6 Include/object.h --- a/Include/object.h Mon Oct 28 08:08:09 2013 +0100 +++ b/Include/object.h Mon Oct 28 23:09:06 2013 +0200 @@ -565,15 +565,8 @@ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); -PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); #endif -typedef struct { - Py_hash_t prefix; - Py_hash_t suffix; -} _Py_HashSecret_t; -PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; - #ifdef Py_DEBUG PyAPI_DATA(int) _Py_HashSecret_Initialized; #endif diff -r 8962d1c442a6 Include/pyhash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Include/pyhash.h Mon Oct 28 23:09:06 2013 +0200 @@ -0,0 +1,53 @@ +#ifndef Py_HASH_H +#define Py_HASH_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Helper for 
hash functions */ +#ifndef Py_LIMITED_API +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); +#endif + +/* hash secret */ +typedef struct { +#ifdef PY_UINT64_T + PY_UINT64_T prefix; + PY_UINT64_T suffix; +#else + Py_hash_t prefix; + Py_hash_t suffix; +#endif +} _Py_HashSecret_t; +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + +/* hash algorithm selection + * SipHash24 requires a platform with a working PY_UINT64_T type + */ +#define PY_HASH_SIPHASH24 1 +#define PY_HASH_FNV 2 + +#ifndef PY_HASH_ALGORITHM +#if defined(PY_UINT64_T) && defined(PY_UINT32_T) +#define PY_HASH_ALGORITHM PY_HASH_SIPHASH24 +#else +#define PY_HASH_ALGORITHM PY_HASH_FNV +#endif /* uint64_t && uint32_t && aligned */ +#endif /* PY_HASH_ALGORITHM */ + +#if PY_HASH_ALGORITHM == PY_HASH_FNV +#define _PyHASH_NAME "fnv" +#define _PyHASH_HASH_BITS (8 * SIZEOF_PY_HASH_T) +#define _PyHASH_SEED_BITS (16 * SIZEOF_PY_HASH_T) +#endif + +#if PY_HASH_ALGORITHM == PY_HASH_SIPHASH24 +#define _PyHASH_NAME "siphash24" +#define _PyHASH_HASH_BITS 64 +#define _PyHASH_SEED_BITS 128 +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_HASH_H */ diff -r 8962d1c442a6 Include/pyport.h --- a/Include/pyport.h Mon Oct 28 08:08:09 2013 +0100 +++ b/Include/pyport.h Mon Oct 28 23:09:06 2013 +0200 @@ -199,8 +199,10 @@ #endif /* Py_hash_t is the same size as a pointer. */ +#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T typedef Py_ssize_t Py_hash_t; /* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */ +#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T typedef size_t Py_uhash_t; /* Largest possible value of size_t. 
diff -r 8962d1c442a6 Lib/test/test_hash.py --- a/Lib/test/test_hash.py Mon Oct 28 08:08:09 2013 +0100 +++ b/Lib/test/test_hash.py Mon Oct 28 23:09:06 2013 +0200 @@ -12,6 +12,34 @@ IS_64BIT = sys.maxsize > 2**32 +def lcg(x, length=16): + """Linear congruential generator""" + if x == 0: + return bytes(length) + out = bytearray(length) + for i in range(length): + x = (214013 * x + 2531011) & 0x7fffffff + out[i] = (x >> 16) & 0xff + return bytes(out) + +def pysiphash(uint64): + """Convert SipHash24 output to Py_hash_t + """ + assert 0 <= uint64 < (1 << 64) + # simple unsigned to signed int64 + if uint64 > (1 << 63) - 1: + int64 = uint64 - (1 << 64) + else: + int64 = uint64 + # mangle uint64 to uint32 + uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff + # simple unsigned to signed int32 + if uint32 > (1 << 31) - 1: + int32 = uint32 - (1 << 32) + else: + int32 = uint32 + return int32, int64 + class HashEqualityTestCase(unittest.TestCase): @@ -161,12 +189,54 @@ self.assertNotEqual(run1, run2) class StringlikeHashRandomizationTests(HashRandomizationTests): + repr_ = None + repr_long = None + + # 32bit little, 64bit little, 32bit big, 64bit big + known_hashes = { + 'siphash24': [ + # seed 0, 'abc' + [2025351752, 4596069200710135518, 1433332804, + -3481057401533226760], + # seed 42, 'abc' + [-774632014, -4501618152524544106, 1054608210, + -1493500025205289231], + # seed 42, 'abcdefghijk' + [-1436007334, 4436719588892876975, -927983272, + -7467447726805986685], + # seed 0, 'äú∑' + [601955741, 2086523885441110006, 1256089729, 1210895472895696773], + #[-1382470381, -7683548216435433336], + # seed 42, 'äú∑' + [540380300, 6694378403234232636, 270159101, -5563298924215566873], + #[1938649799, -7765849682496497124], + ], + 'fnv': [ + # seed 0, 'abc' + [-1600925533, 1453079729188098211, -1600925533, + 1453079729188098211], + # seed 42, 'abc' + [-206076799, -4410911502303878509, -1024014457, + -3570150969479994130], + # seed 42, 'abcdefghijk' + [811136751, -5046230049376118746, 
-77208053 , None], + # seed 0, 'äú∑' + [-1928119725, -1215075570188975207, None, None], + # seed 42, 'äú∑' + [-265485687, -7998005692351841869, None, None], + ] + } + + def get_expected_hash(self, position): + algorithm = sys.hash_info.algorithm + platform = 1 if IS_64BIT else 0 + if sys.byteorder == 'big': + platform += 2 + return self.known_hashes[algorithm][position][platform] + def test_null_hash(self): # PYTHONHASHSEED=0 disables the randomized hash - if IS_64BIT: - known_hash_of_obj = 1453079729188098211 - else: - known_hash_of_obj = -1600925533 + known_hash_of_obj = self.get_expected_hash(0) # Randomization is enabled by default: self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj) @@ -177,28 +247,35 @@ def test_fixed_hash(self): # test a fixed seed for the randomized hash # Note that all types share the same values: - if IS_64BIT: - if sys.byteorder == 'little': - h = -4410911502303878509 - else: - h = -3570150969479994130 - else: - if sys.byteorder == 'little': - h = -206076799 - else: - h = -1024014457 + h = self.get_expected_hash(1) self.assertEqual(self.get_hash(self.repr_, seed=42), h) + def test_long_fixed_hash(self): + if self.repr_long is None: + return + h = self.get_expected_hash(2) + self.assertEqual(self.get_hash(self.repr_long, seed=42), h) + + class StrHashRandomizationTests(StringlikeHashRandomizationTests, unittest.TestCase): repr_ = repr('abc') + repr_long = repr('abcdefghijk') + repr_ucs2 = repr('äú∑') def test_empty_string(self): self.assertEqual(hash(""), 0) + def test_ucs2_string(self): + h = self.get_expected_hash(3) + self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h) + h = self.get_expected_hash(4) + self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h) + class BytesHashRandomizationTests(StringlikeHashRandomizationTests, unittest.TestCase): repr_ = repr(b'abc') + repr_long = repr(b'abcdefghijk') def test_empty_string(self): self.assertEqual(hash(b""), 0) @@ -206,6 +283,7 @@ class 
MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests, unittest.TestCase): repr_ = "memoryview(b'abc')" + repr_long = "memoryview(b'abcdefghijk')" def test_empty_string(self): self.assertEqual(hash(memoryview(b"")), 0) @@ -224,5 +302,22 @@ repr_ = repr(datetime.time(0)) +class HashDistributionTestCase(unittest.TestCase): + + def test_hash_distribution(self): + # check for hash collision + base = "abcdefghabcdefg" + for i in range(1, len(base)): + prefix = base[:i] + s15 = set() + s255 = set() + for c in range(256): + h = hash(prefix + chr(c)) + s15.add(h & 0xf) + s255.add(h & 0xff) + # SipHash24 distribution depends on key, usually > 60% + self.assertGreater(len(s15), 8, prefix) + self.assertGreater(len(s255), 128, prefix) + if __name__ == "__main__": unittest.main() diff -r 8962d1c442a6 Lib/test/test_sys.py --- a/Lib/test/test_sys.py Mon Oct 28 08:08:09 2013 +0100 +++ b/Lib/test/test_sys.py Mon Oct 28 23:09:06 2013 +0200 @@ -8,6 +8,7 @@ import codecs import gc import sysconfig +import platform # count the number of test runs, used to create unique # strings to intern in test_intern() @@ -430,7 +431,7 @@ self.assertEqual(type(sys.int_info.sizeof_digit), int) self.assertIsInstance(sys.hexversion, int) - self.assertEqual(len(sys.hash_info), 5) + self.assertEqual(len(sys.hash_info), 8) self.assertLess(sys.hash_info.modulus, 2**sys.hash_info.width) # sys.hash_info.modulus should be a prime; we do a quick # probable primality test (doesn't exclude the possibility of @@ -445,6 +446,21 @@ self.assertIsInstance(sys.hash_info.inf, int) self.assertIsInstance(sys.hash_info.nan, int) self.assertIsInstance(sys.hash_info.imag, int) + self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"}) + self.assertIn(sys.hash_info.hash_bits, {32, 64}) + self.assertIn(sys.hash_info.seed_bits, {32, 64, 128}) + + algo = sysconfig.get_config_var("PY_HASH_ALGORITHM") + if algo == 1: + self.assertEqual(sys.hash_info.algorithm, "siphash24") + elif algo == 2: + 
self.assertEqual(sys.hash_info.algorithm, "fnv") + else: + processor = platform.processor().lower() + if processor in {"sparc", "mips"}: + self.assertEqual(sys.hash_info.algorithm, "fnv") + else: + self.assertEqual(sys.hash_info.algorithm, "siphash24") self.assertIsInstance(sys.maxsize, int) self.assertIsInstance(sys.maxunicode, int) diff -r 8962d1c442a6 Makefile.pre.in --- a/Makefile.pre.in Mon Oct 28 08:08:09 2013 +0100 +++ b/Makefile.pre.in Mon Oct 28 23:09:06 2013 +0200 @@ -366,6 +366,7 @@ Python/pyarena.o \ Python/pyctype.o \ Python/pyfpe.o \ + Python/pyhash.o \ Python/pymath.o \ Python/pystate.o \ Python/pythonrun.o \ @@ -868,6 +869,7 @@ $(srcdir)/Include/pydebug.h \ $(srcdir)/Include/pyerrors.h \ $(srcdir)/Include/pyfpe.h \ + $(srcdir)/Include/pyhash.h \ $(srcdir)/Include/pymath.h \ $(srcdir)/Include/pygetopt.h \ $(srcdir)/Include/pymacro.h \ diff -r 8962d1c442a6 Objects/bytesobject.c --- a/Objects/bytesobject.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Objects/bytesobject.c Mon Oct 28 23:09:06 2013 +0200 @@ -878,7 +878,7 @@ { if (a->ob_shash == -1) { /* Can't fail */ - a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a)); + a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a)); } return a->ob_shash; } diff -r 8962d1c442a6 Objects/memoryobject.c --- a/Objects/memoryobject.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Objects/memoryobject.c Mon Oct 28 23:09:06 2013 +0200 @@ -2742,7 +2742,7 @@ } /* Can't fail */ - self->hash = _Py_HashBytes((unsigned char *)mem, view->len); + self->hash = _Py_HashBytes(mem, view->len); if (mem != view->buf) PyMem_Free(mem); diff -r 8962d1c442a6 Objects/object.c --- a/Objects/object.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Objects/object.c Mon Oct 28 23:09:06 2013 +0200 @@ -844,33 +844,6 @@ } Py_hash_t -_Py_HashBytes(unsigned char *p, Py_ssize_t len) -{ - Py_uhash_t x; - Py_ssize_t i; - - /* - We make the hash of the empty string be 0, rather than using - (prefix ^ suffix), since this slightly obfuscates the hash secret - */ 
-#ifdef Py_DEBUG - assert(_Py_HashSecret_Initialized); -#endif - if (len == 0) { - return 0; - } - x = (Py_uhash_t) _Py_HashSecret.prefix; - x ^= (Py_uhash_t) *p << 7; - for (i = 0; i < len; i++) - x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; - x ^= (Py_uhash_t) len; - x ^= (Py_uhash_t) _Py_HashSecret.suffix; - if (x == -1) - x = -2; - return x; -} - -Py_hash_t PyObject_HashNotImplemented(PyObject *v) { PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", @@ -878,8 +851,6 @@ return -1; } -_Py_HashSecret_t _Py_HashSecret; - Py_hash_t PyObject_Hash(PyObject *v) { diff -r 8962d1c442a6 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Objects/unicodeobject.c Mon Oct 28 23:09:06 2013 +0200 @@ -11131,39 +11131,8 @@ _PyUnicode_HASH(self) = 0; return 0; } - - /* The hash function as a macro, gets expanded three times below. */ -#define HASH(P) \ - x ^= (Py_uhash_t) *P << 7; \ - while (--len >= 0) \ - x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \ - - x = (Py_uhash_t) _Py_HashSecret.prefix; - switch (PyUnicode_KIND(self)) { - case PyUnicode_1BYTE_KIND: { - const unsigned char *c = PyUnicode_1BYTE_DATA(self); - HASH(c); - break; - } - case PyUnicode_2BYTE_KIND: { - const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); - HASH(s); - break; - } - default: { - Py_UCS4 *l; - assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && - "Impossible switch case in unicode_hash"); - l = PyUnicode_4BYTE_DATA(self); - HASH(l); - break; - } - } - x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self); - x ^= (Py_uhash_t) _Py_HashSecret.suffix; - - if (x == -1) - x = -2; + x = _Py_HashBytes(PyUnicode_DATA(self), + PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); _PyUnicode_HASH(self) = x; return x; } diff -r 8962d1c442a6 PCbuild/pythoncore.vcxproj --- a/PCbuild/pythoncore.vcxproj Mon Oct 28 08:08:09 2013 +0100 +++ b/PCbuild/pythoncore.vcxproj Mon Oct 28 23:09:06 2013 +0200 @@ -412,6 +412,7 @@ + @@ -616,6 +617,7 @@ + diff -r 8962d1c442a6 
PCbuild/pythoncore.vcxproj.filters --- a/PCbuild/pythoncore.vcxproj.filters Mon Oct 28 08:08:09 2013 +0100 +++ b/PCbuild/pythoncore.vcxproj.filters Mon Oct 28 23:09:06 2013 +0200 @@ -421,6 +421,9 @@ Python + + Include + @@ -931,6 +934,9 @@ Modules + + Python + diff -r 8962d1c442a6 Python/pyhash.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Python/pyhash.c Mon Oct 28 23:09:06 2013 +0200 @@ -0,0 +1,220 @@ +/* Set of hash utility functions to help maintaining the invariant that + if a==b then hash(a)==hash(b) + + All the utility functions (_Py_Hash*()) return "-1" to signify an error. +*/ +#include "Python.h" + +_Py_HashSecret_t _Py_HashSecret; + +/* Count _Py_HashBytes() calls */ +#ifdef PY_HASH_STATS +#define PY_HASH_STATS_MAX 32 +static Py_ssize_t hashstats[PY_HASH_STATS_MAX + 1] = {0}; +#endif + +void +_PyHash_Fini(void) +{ +#ifdef PY_HASH_STATS + int i; + Py_ssize_t total = 0; + char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n"; + + fprintf(stderr, "len calls total\n"); + for (i = 1; i <= PY_HASH_STATS_MAX; i++) { + total += hashstats[i]; + fprintf(stderr, fmt, i, hashstats[i], total); + } + total += hashstats[0]; + fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n", + hashstats[0], total); +#endif +} + +Py_hash_t +_Py_HashBytes(const void *src, Py_ssize_t len) +{ + Py_hash_t x; + /* + We make the hash of the empty string be 0, rather than using + (prefix ^ suffix), since this slightly obfuscates the hash secret + */ + if (len == 0) { + return 0; + } +#ifdef PY_HASH_STATS + hashstats[(len <= PY_HASH_STATS_MAX) ? 
len : 0]++; +#endif + +#if PY_HASH_ALGORITHM == PY_HASH_FNV +/* ************************************************************************** + * Modified Fowler-Noll-Vo (FNV) hash algorithm + */ +{ + const unsigned char *p = src; + Py_ssize_t remainder, blocks; + union { + Py_uhash_t value; + unsigned char bytes[SIZEOF_PY_UHASH_T]; + } block; + +#ifdef Py_DEBUG + assert(_Py_HashSecret_Initialized); +#endif + remainder = len % SIZEOF_PY_UHASH_T; + if (remainder == 0) { + /* Process at least one block byte by byte to reduce hash collisions + * for strings with common prefixes. */ + remainder = SIZEOF_PY_UHASH_T; + } + blocks = (len - remainder) / SIZEOF_PY_UHASH_T; + + x = (Py_uhash_t) _Py_HashSecret.prefix; + x ^= (Py_uhash_t) *p << 7; + while (blocks--) { +#ifdef _MSC_VER + block.value = *(const Py_uhash_t*)p; +#else + memcpy(block.bytes, p, SIZEOF_PY_UHASH_T); +#endif + x = (_PyHASH_MULTIPLIER * x) ^ block.value; + p += SIZEOF_PY_UHASH_T; + } + /* add remainder */ + for (; remainder > 0; remainder--) + x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; + x ^= (Py_uhash_t) len; + x ^= (Py_uhash_t) _Py_HashSecret.suffix; +} + +#endif /* PY_HASH_ALGORITHM == PY_HASH_FNV */ + + +#if PY_HASH_ALGORITHM == PY_HASH_SIPHASH24 +/* ************************************************************************** + + Copyright (c) 2013 Marek Majkowski + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + + Original location: + https://github.com/majek/csiphash/ + + Solution inspired by code from: + Samuel Neves (supercop/crypto_auth/siphash24/little) + djb (supercop/crypto_auth/siphash24/little2) + Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c) + + Modified for Python by Christian Heimes: + - C89 / MSVC compatibility + - PY_UINT64_T and PY_UINT32_T + - _rotl64() on Windows +*/ + +{ +#ifdef _MSC_VER +#define ROTATE(x, b) _rotl64(x, b) +#else +#define ROTATE(x, b) (PY_UINT64_T)( ((x) << (b)) | ( (x) >> (64 - (b))) ) +#endif + +#define HALF_ROUND(a,b,c,d,s,t) \ + a += b; c += d; \ + b = ROTATE(b, s) ^ a; \ + d = ROTATE(d, t) ^ c; \ + a = ROTATE(a, 32); + +#define DOUBLE_ROUND(v0,v1,v2,v3) \ + HALF_ROUND(v0,v1,v2,v3,13,16); \ + HALF_ROUND(v2,v1,v0,v3,17,21); \ + HALF_ROUND(v0,v1,v2,v3,13,16); \ + HALF_ROUND(v2,v1,v0,v3,17,21); + + union { + PY_UINT64_T value; + PY_UINT32_T hvalue; + unsigned char bytes[8]; + } block; + + PY_UINT64_T k0 = _Py_HashSecret.prefix; + PY_UINT64_T k1 = _Py_HashSecret.suffix; + PY_UINT64_T b = (PY_UINT64_T)len << 56; + const unsigned char *in = (const unsigned char*)src; + + PY_UINT64_T v0 = k0 ^ 0x736f6d6570736575ULL; + PY_UINT64_T v1 = k1 ^ 0x646f72616e646f6dULL; + PY_UINT64_T v2 = k0 ^ 0x6c7967656e657261ULL; + PY_UINT64_T v3 = k1 ^ 0x7465646279746573ULL; + + PY_UINT64_T t; + + while (len >= 8) { +#ifdef _MSC_VER + block.value = *(const PY_UINT64_T*)in; +#else + memcpy(block.bytes, in, 8); +#endif + in += 8; + len -= 8; + v3 ^= block.value; + 
DOUBLE_ROUND(v0,v1,v2,v3); + v0 ^= block.value; + } + + block.value = 0; + switch (len) { + case 7: block.bytes[6] = in[6]; + case 6: block.bytes[5] = in[5]; + case 5: block.bytes[4] = in[4]; + case 4: +#ifdef _MSC_VER + block.hvalue = *(const PY_UINT32_T*)in; +#else + memcpy(block.bytes, in, 4); +#endif + break; + case 3: block.bytes[2] = in[2]; + case 2: block.bytes[1] = in[1]; + case 1: block.bytes[0] = in[0]; + } + b |= block.value; + + v3 ^= b; + DOUBLE_ROUND(v0,v1,v2,v3); + v0 ^= b; + v2 ^= 0xff; + DOUBLE_ROUND(v0,v1,v2,v3); + DOUBLE_ROUND(v0,v1,v2,v3); + + /* modified */ + t = (v0 ^ v1) ^ (v2 ^ v3); +#if SIZEOF_VOID_P == 4 + t ^= (t >> 32); +#endif + x = (Py_hash_t)t; +} + +#endif /* PY_HASH_ALGORITHM == PY_HASH_SIPHASH24 */ + + if (x == -1) + return -2; + return x; +} diff -r 8962d1c442a6 Python/pythonrun.c --- a/Python/pythonrun.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Python/pythonrun.c Mon Oct 28 23:09:06 2013 +0200 @@ -92,6 +92,7 @@ extern void PyLong_Fini(void); extern int _PyFaulthandler_Init(void); extern void _PyFaulthandler_Fini(void); +extern void _PyHash_Fini(void); #ifdef WITH_THREAD extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); @@ -630,6 +631,8 @@ #ifdef COUNT_ALLOCS dump_counts(stdout); #endif + /* dump hash stats */ + _PyHash_Fini(); PRINT_TOTAL_REFS(); diff -r 8962d1c442a6 Python/random.c --- a/Python/random.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Python/random.c Mon Oct 28 23:09:06 2013 +0200 @@ -95,7 +95,7 @@ /* Read size bytes from /dev/urandom into buffer. Call Py_FatalError() on error. 
*/ static void -dev_urandom_noraise(char *buffer, Py_ssize_t size) +dev_urandom_noraise(unsigned char *buffer, Py_ssize_t size) { int fd; Py_ssize_t n; @@ -249,7 +249,7 @@ _PyRandom_Init(void) { char *env; - void *secret = &_Py_HashSecret; + unsigned char *secret = (unsigned char *)&_Py_HashSecret; Py_ssize_t secret_size = sizeof(_Py_HashSecret_t); if (_Py_HashSecret_Initialized) @@ -278,17 +278,17 @@ memset(secret, 0, secret_size); } else { - lcg_urandom(seed, (unsigned char*)secret, secret_size); + lcg_urandom(seed, secret, secret_size); } } else { #ifdef MS_WINDOWS - (void)win32_urandom((unsigned char *)secret, secret_size, 0); + (void)win32_urandom(secret, secret_size, 0); #else /* #ifdef MS_WINDOWS */ # ifdef __VMS - vms_urandom((unsigned char *)secret, secret_size, 0); + vms_urandom(secret, secret_size, 0); # else - dev_urandom_noraise((char*)secret, secret_size); + dev_urandom_noraise(secret, secret_size); # endif #endif } diff -r 8962d1c442a6 Python/sysmodule.c --- a/Python/sysmodule.c Mon Oct 28 08:08:09 2013 +0100 +++ b/Python/sysmodule.c Mon Oct 28 23:09:06 2013 +0200 @@ -617,7 +617,7 @@ "hash_info\n\ \n\ A struct sequence providing parameters used for computing\n\ -numeric hashes. The attributes are read only."); +hashes. 
The attributes are read only."); static PyStructSequence_Field hash_info_fields[] = { {"width", "width of the type used for hashing, in bits"}, @@ -626,6 +626,10 @@ {"inf", "value to be used for hash of a positive infinity"}, {"nan", "value to be used for hash of a nan"}, {"imag", "multiplier used for the imaginary part of a complex number"}, + {"algorithm", "name of the algorithm for hashing of str, bytes and " + "memoryviews"}, + {"hash_bits", "internal output size of hash algorithm"}, + {"seed_bits", "seed size of hash algorithm"}, {NULL, NULL} }; @@ -633,7 +637,7 @@ "sys.hash_info", hash_info_doc, hash_info_fields, - 5, + 8, }; static PyObject * @@ -654,6 +658,12 @@ PyLong_FromLong(_PyHASH_NAN)); PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_IMAG)); + PyStructSequence_SET_ITEM(hash_info, field++, + PyUnicode_FromString(_PyHASH_NAME)); + PyStructSequence_SET_ITEM(hash_info, field++, + PyLong_FromLong(_PyHASH_HASH_BITS)); + PyStructSequence_SET_ITEM(hash_info, field++, + PyLong_FromLong(_PyHASH_SEED_BITS)); if (PyErr_Occurred()) { Py_CLEAR(hash_info); return NULL; @@ -1298,6 +1308,7 @@ executable -- absolute path of the executable binary of the Python interpreter\n\ float_info -- a struct sequence with information about the float implementation.\n\ float_repr_style -- string indicating the style of repr() output for floats\n\ +hash_info -- a struct sequenece with information about the hash algorithm.\n\ hexversion -- version information encoded as a single integer\n\ implementation -- Python implementation information.\n\ int_info -- a struct sequence with information about the int implementation.\n\ diff -r 8962d1c442a6 configure --- a/configure Mon Oct 28 08:08:09 2013 +0100 +++ b/configure Mon Oct 28 23:09:06 2013 +0200 @@ -792,6 +792,7 @@ enable_shared enable_profiling with_pydebug +with_hash_algorithm with_libs with_system_expat with_system_ffi @@ -1465,6 +1466,8 @@ compiler --with-suffix=.exe set executable suffix --with-pydebug 
build with Py_DEBUG defined + --with-hash-algorithm=[fnv|siphash24] + select hash algorithm --with-libs='lib1 ...' link against additional libs --with-system-expat build pyexpat module using an installed expat library @@ -8987,6 +8990,37 @@ *) ;; esac +# str, bytes and memoryview hash algorithm + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-hash-algorithm" >&5 +$as_echo_n "checking for --with-hash-algorithm... " >&6; } + +# Check whether --with-hash_algorithm was given. +if test "${with_hash_algorithm+set}" = set; then : + withval=$with_hash_algorithm; +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5 +$as_echo "$withval" >&6; } +case "$withval" in + siphash24) + $as_echo "#define PY_HASH_ALGORITHM 1" >>confdefs.h + + ;; + fnv) + $as_echo "#define PY_HASH_ALGORITHM 2" >>confdefs.h + + ;; + *) + as_fn_error $? "unknown hash algorithm '$withval'" "$LINENO" 5 + ;; +esac + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: default" >&5 +$as_echo "default" >&6; } +fi + + # Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for t_open in -lnsl" >&5 $as_echo_n "checking for t_open in -lnsl... " >&6; } diff -r 8962d1c442a6 configure.ac --- a/configure.ac Mon Oct 28 08:08:09 2013 +0100 +++ b/configure.ac Mon Oct 28 23:09:06 2013 +0200 @@ -2229,6 +2229,32 @@ *) ;; esac +# str, bytes and memoryview hash algorithm +AH_TEMPLATE(PY_HASH_ALGORITHM, + [Define hash algorithm for str, bytes and memoryview. 
+ SipHash24: 1, FNV: 2]) + +AC_MSG_CHECKING(for --with-hash-algorithm) +dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output +AC_ARG_WITH(hash_algorithm, + AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@], + [select hash algorithm]), +[ +AC_MSG_RESULT($withval) +case "$withval" in + siphash24) + AC_DEFINE(PY_HASH_ALGORITHM, 1) + ;; + fnv) + AC_DEFINE(PY_HASH_ALGORITHM, 2) + ;; + *) + AC_MSG_ERROR([unknown hash algorithm '$withval']) + ;; +esac +], +[AC_MSG_RESULT(default)]) + # Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl. AC_CHECK_LIB(nsl, t_open, [LIBS="-lnsl $LIBS"]) # SVR4 AC_CHECK_LIB(socket, socket, [LIBS="-lsocket $LIBS"], [], $LIBS) # SVR4 sockets diff -r 8962d1c442a6 pyconfig.h.in --- a/pyconfig.h.in Mon Oct 28 08:08:09 2013 +0100 +++ b/pyconfig.h.in Mon Oct 28 23:09:06 2013 +0200 @@ -1187,6 +1187,10 @@ /* Define to printf format modifier for Py_ssize_t */ #undef PY_FORMAT_SIZE_T +/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2, + externally defined: 0 */ +#undef PY_HASH_ALGORITHM + /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG