diff -r b9623fa5a0dd -r b8d39bf9ca4a Doc/library/sys.rst
--- a/Doc/library/sys.rst Thu Oct 24 09:47:10 2013 -0700
+++ b/Doc/library/sys.rst Mon Oct 28 16:19:27 2013 +0100
@@ -594,9 +594,20 @@
| :const:`imag` | multiplier used for the imaginary part of a |
| | complex number |
+---------------------+--------------------------------------------------+
+ | :const:`algorithm` | name of the algorithm for hashing of str, bytes, |
+ | | and memoryview |
+ +---------------------+--------------------------------------------------+
+ | :const:`hash_bits` | internal output size of the hash algorithm |
+ +---------------------+--------------------------------------------------+
+ | :const:`seed_bits` | size of the seed key of the hash algorithm |
+ +---------------------+--------------------------------------------------+
+
.. versionadded:: 3.2
+ .. versionchanged: 3.4
+ Added *algorithm*, *hash_bits* and *seed_bits*
+
.. data:: hexversion
diff -r b9623fa5a0dd -r b8d39bf9ca4a Include/Python.h
--- a/Include/Python.h Thu Oct 24 09:47:10 2013 -0700
+++ b/Include/Python.h Mon Oct 28 16:19:27 2013 +0100
@@ -68,6 +68,7 @@
#include "object.h"
#include "objimpl.h"
#include "typeslots.h"
+#include "pyhash.h"
#include "pydebug.h"
diff -r b9623fa5a0dd -r b8d39bf9ca4a Include/object.h
--- a/Include/object.h Thu Oct 24 09:47:10 2013 -0700
+++ b/Include/object.h Mon Oct 28 16:19:27 2013 +0100
@@ -561,23 +561,6 @@
PyAPI_FUNC(int) Py_ReprEnter(PyObject *);
PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
-/* Helpers for hash functions */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
-PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
-PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
-#endif
-
-typedef struct {
- Py_hash_t prefix;
- Py_hash_t suffix;
-} _Py_HashSecret_t;
-PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
-
-#ifdef Py_DEBUG
-PyAPI_DATA(int) _Py_HashSecret_Initialized;
-#endif
-
/* Helper for passing objects to printf and the like */
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
diff -r b9623fa5a0dd -r b8d39bf9ca4a Include/pyhash.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Include/pyhash.h Mon Oct 28 16:19:27 2013 +0100
@@ -0,0 +1,92 @@
+#ifndef Py_HASH_H
+#define Py_HASH_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Helpers for hash functions */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
+PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
+PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
+#endif
+
+/* Prime multiplier used in string and various other hashes. */
+#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
+
+/* Parameters used for the numeric hash implementation. See notes for
+ _Py_HashDouble in Objects/object.c. Numeric hashes are based on
+ reduction modulo the prime 2**_PyHASH_BITS - 1. */
+
+#if SIZEOF_VOID_P >= 8
+#define _PyHASH_BITS 61
+#else
+#define _PyHASH_BITS 31
+#endif
+#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
+#define _PyHASH_INF 314159
+#define _PyHASH_NAN 0
+#define _PyHASH_IMAG _PyHASH_MULTIPLIER
+
+
+/* hash secret */
+typedef union {
+ /* ensure 24 bytes */
+ unsigned char uc[24];
+ /* two Py_hash_t for FNV */
+ struct {
+ Py_hash_t prefix;
+ Py_hash_t suffix;
+ } ht;
+#ifdef PY_UINT64_T
+ /* two uint64 for SipHash24 */
+ struct {
+ PY_UINT64_T k0;
+ PY_UINT64_T k1;
+ } ui64;
+#endif
+ /* a different (!) Py_hash_t for small string optimization */
+ struct {
+ unsigned char padding[16];
+ Py_hash_t suffix;
+ } small;
+} _Py_HashSecret_t;
+PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
+
+#ifdef Py_DEBUG
+PyAPI_DATA(int) _Py_HashSecret_Initialized;
+#endif
+
+
+/* hash function definition */
+#ifndef Py_LIMITED_API
+typedef struct {
+ Py_hash_t (*const hash)(const void *, Py_ssize_t);
+ const char *name;
+ const int hash_bits;
+ const int seed_bits;
+} PyHash_FuncDef;
+
+PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
+#endif
+
+/* hash algorithm selection
+ * SipHash24 requires a platform with a working PY_UINT64_T type
+ */
+#define PY_HASH_EXTERNAL 0
+#define PY_HASH_SIPHASH24 1
+#define PY_HASH_FNV 2
+
+#ifndef PY_HASH_ALGORITHM
+#if (defined(PY_UINT64_T) && defined(PY_UINT32_T) && \
+ !defined(HAVE_ALIGNED_REQUIRED))
+#define PY_HASH_ALGORITHM PY_HASH_SIPHASH24
+#else
+#define PY_HASH_ALGORITHM PY_HASH_FNV
+#endif /* uint64_t && uint32_t && aligned */
+#endif /* PY_HASH_ALGORITHM */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_HASH_H */
diff -r b9623fa5a0dd -r b8d39bf9ca4a Include/pyport.h
--- a/Include/pyport.h Thu Oct 24 09:47:10 2013 -0700
+++ b/Include/pyport.h Mon Oct 28 16:19:27 2013 +0100
@@ -144,23 +144,6 @@
#endif
#endif
-/* Prime multiplier used in string and various other hashes. */
-#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
-
-/* Parameters used for the numeric hash implementation. See notes for
- _Py_HashDouble in Objects/object.c. Numeric hashes are based on
- reduction modulo the prime 2**_PyHASH_BITS - 1. */
-
-#if SIZEOF_VOID_P >= 8
-#define _PyHASH_BITS 61
-#else
-#define _PyHASH_BITS 31
-#endif
-#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
-#define _PyHASH_INF 314159
-#define _PyHASH_NAN 0
-#define _PyHASH_IMAG _PyHASH_MULTIPLIER
-
/* uintptr_t is the C9X name for an unsigned integral type such that a
* legitimate void* can be cast to uintptr_t and then back to void* again
* without loss of information. Similarly for intptr_t, wrt a signed
@@ -199,8 +182,10 @@
#endif
/* Py_hash_t is the same size as a pointer. */
+#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T
typedef Py_ssize_t Py_hash_t;
/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */
+#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T
typedef size_t Py_uhash_t;
/* Largest possible value of size_t.
diff -r b9623fa5a0dd -r b8d39bf9ca4a Lib/test/test_hash.py
--- a/Lib/test/test_hash.py Thu Oct 24 09:47:10 2013 -0700
+++ b/Lib/test/test_hash.py Mon Oct 28 16:19:27 2013 +0100
@@ -12,6 +12,34 @@
IS_64BIT = sys.maxsize > 2**32
+def lcg(x, length=16):
+ """Linear congruential generator"""
+ if x == 0:
+ return bytes(length)
+ out = bytearray(length)
+ for i in range(length):
+ x = (214013 * x + 2531011) & 0x7fffffff
+ out[i] = (x >> 16) & 0xff
+ return bytes(out)
+
+def pysiphash(uint64):
+ """Convert SipHash24 output to Py_hash_t
+ """
+ assert 0 <= uint64 < (1 << 64)
+ # simple unsigned to signed int64
+ if uint64 > (1 << 63) - 1:
+ int64 = uint64 - (1 << 64)
+ else:
+ int64 = uint64
+ # mangle uint64 to uint32
+ uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
+ # simple unsigned to signed int32
+ if uint32 > (1 << 31) - 1:
+ int32 = uint32 - (1 << 32)
+ else:
+ int32 = uint32
+ return int32, int64
+
class HashEqualityTestCase(unittest.TestCase):
@@ -161,12 +189,63 @@
self.assertNotEqual(run1, run2)
class StringlikeHashRandomizationTests(HashRandomizationTests):
+ repr_ = None
+ repr_long = None
+
+ # 32bit little, 64bit little, 32bit big, 64bit big
+ known_hashes = {
+ 'djba33x': [ # only used for small strings
+ # seed 0, 'abc'
+ [193485960, 193485960, 193485960, 193485960],
+ # seed 42, 'abc'
+ [-678966196, 573763426263223372, -678966196, 573763426263223372],
+ ],
+ 'siphash24': [
+ # seed 0, 'abc'
+ [2025351752, 4596069200710135518, 1433332804,
+ -3481057401533226760],
+ # seed 42, 'abc'
+ [-774632014, -4501618152524544106, 1054608210,
+ -1493500025205289231],
+ # seed 42, 'abcdefghijk'
+ [-1436007334, 4436719588892876975, -927983272,
+ -7467447726805986685],
+ # seed 0, 'äú∑ℇ'
+ [1386693832, 5749986484189612790, -1765006383,
+ 701560684020781672],
+ # seed 42, 'äú∑ℇ'
+ [1260387190, -2947981342227738144, 1360900231,
+ -8011585725872172611]
+ ],
+ 'fnv': [
+ # seed 0, 'abc'
+ [-1600925533, 1453079729188098211, -1600925533,
+ 1453079729188098211],
+ # seed 42, 'abc'
+ [-206076799, -4410911502303878509, -1024014457,
+ -3570150969479994130],
+ # seed 42, 'abcdefghijk'
+ [811136751, -5046230049376118746, -77208053 , None],
+ # seed 0, 'äú∑ℇ'
+ [44402817, 8998297579845987431, None, None],
+ # seed 42, 'äú∑ℇ'
+ [-283066365, -4576729883824601543, None, None],
+ ]
+ }
+
+ def get_expected_hash(self, position, length):
+ if length < 6:
+ algorithm = "djba33x"
+ else:
+ algorithm = sys.hash_info.algorithm
+ platform = 1 if IS_64BIT else 0
+ if sys.byteorder != 'little':
+ platform += 2
+ return self.known_hashes[algorithm][position][platform]
+
def test_null_hash(self):
# PYTHONHASHSEED=0 disables the randomized hash
- if IS_64BIT:
- known_hash_of_obj = 1453079729188098211
- else:
- known_hash_of_obj = -1600925533
+ known_hash_of_obj = self.get_expected_hash(0, 3)
# Randomization is enabled by default:
self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
@@ -177,28 +256,35 @@
def test_fixed_hash(self):
# test a fixed seed for the randomized hash
# Note that all types share the same values:
- if IS_64BIT:
- if sys.byteorder == 'little':
- h = -4410911502303878509
- else:
- h = -3570150969479994130
- else:
- if sys.byteorder == 'little':
- h = -206076799
- else:
- h = -1024014457
+ h = self.get_expected_hash(1, 3)
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
+ def test_long_fixed_hash(self):
+ if self.repr_long is None:
+ return
+ h = self.get_expected_hash(2, 11)
+ self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
+
+
class StrHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr('abc')
+ repr_long = repr('abcdefghijk')
+ repr_ucs2 = repr('äú∑ℇ')
def test_empty_string(self):
self.assertEqual(hash(""), 0)
+ def test_ucs2_string(self):
+ h = self.get_expected_hash(3, 6)
+ self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
+ h = self.get_expected_hash(4, 6)
+ self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
+
class BytesHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = repr(b'abc')
+ repr_long = repr(b'abcdefghijk')
def test_empty_string(self):
self.assertEqual(hash(b""), 0)
@@ -206,6 +292,7 @@
class MemoryviewHashRandomizationTests(StringlikeHashRandomizationTests,
unittest.TestCase):
repr_ = "memoryview(b'abc')"
+ repr_long = "memoryview(b'abcdefghijk')"
def test_empty_string(self):
self.assertEqual(hash(memoryview(b"")), 0)
@@ -224,5 +311,22 @@
repr_ = repr(datetime.time(0))
+class HashDistributionTestCase(unittest.TestCase):
+
+ def test_hash_distribution(self):
+ # check for hash collision
+ base = "abcdefghabcdefg"
+ for i in range(1, len(base)):
+ prefix = base[:i]
+ s15 = set()
+ s255 = set()
+ for c in range(256):
+ h = hash(prefix + chr(c))
+ s15.add(h & 0xf)
+ s255.add(h & 0xff)
+ # SipHash24 distribution depends on key, usually > 60%
+ self.assertGreater(len(s15), 8, prefix)
+ self.assertGreater(len(s255), 128, prefix)
+
if __name__ == "__main__":
unittest.main()
diff -r b9623fa5a0dd -r b8d39bf9ca4a Lib/test/test_sys.py
--- a/Lib/test/test_sys.py Thu Oct 24 09:47:10 2013 -0700
+++ b/Lib/test/test_sys.py Mon Oct 28 16:19:27 2013 +0100
@@ -8,6 +8,7 @@
import codecs
import gc
import sysconfig
+import platform
# count the number of test runs, used to create unique
# strings to intern in test_intern()
@@ -430,7 +431,7 @@
self.assertEqual(type(sys.int_info.sizeof_digit), int)
self.assertIsInstance(sys.hexversion, int)
- self.assertEqual(len(sys.hash_info), 5)
+ self.assertEqual(len(sys.hash_info), 8)
self.assertLess(sys.hash_info.modulus, 2**sys.hash_info.width)
# sys.hash_info.modulus should be a prime; we do a quick
# probable primality test (doesn't exclude the possibility of
@@ -445,6 +446,21 @@
self.assertIsInstance(sys.hash_info.inf, int)
self.assertIsInstance(sys.hash_info.nan, int)
self.assertIsInstance(sys.hash_info.imag, int)
+ self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"})
+ self.assertIn(sys.hash_info.hash_bits, {32, 64})
+ self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
+
+ algo = sysconfig.get_config_var("PY_HASH_ALGORITHM")
+ if algo == 1:
+ self.assertEqual(sys.hash_info.algorithm, "siphash24")
+ elif algo == 2:
+ self.assertEqual(sys.hash_info.algorithm, "fnv")
+ else:
+ processor = platform.processor().lower()
+ if processor in {"sparc", "mips"}:
+ self.assertEqual(sys.hash_info.algorithm, "fnv")
+ else:
+ self.assertEqual(sys.hash_info.algorithm, "siphash24")
self.assertIsInstance(sys.maxsize, int)
self.assertIsInstance(sys.maxunicode, int)
diff -r b9623fa5a0dd -r b8d39bf9ca4a Makefile.pre.in
--- a/Makefile.pre.in Thu Oct 24 09:47:10 2013 -0700
+++ b/Makefile.pre.in Mon Oct 28 16:19:27 2013 +0100
@@ -366,6 +366,7 @@
Python/pyarena.o \
Python/pyctype.o \
Python/pyfpe.o \
+ Python/pyhash.o \
Python/pymath.o \
Python/pystate.o \
Python/pythonrun.o \
@@ -866,6 +867,7 @@
$(srcdir)/Include/pydebug.h \
$(srcdir)/Include/pyerrors.h \
$(srcdir)/Include/pyfpe.h \
+ $(srcdir)/Include/pyhash.h \
$(srcdir)/Include/pymath.h \
$(srcdir)/Include/pygetopt.h \
$(srcdir)/Include/pymacro.h \
diff -r b9623fa5a0dd -r b8d39bf9ca4a Modules/pyexpat.c
--- a/Modules/pyexpat.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Modules/pyexpat.c Mon Oct 28 16:19:27 2013 +0100
@@ -1218,7 +1218,7 @@
* has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
* to indicate that we can still use it. */
XML_SetHashSalt(self->itself,
- (unsigned long)_Py_HashSecret.prefix);
+ (unsigned long)_Py_HashSecret.ht.prefix);
#endif
XML_SetUserData(self->itself, (void *)self);
XML_SetUnknownEncodingHandler(self->itself,
diff -r b9623fa5a0dd -r b8d39bf9ca4a Objects/bytesobject.c
--- a/Objects/bytesobject.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Objects/bytesobject.c Mon Oct 28 16:19:27 2013 +0100
@@ -878,7 +878,7 @@
{
if (a->ob_shash == -1) {
/* Can't fail */
- a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+ a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
}
return a->ob_shash;
}
diff -r b9623fa5a0dd -r b8d39bf9ca4a Objects/memoryobject.c
--- a/Objects/memoryobject.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Objects/memoryobject.c Mon Oct 28 16:19:27 2013 +0100
@@ -2742,7 +2742,7 @@
}
/* Can't fail */
- self->hash = _Py_HashBytes((unsigned char *)mem, view->len);
+ self->hash = _Py_HashBytes(mem, view->len);
if (mem != view->buf)
PyMem_Free(mem);
diff -r b9623fa5a0dd -r b8d39bf9ca4a Objects/object.c
--- a/Objects/object.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Objects/object.c Mon Oct 28 16:19:27 2013 +0100
@@ -726,150 +726,6 @@
return ok;
}
-/* Set of hash utility functions to help maintaining the invariant that
- if a==b then hash(a)==hash(b)
-
- All the utility functions (_Py_Hash*()) return "-1" to signify an error.
-*/
-
-/* For numeric types, the hash of a number x is based on the reduction
- of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
- hash(x) == hash(y) whenever x and y are numerically equal, even if
- x and y have different types.
-
- A quick summary of the hashing strategy:
-
- (1) First define the 'reduction of x modulo P' for any rational
- number x; this is a standard extension of the usual notion of
- reduction modulo P for integers. If x == p/q (written in lowest
- terms), the reduction is interpreted as the reduction of p times
- the inverse of the reduction of q, all modulo P; if q is exactly
- divisible by P then define the reduction to be infinity. So we've
- got a well-defined map
-
- reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
-
- (2) Now for a rational number x, define hash(x) by:
-
- reduce(x) if x >= 0
- -reduce(-x) if x < 0
-
- If the result of the reduction is infinity (this is impossible for
- integers, floats and Decimals) then use the predefined hash value
- _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
- _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
- hashes of float and Decimal infinities and nans.
-
- A selling point for the above strategy is that it makes it possible
- to compute hashes of decimal and binary floating-point numbers
- efficiently, even if the exponent of the binary or decimal number
- is large. The key point is that
-
- reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
-
- provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
- binary or decimal float is never infinity, since the denominator is a power
- of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
- for nonnegative x,
-
- reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
-
- reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
-
- and reduce(10**e) can be computed efficiently by the usual modular
- exponentiation algorithm. For reduce(2**e) it's even better: since
- P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
- by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
-
- */
-
-Py_hash_t
-_Py_HashDouble(double v)
-{
- int e, sign;
- double m;
- Py_uhash_t x, y;
-
- if (!Py_IS_FINITE(v)) {
- if (Py_IS_INFINITY(v))
- return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
- else
- return _PyHASH_NAN;
- }
-
- m = frexp(v, &e);
-
- sign = 1;
- if (m < 0) {
- sign = -1;
- m = -m;
- }
-
- /* process 28 bits at a time; this should work well both for binary
- and hexadecimal floating point. */
- x = 0;
- while (m) {
- x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
- m *= 268435456.0; /* 2**28 */
- e -= 28;
- y = (Py_uhash_t)m; /* pull out integer part */
- m -= y;
- x += y;
- if (x >= _PyHASH_MODULUS)
- x -= _PyHASH_MODULUS;
- }
-
- /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
- e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
- x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
-
- x = x * sign;
- if (x == (Py_uhash_t)-1)
- x = (Py_uhash_t)-2;
- return (Py_hash_t)x;
-}
-
-Py_hash_t
-_Py_HashPointer(void *p)
-{
- Py_hash_t x;
- size_t y = (size_t)p;
- /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
- excessive hash collisions for dicts and sets */
- y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
- x = (Py_hash_t)y;
- if (x == -1)
- x = -2;
- return x;
-}
-
-Py_hash_t
-_Py_HashBytes(unsigned char *p, Py_ssize_t len)
-{
- Py_uhash_t x;
- Py_ssize_t i;
-
- /*
- We make the hash of the empty string be 0, rather than using
- (prefix ^ suffix), since this slightly obfuscates the hash secret
- */
-#ifdef Py_DEBUG
- assert(_Py_HashSecret_Initialized);
-#endif
- if (len == 0) {
- return 0;
- }
- x = (Py_uhash_t) _Py_HashSecret.prefix;
- x ^= (Py_uhash_t) *p << 7;
- for (i = 0; i < len; i++)
- x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
- x ^= (Py_uhash_t) len;
- x ^= (Py_uhash_t) _Py_HashSecret.suffix;
- if (x == -1)
- x = -2;
- return x;
-}
-
Py_hash_t
PyObject_HashNotImplemented(PyObject *v)
{
@@ -878,8 +734,6 @@
return -1;
}
-_Py_HashSecret_t _Py_HashSecret;
-
Py_hash_t
PyObject_Hash(PyObject *v)
{
diff -r b9623fa5a0dd -r b8d39bf9ca4a Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Objects/unicodeobject.c Mon Oct 28 16:19:27 2013 +0100
@@ -11131,39 +11131,8 @@
_PyUnicode_HASH(self) = 0;
return 0;
}
-
- /* The hash function as a macro, gets expanded three times below. */
-#define HASH(P) \
- x ^= (Py_uhash_t) *P << 7; \
- while (--len >= 0) \
- x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \
-
- x = (Py_uhash_t) _Py_HashSecret.prefix;
- switch (PyUnicode_KIND(self)) {
- case PyUnicode_1BYTE_KIND: {
- const unsigned char *c = PyUnicode_1BYTE_DATA(self);
- HASH(c);
- break;
- }
- case PyUnicode_2BYTE_KIND: {
- const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
- HASH(s);
- break;
- }
- default: {
- Py_UCS4 *l;
- assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
- "Impossible switch case in unicode_hash");
- l = PyUnicode_4BYTE_DATA(self);
- HASH(l);
- break;
- }
- }
- x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
- x ^= (Py_uhash_t) _Py_HashSecret.suffix;
-
- if (x == -1)
- x = -2;
+ x = _Py_HashBytes(PyUnicode_DATA(self),
+ PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
_PyUnicode_HASH(self) = x;
return x;
}
diff -r b9623fa5a0dd -r b8d39bf9ca4a PCbuild/pythoncore.vcxproj
--- a/PCbuild/pythoncore.vcxproj Thu Oct 24 09:47:10 2013 -0700
+++ b/PCbuild/pythoncore.vcxproj Mon Oct 28 16:19:27 2013 +0100
@@ -412,6 +412,7 @@
+
@@ -613,6 +614,7 @@
+
diff -r b9623fa5a0dd -r b8d39bf9ca4a PCbuild/pythoncore.vcxproj.filters
--- a/PCbuild/pythoncore.vcxproj.filters Thu Oct 24 09:47:10 2013 -0700
+++ b/PCbuild/pythoncore.vcxproj.filters Mon Oct 28 16:19:27 2013 +0100
@@ -412,6 +412,9 @@
Python
+
+ Include
+
@@ -922,6 +925,9 @@
Modules
+
+ Python
+
diff -r b9623fa5a0dd -r b8d39bf9ca4a Python/pyhash.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/pyhash.c Mon Oct 28 16:19:27 2013 +0100
@@ -0,0 +1,423 @@
+/* Set of hash utility functions to help maintaining the invariant that
+ if a==b then hash(a)==hash(b)
+
+ All the utility functions (_Py_Hash*()) return "-1" to signify an error.
+*/
+#include "Python.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_Py_HashSecret_t _Py_HashSecret;
+
+#if PY_HASH_ALGORITHM == PY_HASH_EXTERNAL
+extern PyHash_FuncDef PyHash_Func;
+#else
+static PyHash_FuncDef PyHash_Func;
+#endif
+
+/* cutoff for small string DJBX33A optimization in range [1, cutoff).
+ *
+ * About 50% of the strings in a typical Python application are smaller than
+ * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
+ * NEVER use DJBX33A for long strings!
+ *
+ * A PY_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
+ * use a small cutoff because it is easier to create colliding strings. The
+ * selected values have a decent safety margin.
+ */
+#ifndef PY_HASH_CUTOFF
+# if SIZEOF_PY_UHASH_T == 4
+# define PY_HASH_CUTOFF 5
+# else
+# define PY_HASH_CUTOFF 7
+# endif /* SIZEOF_PY_UHASH_T == 4 */
+#elif (PY_HASH_CUTOFF > 7 || PY_HASH_CUTOFF < 0)
+# error PY_HASH_CUTOFF must in range 0...7.
+#endif /* PY_HASH_CUTOFF */
+
+/* Count _Py_HashBytes() calls */
+#ifdef PY_HASH_STATS
+#define PY_HASH_STATS_MAX 32
+static Py_ssize_t hashstats[PY_HASH_STATS_MAX + 1] = {0};
+#endif
+
+/* For numeric types, the hash of a number x is based on the reduction
+ of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
+ hash(x) == hash(y) whenever x and y are numerically equal, even if
+ x and y have different types.
+
+ A quick summary of the hashing strategy:
+
+ (1) First define the 'reduction of x modulo P' for any rational
+ number x; this is a standard extension of the usual notion of
+ reduction modulo P for integers. If x == p/q (written in lowest
+ terms), the reduction is interpreted as the reduction of p times
+ the inverse of the reduction of q, all modulo P; if q is exactly
+ divisible by P then define the reduction to be infinity. So we've
+ got a well-defined map
+
+ reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
+
+ (2) Now for a rational number x, define hash(x) by:
+
+ reduce(x) if x >= 0
+ -reduce(-x) if x < 0
+
+ If the result of the reduction is infinity (this is impossible for
+ integers, floats and Decimals) then use the predefined hash value
+ _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
+ _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
+ hashes of float and Decimal infinities and nans.
+
+ A selling point for the above strategy is that it makes it possible
+ to compute hashes of decimal and binary floating-point numbers
+ efficiently, even if the exponent of the binary or decimal number
+ is large. The key point is that
+
+ reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
+
+ provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
+ binary or decimal float is never infinity, since the denominator is a power
+ of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
+ for nonnegative x,
+
+ reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
+
+ reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
+
+ and reduce(10**e) can be computed efficiently by the usual modular
+ exponentiation algorithm. For reduce(2**e) it's even better: since
+ P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
+ by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
+
+ */
+
+Py_hash_t
+_Py_HashDouble(double v)
+{
+ int e, sign;
+ double m;
+ Py_uhash_t x, y;
+
+ if (!Py_IS_FINITE(v)) {
+ if (Py_IS_INFINITY(v))
+ return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
+ else
+ return _PyHASH_NAN;
+ }
+
+ m = frexp(v, &e);
+
+ sign = 1;
+ if (m < 0) {
+ sign = -1;
+ m = -m;
+ }
+
+ /* process 28 bits at a time; this should work well both for binary
+ and hexadecimal floating point. */
+ x = 0;
+ while (m) {
+ x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+ m *= 268435456.0; /* 2**28 */
+ e -= 28;
+ y = (Py_uhash_t)m; /* pull out integer part */
+ m -= y;
+ x += y;
+ if (x >= _PyHASH_MODULUS)
+ x -= _PyHASH_MODULUS;
+ }
+
+ /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
+ e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+ x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+ x = x * sign;
+ if (x == (Py_uhash_t)-1)
+ x = (Py_uhash_t)-2;
+ return (Py_hash_t)x;
+}
+
+Py_hash_t
+_Py_HashPointer(void *p)
+{
+ Py_hash_t x;
+ size_t y = (size_t)p;
+ /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
+ excessive hash collisions for dicts and sets */
+ y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
+ x = (Py_hash_t)y;
+ if (x == -1)
+ x = -2;
+ return x;
+}
+
+Py_hash_t
+_Py_HashBytes(const void *src, Py_ssize_t len)
+{
+ Py_hash_t x;
+ /*
+ We make the hash of the empty string be 0, rather than using
+ (prefix ^ suffix), since this slightly obfuscates the hash secret
+ */
+ if (len == 0) {
+ return 0;
+ }
+
+#ifdef PY_HASH_STATS
+ hashstats[(len <= PY_HASH_STATS_MAX) ? len : 0]++;
+#endif
+
+#if PY_HASH_CUTOFF > 0
+ if (len < PY_HASH_CUTOFF) {
+ /* Optimize hashing of very small strings with inline DJBX33A. */
+ Py_uhash_t hash;
+ const unsigned char *p = src;
+ hash = 5381; /* DJBX33A starts with 5381 */
+
+ switch(len) {
+ /* ((hash << 5) + hash) + *p == hash * 33 + *p */
+ /* case 7: hash = ((hash << 5) + hash) + *p++; */
+ case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
+ case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
+ case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
+ case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
+ case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
+ case 1: hash = ((hash << 5) + hash) + *p++; break;
+ default:
+ assert(0);
+ }
+ hash ^= len;
+ hash ^= (Py_uhash_t) _Py_HashSecret.small.suffix;
+ x = (Py_hash_t)hash;
+ }
+ else
+#endif /* PY_HASH_CUTOFF */
+ x = PyHash_Func.hash(src, len);
+
+ if (x == -1)
+ return -2;
+ return x;
+}
+
+void
+_PyHash_Fini(void)
+{
+#ifdef PY_HASH_STATS
+ int i;
+ Py_ssize_t total = 0;
+ char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n";
+
+ fprintf(stderr, "len calls total\n");
+ for (i = 1; i <= PY_HASH_STATS_MAX; i++) {
+ total += hashstats[i];
+ fprintf(stderr, fmt, i, hashstats[i], total);
+ }
+ total += hashstats[0];
+ fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
+ hashstats[0], total);
+#endif
+}
+
+PyHash_FuncDef *
+PyHash_GetFuncDef(void)
+{
+ return &PyHash_Func;
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
+#else
+#if SIZEOF_PY_UHASH_T == 4
+#define PY_UHASH_CPY(dst, src) do { \
+ dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
+ } while(0)
+#elif SIZEOF_PY_UHASH_T == 8
+#define PY_UHASH_CPY(dst, src) do { \
+ dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
+ dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
+ } while(0)
+#else
+#error SIZEOF_PY_UHASH_T must be 4 or 8
+#endif /* SIZEOF_PY_UHASH_T */
+#endif /* __GNUC__ || __clang__ */
+
+
+#if PY_HASH_ALGORITHM == PY_HASH_FNV
+/* **************************************************************************
+ * Modified Fowler-Noll-Vo (FNV) hash function
+ */
+static Py_hash_t
+fnv(const void *src, Py_ssize_t len)
+{
+ const unsigned char *p = src;
+ Py_uhash_t x;
+ Py_ssize_t remainder, blocks;
+ union {
+ Py_uhash_t value;
+ unsigned char bytes[SIZEOF_PY_UHASH_T];
+ } block;
+
+#ifdef Py_DEBUG
+ assert(_Py_HashSecret_Initialized);
+#endif
+ remainder = len % SIZEOF_PY_UHASH_T;
+ if (remainder == 0) {
+ /* Process at least one block byte by byte to reduce hash collisions
+ * for strings with common prefixes. */
+ remainder = SIZEOF_PY_UHASH_T;
+ }
+ blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
+
+ x = (Py_uhash_t) _Py_HashSecret.ht.prefix;
+ x ^= (Py_uhash_t) *p << 7;
+ while (blocks--) {
+ PY_UHASH_CPY(block.bytes, p);
+ x = (_PyHASH_MULTIPLIER * x) ^ block.value;
+ p += SIZEOF_PY_UHASH_T;
+ }
+ /* add remainder */
+ for (; remainder > 0; remainder--)
+ x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
+ x ^= (Py_uhash_t) len;
+ x ^= (Py_uhash_t) _Py_HashSecret.ht.suffix;
+ if (x == -1) {
+ x = -2;
+ }
+ return x;
+}
+
+static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
+ 16 * SIZEOF_PY_HASH_T};
+
+#endif /* PY_HASH_ALGORITHM == PY_HASH_FNV */
+
+
+#if PY_HASH_ALGORITHM == PY_HASH_SIPHASH24
+/* **************************************************************************
+
+ Copyright (c) 2013 Marek Majkowski
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+
+
+ Original location:
+ https://github.com/majek/csiphash/
+
+ Solution inspired by code from:
+ Samuel Neves (supercop/crypto_auth/siphash24/little)
+ djb (supercop/crypto_auth/siphash24/little2)
+ Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
+
+ Modified for Python by Christian Heimes:
+ - C89 / MSVC compatibility
+ - PY_UINT64_T, PY_UINT32_T and PY_UINT8_T
+ - _rotl64() on Windows
+*/
+
+typedef unsigned char PY_UINT8_T;
+
+/* The original code handles byte order. Python ignores the byte order here
+ * because doesn't aim for equal hash values across platforms and
+ * architectures.
+ */
+#define _le64toh(x) ((PY_UINT64_T)(x))
+
+#ifdef _MSC_VER
+#define ROTATE(x, b) _rotl64(x, b)
+#else
+#define ROTATE(x, b) (PY_UINT64_T)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+#endif
+
+#define HALF_ROUND(a,b,c,d,s,t) \
+ a += b; c += d; \
+ b = ROTATE(b, s) ^ a; \
+ d = ROTATE(d, t) ^ c; \
+ a = ROTATE(a, 32);
+
+#define DOUBLE_ROUND(v0,v1,v2,v3) \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21); \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21);
+
+
+static Py_hash_t
+siphash24(const void *src, Py_ssize_t src_sz) {
+ PY_UINT64_T k0 = _le64toh(_Py_HashSecret.ui64.k0);
+ PY_UINT64_T k1 = _le64toh(_Py_HashSecret.ui64.k1);
+ PY_UINT64_T b = (PY_UINT64_T)src_sz << 56;
+ const PY_UINT64_T *in = (PY_UINT64_T*)src;
+
+ PY_UINT64_T v0 = k0 ^ 0x736f6d6570736575ULL;
+ PY_UINT64_T v1 = k1 ^ 0x646f72616e646f6dULL;
+ PY_UINT64_T v2 = k0 ^ 0x6c7967656e657261ULL;
+ PY_UINT64_T v3 = k1 ^ 0x7465646279746573ULL;
+
+ PY_UINT64_T t;
+ PY_UINT8_T *pt;
+ PY_UINT8_T *m;
+
+ while (src_sz >= 8) {
+ PY_UINT64_T mi = _le64toh(*in);
+ in += 1;
+ src_sz -= 8;
+ v3 ^= mi;
+ DOUBLE_ROUND(v0,v1,v2,v3);
+ v0 ^= mi;
+ }
+
+ t = 0;
+ pt = (PY_UINT8_T *)&t;
+ m = (PY_UINT8_T *)in;
+ switch (src_sz) {
+ case 7: pt[6] = m[6];
+ case 6: pt[5] = m[5];
+ case 5: pt[4] = m[4];
+ case 4: *((PY_UINT32_T*)&pt[0]) = *((PY_UINT32_T*)&m[0]); break;
+ case 3: pt[2] = m[2];
+ case 2: pt[1] = m[1];
+ case 1: pt[0] = m[0];
+ }
+ b |= _le64toh(t);
+
+ v3 ^= b;
+ DOUBLE_ROUND(v0,v1,v2,v3);
+ v0 ^= b;
+ v2 ^= 0xff;
+ DOUBLE_ROUND(v0,v1,v2,v3);
+ DOUBLE_ROUND(v0,v1,v2,v3);
+
+ /* modified */
+ t = (v0 ^ v1) ^ (v2 ^ v3);
+#if SIZEOF_VOID_P == 4
+ t ^= (t >> 32);
+#endif
+ return (Py_hash_t)t;
+}
+
+static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
+
+#endif /* PY_HASH_ALGORITHM == PY_HASH_SIPHASH24 */
+
+#ifdef __cplusplus
+}
+#endif
diff -r b9623fa5a0dd -r b8d39bf9ca4a Python/pythonrun.c
--- a/Python/pythonrun.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Python/pythonrun.c Mon Oct 28 16:19:27 2013 +0100
@@ -92,6 +92,7 @@
extern void PyLong_Fini(void);
extern int _PyFaulthandler_Init(void);
extern void _PyFaulthandler_Fini(void);
+extern void _PyHash_Fini(void);
#ifdef WITH_THREAD
extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *);
@@ -630,6 +631,8 @@
#ifdef COUNT_ALLOCS
dump_counts(stdout);
#endif
+ /* dump hash stats */
+ _PyHash_Fini();
PRINT_TOTAL_REFS();
diff -r b9623fa5a0dd -r b8d39bf9ca4a Python/random.c
--- a/Python/random.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Python/random.c Mon Oct 28 16:19:27 2013 +0100
@@ -95,7 +95,7 @@
/* Read size bytes from /dev/urandom into buffer.
Call Py_FatalError() on error. */
static void
-dev_urandom_noraise(char *buffer, Py_ssize_t size)
+dev_urandom_noraise(unsigned char *buffer, Py_ssize_t size)
{
int fd;
Py_ssize_t n;
@@ -249,8 +249,9 @@
_PyRandom_Init(void)
{
char *env;
- void *secret = &_Py_HashSecret;
+ unsigned char *secret = (unsigned char *)&_Py_HashSecret.uc;
Py_ssize_t secret_size = sizeof(_Py_HashSecret_t);
+ assert(secret_size == sizeof(_Py_HashSecret.uc));
if (_Py_HashSecret_Initialized)
return;
@@ -278,17 +279,17 @@
memset(secret, 0, secret_size);
}
else {
- lcg_urandom(seed, (unsigned char*)secret, secret_size);
+ lcg_urandom(seed, secret, secret_size);
}
}
else {
#ifdef MS_WINDOWS
- (void)win32_urandom((unsigned char *)secret, secret_size, 0);
+ (void)win32_urandom(secret, secret_size, 0);
#else /* #ifdef MS_WINDOWS */
# ifdef __VMS
- vms_urandom((unsigned char *)secret, secret_size, 0);
+ vms_urandom(secret, secret_size, 0);
# else
- dev_urandom_noraise((char*)secret, secret_size);
+ dev_urandom_noraise(secret, secret_size);
# endif
#endif
}
diff -r b9623fa5a0dd -r b8d39bf9ca4a Python/sysmodule.c
--- a/Python/sysmodule.c Thu Oct 24 09:47:10 2013 -0700
+++ b/Python/sysmodule.c Mon Oct 28 16:19:27 2013 +0100
@@ -617,7 +617,7 @@
"hash_info\n\
\n\
A struct sequence providing parameters used for computing\n\
-numeric hashes. The attributes are read only.");
+hashes. The attributes are read only.");
static PyStructSequence_Field hash_info_fields[] = {
{"width", "width of the type used for hashing, in bits"},
@@ -626,6 +626,10 @@
{"inf", "value to be used for hash of a positive infinity"},
{"nan", "value to be used for hash of a nan"},
{"imag", "multiplier used for the imaginary part of a complex number"},
+ {"algorithm", "name of the algorithm for hashing of str, bytes and "
+ "memoryviews"},
+ {"hash_bits", "internal output size of hash algorithm"},
+ {"seed_bits", "seed size of hash algorithm"},
{NULL, NULL}
};
@@ -633,7 +637,7 @@
"sys.hash_info",
hash_info_doc,
hash_info_fields,
- 5,
+ 8,
};
static PyObject *
@@ -641,9 +645,11 @@
{
PyObject *hash_info;
int field = 0;
+ PyHash_FuncDef *hashfunc;
hash_info = PyStructSequence_New(&Hash_InfoType);
if (hash_info == NULL)
return NULL;
+ hashfunc = PyHash_GetFuncDef();
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(8*sizeof(Py_hash_t)));
PyStructSequence_SET_ITEM(hash_info, field++,
@@ -654,6 +660,12 @@
PyLong_FromLong(_PyHASH_NAN));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_IMAG));
+ PyStructSequence_SET_ITEM(hash_info, field++,
+ PyUnicode_FromString(hashfunc->name));
+ PyStructSequence_SET_ITEM(hash_info, field++,
+ PyLong_FromLong(hashfunc->hash_bits));
+ PyStructSequence_SET_ITEM(hash_info, field++,
+ PyLong_FromLong(hashfunc->seed_bits));
if (PyErr_Occurred()) {
Py_CLEAR(hash_info);
return NULL;
@@ -1298,6 +1310,7 @@
executable -- absolute path of the executable binary of the Python interpreter\n\
float_info -- a struct sequence with information about the float implementation.\n\
float_repr_style -- string indicating the style of repr() output for floats\n\
+hash_info -- a struct sequenece with information about the hash algorithm.\n\
hexversion -- version information encoded as a single integer\n\
implementation -- Python implementation information.\n\
int_info -- a struct sequence with information about the int implementation.\n\
diff -r b9623fa5a0dd -r b8d39bf9ca4a configure
--- a/configure Thu Oct 24 09:47:10 2013 -0700
+++ b/configure Mon Oct 28 16:19:27 2013 +0100
@@ -792,6 +792,7 @@
enable_shared
enable_profiling
with_pydebug
+with_hash_algorithm
with_libs
with_system_expat
with_system_ffi
@@ -1465,6 +1466,8 @@
compiler
--with-suffix=.exe set executable suffix
--with-pydebug build with Py_DEBUG defined
+ --with-hash-algorithm=[fnv|siphash24]
+ select hash algorithm
--with-libs='lib1 ...' link against additional libs
--with-system-expat build pyexpat module using an installed expat
library
@@ -8987,6 +8990,79 @@
*) ;;
esac
+# check for systems that require aligned memory access
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking aligned memory access is required" >&5
+$as_echo_n "checking aligned memory access is required... " >&6; }
+if test "$cross_compiling" = yes; then :
+ aligned_required=yes
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int main()
+{
+ char s[16];
+ int i, *p1, *p2;
+ for (i=0; i < 16; i++)
+ s[i] = i;
+ p1 = (int*)(s+1);
+ p2 = (int*)(s+2);
+ if (*p1 == *p2)
+ return 1;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ aligned_required=no
+else
+ aligned_required=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
+if test "$aligned_required" = yes ; then
+
+$as_echo "#define HAVE_ALIGNED_REQUIRED 1" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $aligned_required" >&5
+$as_echo "$aligned_required" >&6; }
+
+
+# str, bytes and memoryview hash algorithm
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-hash-algorithm" >&5
+$as_echo_n "checking for --with-hash-algorithm... " >&6; }
+
+# Check whether --with-hash_algorithm was given.
+if test "${with_hash_algorithm+set}" = set; then :
+ withval=$with_hash_algorithm;
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
+$as_echo "$withval" >&6; }
+case "$withval" in
+ siphash24)
+ $as_echo "#define PY_HASH_ALGORITHM 1" >>confdefs.h
+
+ ;;
+ fnv)
+ $as_echo "#define PY_HASH_ALGORITHM 2" >>confdefs.h
+
+ ;;
+ *)
+ as_fn_error $? "unknown hash algorithm '$withval'" "$LINENO" 5
+ ;;
+esac
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: default" >&5
+$as_echo "default" >&6; }
+fi
+
+
# Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for t_open in -lnsl" >&5
$as_echo_n "checking for t_open in -lnsl... " >&6; }
diff -r b9623fa5a0dd -r b8d39bf9ca4a configure.ac
--- a/configure.ac Thu Oct 24 09:47:10 2013 -0700
+++ b/configure.ac Mon Oct 28 16:19:27 2013 +0100
@@ -2229,6 +2229,59 @@
*) ;;
esac
+# check for systems that require aligned memory access
+AC_MSG_CHECKING(aligned memory access is required)
+AC_TRY_RUN([
+int main()
+{
+ char s[16];
+ int i, *p1, *p2;
+ for (i=0; i < 16; i++)
+ s[i] = i;
+ p1 = (int*)(s+1);
+ p2 = (int*)(s+2);
+ if (*p1 == *p2)
+ return 1;
+ return 0;
+}
+ ],
+ [aligned_required=no],
+ [aligned_required=yes],
+ [aligned_required=yes])
+
+if test "$aligned_required" = yes ; then
+ AC_DEFINE([HAVE_ALIGNED_REQUIRED], [1],
+ [Define if aligned memory access is required])
+fi
+AC_MSG_RESULT($aligned_required)
+
+
+# str, bytes and memoryview hash algorithm
+AH_TEMPLATE(PY_HASH_ALGORITHM,
+ [Define hash algorithm for str, bytes and memoryview.
+ SipHash24: 1, FNV: 2, externally defined: 0])
+
+AC_MSG_CHECKING(for --with-hash-algorithm)
+dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
+AC_ARG_WITH(hash_algorithm,
+ AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@],
+ [select hash algorithm]),
+[
+AC_MSG_RESULT($withval)
+case "$withval" in
+ siphash24)
+ AC_DEFINE(PY_HASH_ALGORITHM, 1)
+ ;;
+ fnv)
+ AC_DEFINE(PY_HASH_ALGORITHM, 2)
+ ;;
+ *)
+ AC_MSG_ERROR([unknown hash algorithm '$withval'])
+ ;;
+esac
+],
+[AC_MSG_RESULT(default)])
+
# Most SVR4 platforms (e.g. Solaris) need -lsocket and -lnsl.
AC_CHECK_LIB(nsl, t_open, [LIBS="-lnsl $LIBS"]) # SVR4
AC_CHECK_LIB(socket, socket, [LIBS="-lsocket $LIBS"], [], $LIBS) # SVR4 sockets
diff -r b9623fa5a0dd -r b8d39bf9ca4a pyconfig.h.in
--- a/pyconfig.h.in Thu Oct 24 09:47:10 2013 -0700
+++ b/pyconfig.h.in Mon Oct 28 16:19:27 2013 +0100
@@ -49,6 +49,9 @@
/* Define to 1 if you have the `alarm' function. */
#undef HAVE_ALARM
+/* Define if aligned memory access is required */
+#undef HAVE_ALIGNED_REQUIRED
+
/* Define to 1 if you have the header file. */
#undef HAVE_ALLOCA_H
@@ -1187,6 +1190,10 @@
/* Define to printf format modifier for Py_ssize_t */
#undef PY_FORMAT_SIZE_T
+/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
+ externally defined: 0 */
+#undef PY_HASH_ALGORITHM
+
/* Define if you want to build an interpreter with many run-time checks. */
#undef Py_DEBUG