diff -r 5d45e27bcecf Lib/decimal.py --- a/Lib/decimal.py Sat Mar 20 19:09:14 2010 +0100 +++ b/Lib/decimal.py Sat Mar 20 20:32:10 2010 +0000 @@ -928,33 +928,27 @@ def __hash__(self): """x.__hash__() <==> hash(x)""" - # Decimal integers must hash the same as the ints - # - # The hash of a nonspecial noninteger Decimal must depend only - # on the value of that Decimal, and not on its representation. - # For example: hash(Decimal('100E-1')) == hash(Decimal('10')). + if self._is_special: - if self._isnan(): - raise TypeError('Cannot hash a NaN value.') - return hash(str(self)) + if self.is_infinite(): + return _PyHASH_INF if self > 0 else _PyHASH_NINF + else: + return _PyHASH_NAN if not self: return 0 - if self._isinteger(): - op = _WorkRep(self.to_integral_value()) - # to make computation feasible for Decimals with large - # exponent, we use the fact that hash(n) == hash(m) for - # any two nonzero integers n and m such that (i) n and m - # have the same sign, and (ii) n is congruent to m modulo - # 2**64-1. So we can replace hash((-1)**s*c*10**e) with - # hash((-1)**s*c*pow(10, e, 2**64-1). - return hash((-1)**op.sign*op.int*pow(10, op.exp, 2**64-1)) - # The value of a nonzero nonspecial Decimal instance is - # faithfully represented by the triple consisting of its sign, - # its adjusted exponent, and its coefficient with trailing - # zeros removed. - return hash((self._sign, - self._exp+len(self._int), - self._int.rstrip('0'))) + + if self._exp >= 0: + exp_hash = pow(10, self._exp, _PyHASH_MASK) + else: + exp_hash = pow(_PyHASH_10INV, -self._exp, _PyHASH_MASK) + + hash_ = 1 + (int(self._int) * exp_hash - 1) % _PyHASH_MASK + + if self < 0: + hash_ = -hash_ + if hash_ == -1: + hash_ = -2 + return hash_ def as_tuple(self): """Represents the number as a triple tuple. 
@@ -6119,6 +6113,15 @@ # _SignedInfinity[sign] is infinity w/ that sign _SignedInfinity = (_Infinity, _NegativeInfinity) +# Constants related to the hash implementation; hash(x) is based +# on the reduction of x modulo 2**_PyHASH_BITS - 1. +_PyHASH_BITS = 31 +_PyHASH_MASK = (1 << _PyHASH_BITS) - 1 +_PyHASH_INF = 314159 +_PyHASH_NINF = -271828 +_PyHASH_NAN = 15858 +_PyHASH_10INV = 1503238553 # inverse of 10 modulo _PyHASH_MASK + if __name__ == '__main__': diff -r 5d45e27bcecf Lib/fractions.py --- a/Lib/fractions.py Sat Mar 20 19:09:14 2010 +0100 +++ b/Lib/fractions.py Sat Mar 20 20:32:10 2010 +0000 @@ -10,8 +10,6 @@ __all__ = ['Fraction', 'gcd'] - - def gcd(a, b): """Calculate the Greatest Common Divisor of a and b. @@ -22,6 +20,28 @@ a, b = b, a%b return a +def _invmod(a, n): + """Compute the inverse of the integer a modulo the integer n. + + Raises ZeroDivisionError if no such inverse exists. + + """ + g0, g1 = a, n + x0, x1 = 1, 0 + while g1: + # invariants: x0*a = g0 modulo n; x1*a = g1 modulo n. + q = g0 // g1 + g0, g1 = g1, g0 - q*g1 + x0, x1 = x1, x0 - q*x1 + if abs(g0) != 1: + raise ZeroDivisionError("not invertible") + return (x0 // g0) % n + +# Constants related to the hash implementation; hash(x) is based +# on the reduction of x modulo 2**_PyHASH_BITS - 1. +_PyHASH_BITS = 31 +_PyHASH_MASK = (1 << _PyHASH_BITS) - 1 +_PyHASH_INF = 314159 _RATIONAL_FORMAT = re.compile(r""" \A\s* # optional whitespace at the start, then @@ -482,16 +502,22 @@ """ # XXX since this method is expensive, consider caching the result - if self._denominator == 1: - # Get integers right. - return hash(self._numerator) - # Expensive check, but definitely correct. - if self == float(self): - return hash(float(self)) - else: - # Use tuple's hash to avoid a high collision rate on - # simple fractions. 
- return hash((self._numerator, self._denominator)) + + if not self: + return 0 + + n = abs(self._numerator) % _PyHASH_MASK + d = self._denominator % _PyHASH_MASK + try: + hash_ = 1 + (n * _invmod(d, _PyHASH_MASK) - 1) % _PyHASH_MASK + except ZeroDivisionError: + return _PyHASH_INF + + if self < 0: + hash_ = -hash_ + if hash_ == -1: + hash_ = -2 + return hash_ def __eq__(a, b): """a == b""" diff -r 5d45e27bcecf Lib/test/test_decimal.py --- a/Lib/test/test_decimal.py Sat Mar 20 19:09:14 2010 +0100 +++ b/Lib/test/test_decimal.py Sat Mar 20 20:32:10 2010 +0000 @@ -1225,7 +1225,7 @@ #the same hash that to an int self.assertEqual(hash(Decimal(23)), hash(23)) - self.assertRaises(TypeError, hash, Decimal('NaN')) + #self.assertRaises(TypeError, hash, Decimal('NaN')) self.assertTrue(hash(Decimal('Inf'))) self.assertTrue(hash(Decimal('-Inf'))) diff -r 5d45e27bcecf Lib/test/test_numeric_tower.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_numeric_tower.py Sat Mar 20 20:32:10 2010 +0000 @@ -0,0 +1,115 @@ +# test interactions between int, float, Decimal and Fraction + +import unittest +import random +import math +from test.support import run_unittest + +from decimal import Decimal as D +from fractions import Fraction as F + +# Constants related to the hash implementation; hash(x) is based +# on the reduction of x modulo 2**_PyHASH_BITS - 1.
+_PyHASH_BITS = 31 +_PyHASH_MASK = (1 << _PyHASH_BITS) - 1 + +class HashTest(unittest.TestCase): + def check_equal_hash(self, x, y): + self.assertEqual(hash(x), hash(y), + "got different hashes for {!r} and {!r}".format(x, y)) + + def test_bools(self): + self.check_equal_hash(False, 0) + self.check_equal_hash(True, 1) + + def test_integers(self): + # check that equal values hash equal + + # exact integers + for i in range(-1000, 1000): + self.check_equal_hash(i, float(i)) + self.check_equal_hash(i, D(i)) + self.check_equal_hash(i, F(i)) + + # the current hash is based on reduction modulo 2**n-1 for some + # n, so pay special attention to numbers of the form 2**n and 2**n-1. + for i in range(100): + n = 2**i - 1 + if n == int(float(n)): + self.check_equal_hash(n, float(n)) + self.check_equal_hash(-n, -float(n)) + self.check_equal_hash(n, D(n)) + self.check_equal_hash(n, F(n)) + self.check_equal_hash(-n, D(-n)) + self.check_equal_hash(-n, F(-n)) + + n = 2**i + self.check_equal_hash(n, float(n)) + self.check_equal_hash(-n, -float(n)) + self.check_equal_hash(n, D(n)) + self.check_equal_hash(n, F(n)) + self.check_equal_hash(-n, D(-n)) + self.check_equal_hash(-n, F(-n)) + + # random values of various sizes + for _ in range(1000): + e = random.randrange(300) + n = random.randrange(-10**e, 10**e) + self.check_equal_hash(n, D(n)) + self.check_equal_hash(n, F(n)) + if n == int(float(n)): + self.check_equal_hash(n, float(n)) + + def test_binary_floats(self): + # check that floats hash equal to corresponding Fractions and Decimals + + # zeros + self.check_equal_hash(0.0, D(0)) + self.check_equal_hash(-0.0, D(0)) + self.check_equal_hash(-0.0, D('-0.0')) + self.check_equal_hash(0.0, F(0)) + + # infinities (nan hashes are not checked here) + self.check_equal_hash(float('inf'), D('inf')) + self.check_equal_hash(float('-inf'), D('-inf')) + + for _ in range(1000): + x = random.random() * math.exp(random.random()*200.0 - 100.0) + self.check_equal_hash(x, D.from_float(x)) + self.check_equal_hash(x,
F.from_float(x)) + + def test_fractions(self): + # hashes are based on reduction modulo _PyHASH_MASK, so just check + # that we can still compute a hash without error for Fractions + # whose reduction modulo _PyHASH_MASK is infinite. + hash(F(1, _PyHASH_MASK)) + hash(F(-12345, _PyHASH_MASK)) + hash(F(_PyHASH_MASK, 5*_PyHASH_MASK)) + + def test_hash_normalization(self): + # Test for a bug encountered while changing long_hash. + # + # Given objects x and y, it should be possible for y's + # __hash__ method to return hash(x) in order to ensure that + # hash(x) == hash(y). But hash(x) is not exactly equal to the + # result of x.__hash__(): there's some internal normalization + # to make sure that the result fits in a C long, and is not + # equal to the invalid hash value -1. This internal + # normalization must therefore not change the result of + # hash(x) for any x. + + class HalibutProxy: + def __hash__(self): + return hash('halibut') + def __eq__(self, other): + return other == 'halibut' + + x = {'halibut', HalibutProxy()} + self.assertEqual(len(x), 1) + + +def test_main(): + run_unittest(HashTest) + +if __name__ == '__main__': + test_main() diff -r 5d45e27bcecf Objects/longobject.c --- a/Objects/longobject.c Sat Mar 20 19:09:14 2010 +0100 +++ b/Objects/longobject.c Sat Mar 20 20:32:10 2010 +0000 @@ -2572,18 +2572,21 @@ sign = -1; i = -(i); } - /* The following loop produces a C unsigned long x such that x is - congruent to the absolute value of v modulo ULONG_MAX. The - resulting x is nonzero if and only if v is. */ + + +#define PyHASH_BITS 31 +#define PyHASH_MASK ((1UL << PyHASH_BITS) - 1) + while (--i >= 0) { - /* Force a native long #-bits (32 or 64) circular shift */ - x = (x >> (8*SIZEOF_LONG-PyLong_SHIFT)) | (x << PyLong_SHIFT); + /* Rotate bottom 31 bits left by PyLong_SHIFT bits; in effect, + this multiplies by 2**PyLong_SHIFT modulo 2**31 - 1. 
*/ + x = ((x << PyLong_SHIFT) & PyHASH_MASK) | x >> (PyHASH_BITS - PyLong_SHIFT); x += v->ob_digit[i]; /* If the addition above overflowed we compensate by incrementing. This preserves the value modulo ULONG_MAX. */ - if (x < v->ob_digit[i]) - x++; + if (x > PyHASH_MASK) + x -= PyHASH_MASK; } x = x * sign; if (x == (unsigned long)-1) diff -r 5d45e27bcecf Objects/object.c --- a/Objects/object.c Sat Mar 20 19:09:14 2010 +0100 +++ b/Objects/object.c Sat Mar 20 20:32:10 2010 +0000 @@ -644,60 +644,56 @@ All the utility functions (_Py_Hash*()) return "-1" to signify an error. */ +#define PyHASH_BITS 31 /* hash is based on reduction modulo 2**PyHASH_BITS - 1 */ +#define PyHASH_MASK ((1UL << PyHASH_BITS) - 1) +#define PyHASH_INF 314159 +#define PyHASH_NINF -271828 +#define PyHASH_NAN 15858 + long _Py_HashDouble(double v) { - double intpart, fractpart; - int expo; - long hipart; - long x; /* the final hash value */ - /* This is designed so that Python numbers of different types - * that compare equal hash to the same value; otherwise comparisons - * of mapping keys will turn out weird. - */ + int e, sign; + double m; + unsigned long x, y; - fractpart = modf(v, &intpart); - if (fractpart == 0.0) { - /* This must return the same hash as an equal int or long. */ - if (intpart > LONG_MAX/2 || -intpart > LONG_MAX/2) { - /* Convert to long and use its hash. */ - PyObject *plong; /* converted to Python long */ - if (Py_IS_INFINITY(intpart)) - /* can't convert to long int -- arbitrary */ - v = v < 0 ? -271828.0 : 314159.0; - plong = PyLong_FromDouble(v); - if (plong == NULL) - return -1; - x = PyObject_Hash(plong); - Py_DECREF(plong); - return x; - } - /* Fits in a C long == a Python int, so is its own hash. */ - x = (long)intpart; - if (x == -1) - x = -2; - return x; + if (!Py_IS_FINITE(v)) { + if (Py_IS_INFINITY(v)) + return v > 0 ? 
PyHASH_INF : PyHASH_NINF; + else + return PyHASH_NAN; } - /* The fractional part is non-zero, so we don't have to worry about - * making this match the hash of some other type. - * Use frexp to get at the bits in the double. - * Since the VAX D double format has 56 mantissa bits, which is the - * most of any double format in use, each of these parts may have as - * many as (but no more than) 56 significant bits. - * So, assuming sizeof(long) >= 4, each part can be broken into two - * longs; frexp and multiplication are used to do that. - * Also, since the Cray double format has 15 exponent bits, which is - * the most of any double format in use, shifting the exponent field - * left by 15 won't overflow a long (again assuming sizeof(long) >= 4). - */ - v = frexp(v, &expo); - v *= 2147483648.0; /* 2**31 */ - hipart = (long)v; /* take the top 32 bits */ - v = (v - (double)hipart) * 2147483648.0; /* get the next 32 bits */ - x = hipart + (long)v + (expo << 15); - if (x == -1) - x = -2; - return x; + + m = frexp(v, &e); + + sign = 1; + if (m < 0) { + sign = -1; + m = -m; + } + + /* process 28 bits at a time; this should work well both for binary + and hexadecimal floating point. */ + x = 0; + while (m) { + x = ((x << 28) & PyHASH_MASK) | x >> (PyHASH_BITS - 28); + m *= 268435456.0; /* 2**28 */ + e -= 28; + y = (unsigned long)m; /* pull out integer part */ + m -= y; + x += y; + if (x > PyHASH_MASK) + x -= PyHASH_MASK; + } + + /* adjust for the exponent; first reduce it modulo PyHASH_BITS */ + e = e >= 0 ? 
e % PyHASH_BITS : PyHASH_BITS-1-((-1-e) % PyHASH_BITS); + x = ((x << e) & PyHASH_MASK) | x >> (PyHASH_BITS - e); + + x = x * sign; + if (x == (unsigned long)-1) + x = (unsigned long)-2; + return (long)x; } long diff -r 5d45e27bcecf Objects/typeobject.c --- a/Objects/typeobject.c Sat Mar 20 19:09:14 2010 +0100 +++ b/Objects/typeobject.c Sat Mar 20 20:32:10 2010 +0000 @@ -4911,30 +4911,39 @@ PyObject *func, *res; static PyObject *hash_str; long h; + int overflow; func = lookup_method(self, "__hash__", &hash_str); - if (func == Py_None) { + if (func == Py_None) { Py_DECREF(func); func = NULL; } if (func == NULL) { return PyObject_HashNotImplemented(self); - } + } res = PyEval_CallObject(func, NULL); Py_DECREF(func); if (res == NULL) return -1; - if (PyLong_Check(res)) - h = PyLong_Type.tp_hash(res); - else - h = PyLong_AsLong(res); + /* It's important that any value that can come out of hash(x) + for a Python object x is left unchanged by this function, + so that an object y can ensure hash(x) == hash(y) by having + its __hash__ method return hash(x). hash(x) can be any + value that fits in a C long, with the exception of -1. */ + h = PyLong_AsLongAndOverflow(res, &overflow); + if (overflow) { + if (PyLong_Check(res)) + h = PyLong_Type.tp_hash(res); + else + h = PyLong_AsLong(res); + } Py_DECREF(res); - if (h == -1 && !PyErr_Occurred()) - h = -2; - return h; + if (h == -1 && !PyErr_Occurred()) + h = -2; + return h; } static PyObject *