from itertools import product
def tryone(tag, xs):
import sys
hashbits = sys.maxsize.bit_length() + 1
assert hashbits in (32, 64)
nballs = len(xs)
# When nballs is much less than nbins, the mean expected number
# of collisions is closely approximated by
# combinations(nballs, 2) / nbins.
cn2 = nballs * (nballs - 1) / 2.0
hashes = list(map(hash, xs))
nbins = 1 << hashbits
mean = cn2 / nbins
collisions = nballs - len(set(hashes))
msg = f"{tag}; {hashbits}-bit hash codes; mean {mean:.2f} got {collisions:,}"
print(msg)
if hashbits > 32:
mean = cn2 / 2**32
mask = (1 << 32) - 1
collisions = nballs - len(set(h & mask for h in hashes))
msg = f"{tag}; 32-bit lower hash codes; mean {mean:.2f} got {collisions:,}"
print(msg)
shift = hashbits - 32
collisions = nballs - len(set(h >> shift for h in hashes))
msg = f"{tag}; 32-bit upper hash codes; mean {mean:.2f} got {collisions:,}"
print(msg)
tryone("range(100) by 3", list(product(range(100), repeat=3)))
cands = list(range(-10, -1)) + list(range(9))
tryone("-10 .. 8 by 4", list(product(cands, repeat=4)))
del cands
cands = list(range(-50, 51))
cands.remove(-1) # hashes to -2
tryone("-50 .. 50 less -1 by 3", list(product(cands, repeat=3)))
del cands
L = [n << 60 for n in range(100)]
tryone("0..99 << 60 by 3", list(product(L, repeat=3)))
del L
tryone("[-3, 3] by 20", list(product([-3, 3], repeat=20)))
tryone("[0.5, 0.25] by 20", list(product([0.5, 0.25], repeat=20)))
if 1:
def test_hash():
# See SF bug 942952: Weakness in tuple hash
# The hash should:
# be non-commutative
# should spread-out closely spaced values
# should not exhibit cancellation in tuples like (x,(x,y))
# should be distinct from element hashes: hash(x)!=hash((x,))
# This test exercises those cases.
# For a pure random hash and N=50, the expected number of occupied
# buckets when tossing 252,600 balls into 2**32 buckets
# is 252,592.6, or about 7.4 expected collisions. The
# standard deviation is 2.73. On a box with 64-bit hash
# codes, no collisions are expected. Here we accept no
# more than 15 collisions. Any worse and the hash function
# is sorely suspect.
N=50
base = list(range(N))
xp = [(i, j) for i in base for j in base]
inps = base + [(i, j) for i in base for j in xp] + \
[(i, j) for i in xp for j in base] + xp + list(zip(base))
tryone("old tuple test", inps)
test_hash()
if 1:
# https://github.com/python/cpython/pull/9534/files
n = 5
A = [x for x in range(-n, n+1) if x != -1]
B = A + [(a,) for a in A]
L2 = [(a,b) for a in A for b in A]
L3 = L2 + [(a,b,c) for a in A for b in A for c in A]
L4 = L3 + [(a,b,c,d) for a in A for b in A for c in A for d in A]
# T = list of testcases. These consist of all (possibly nested
# at most 2 levels deep) tuples containing at most 4 items from
# the set A.
T = A
T += [(a,) for a in B + L4]
T += [(a,b) for a in L3 for b in B]
T += [(a,b) for a in L2 for b in L2]
T += [(a,b) for a in B for b in L3]
T += [(a,b,c) for a in B for b in B for c in L2]
T += [(a,b,c) for a in B for b in L2 for c in B]
T += [(a,b,c) for a in L2 for b in B for c in B]
T += [(a,b,c,d) for a in B for b in B for c in B for d in B]
assert len(T) == 345130
tryone("new tuple test", T)
## self.assertLessEqual(collisions, 20)