from random import randrange,shuffle from sets import Set import sys def testset(fixt, hsh=hash): results = {} for f in fixt: h = hsh(f) try: results[h].add(f) except KeyError: results[h] = Set([f]) return results def testint(fixt, hsh=hash): results = {} for f in fixt: h = hsh(f) try: results[h] += 1 except KeyError: results[h] = 1 return results def test(fixt, hsh=hash): results = testset(fixt, hsh) lo, hi = ecollisions(sum([len(v) for v in results.values()])) c = len([v for v in results.values() if len(v) > 1]) c2 = len([v for v in results.values() if len(v) > 2]) if c >= lo and c <= hi: ok = "PASS" else: ok = "FAIL" print "%s: %s collisions, %s multiple, expected between %s and %s." % \ (ok, c, c2, lo, hi) def testi(fixt, hsh=hash): results = testint(fixt, hsh) lo, hi = ecollisions(sum(results.values())) c = len([v for v in results.values() if v > 1]) c2 = len([v for v in results.values() if v > 2]) if c >= lo and c <= hi: ok = "PASS" else: ok = "FAIL" print "%s: %s collisions, %s multiple, expected between %s and %s." % \ (ok, c, c2, lo, hi) def pcollisions(c, s, n=(1L << 32)): '''The probability of getting c collisions when taking s random samples from n items. We only consider collisions involving exactly two samples.''' p = 1.0 for i in range(n - s + c + 1, n): p *= float(i) / n for i in range(0, 2 * c, 2): p *= float(s - i) / n / ((i >> 1) + 1) * (s - i - 1) p /= 2 ** c return p def ecollisions(s, tol=0.9, n=(1L << 32)): '''The minimum and maximum number of collisions expected when taking s random samples from n items, where the number of collisions is expected if it is within a range around the average that occurs with probability tol. We only consider collisions involving exactly two samples.''' p = 0.0 min = 0 while True: p += pcollisions(min, s, n) if p > (1 - tol) / 2: break min += 1 max = min while True: p += pcollisions(max + 1, s, n) if p > (1 + tol) / 2: break max += 1 return min, max def tim(base): for i in base: for j in base: yield i, j for i in base: for j in base: for k in base: yield i, (j, k) yield (i, j), k def longtuples(n, m=300): for i in range(n): yield tuple([randrange(-sys.maxint-1, sys.maxint) for j in range(m)]) def grow(n): for i in range(n): yield tuple(range(i)) def growc(n, c): for i in range(n): yield tuple([c] * i) def rotate(n): for i in range(n): yield tuple(range(i, n) + range(i)) def randstruct(n, maxdata, maxtuples, c): for i in range(n): d = randrange(maxdata) t = randrange(maxtuples) a = ["(", "),"] * t + [repr(c) + ","] * d shuffle(a) for j in range(t): try: yield eval("".join(a)) break except SyntaxError: pass a.insert(0, "(") a.append(")") def common(n, base): if n < 1: yield () else: for t in common(n - 1, base): for i in base: yield t + (i,) if __name__ == "__main__": testi(tim(range(50))) testi(longtuples(300)) testi(grow(10000)) testi(growc(10000, 1)) testi(growc(10000, "")) testi(rotate(5000)) test(randstruct(1000, 10, 10, 1)) for n in range(1, 10): print "%s:" % n, test(common(n, range(int(100000.0 ** (1.0 / n)))))