from random import randrange, shuffle, seed from timeit import Timer seed = 8675309 print(''' ----------------------------------------------------------------------- Test for a set that too large to fit in a 1MB L2 cache. >>> num_resizes = 7 >>> tablerows = 8 * 4 ** num_resizes >>> tablerows * 2 * 8 // 1024 # size of the table in kilobytes 2048 >>> int(tablerows * .64) # entries to make the table nearly full 83886 >>> collisions = 83900 - tablerows * (1 - ((tablerows-1)/tablerows) ** 83900) >>> collisions 21933.764339061774 >>> _ / 83900 0.26142746530466954 ''') # population overlap is compared using identity n = 83900 population = [randrange(10000000) for i in range(3*n//2)] shuffle(population) s = set(population[:n]) t = set(population[n//2:]) print(min(Timer('s&t; s|t; s-t; t-s; set(list(s))', 'from __main__ import s, t').repeat(7, 400))) print(''' ----------------------------------------------------------------------- Test for a set that too large to fit in a 8Mb L3 cache. >>> num_resizes = 9 >>> tablerows = 8 * 4 ** num_resizes >>> tablerows * 2 * 8 // 1024 # size of the table in kilobytes 32768 >>> int(tablerows * .64) # entries to make the table nearly full 1342177 >>> collisions = 1342000 - tablerows * (1 - (float(tablerows-1)/tablerows) ** 1342000) >>> collisions 350753.67529321834 >>> _ / 1342000 0.26136637503220445 ''') # population overlap is be compared using identity n = 1342000 population = [randrange(10000000) for i in range(3*n//2)] shuffle(population) u = set(population[:n]) v = set(population[n//2:]) print(min(Timer('s&t; s|t; s-t; t-s; set(list(s))', 'from __main__ import u as s, v as t').repeat(7, 25)))