from random import randrange, shuffle, seed from timeit import Timer seed = 8675309 print(''' ----------------------------------------------------------------------- Test for a set that too large to fit in a 256kb L2 cache. >>> num_resizes = 6 >>> tablerows = 8 * 4 ** num_resizes >>> tablerows * 2 * 8 // 1024 # size of the table in kilobytes 512 >>> int(tablerows * .64) # entries to make the table nearly full 20971 >>> collisions = 20900 - tablerows * (1 - ((tablerows-1)/tablerows) ** 20900) >>> collisions 5447.902762377089 >>> _ / 20900 0.2606652039414875 ''') # population overlap is compared using identity n = 20900 population = [randrange(10000000) for i in range(3*n//2)] shuffle(population) s = set(population[:n]) t = set(population[n//2:]) print(min(Timer('s&t; s|t; s-t; t-s; set(list(s))', 'from __main__ import s, t').repeat(7, 400))) print(''' ----------------------------------------------------------------------- Test for a set that too large to fit in a 2Mb L3 cache. >>> num_resizes = 8 >>> tablerows = 8 * 4 ** num_resizes >>> tablerows * 2 * 8 // 1024 # size of the table in kilobytes 8192 >>> int(tablerows * .64) # entries to make the table nearly full 335544 >>> collisions = 335000 - tablerows * (1 - ((tablerows-1)/tablerows) ** 335000) >>> collisions 87452.0866365626 >>> _ / 335000 0.2610510048852615 ''') # population overlap is be compared using identity n = 335000 population = [randrange(10000000) for i in range(3*n//2)] shuffle(population) u = set(population[:n]) v = set(population[n//2:]) print(min(Timer('s&t; s|t; s-t; t-s; set(list(s))', 'from __main__ import u as s, v as t').repeat(7, 25)))