''' Time to build, resize, and deallocate sets.

* turn-off hash randomization for better comparability
      PYTHONHASHSEED=3385876387
* need case that has many redundant, non-identical strings
  (because specialization only applies to unicode, inexact matches)
  and vary the set sizes to explore cache effects

Run this file as a script to print the timing table.  Importing it only
defines the data generators and the benchmark parameters; the benchmark
itself lives in main() and is not started on import.
'''

from string import ascii_letters as letters
from random import choice, seed, shuffle, randrange
from pprint import pprint
from timeit import Timer
import sys

# Fixed seed so that every run generates the same sequence of test data.
seed('the tale of two cities it was the best of times')


def makeuniq(words):
    '''Return a list of equal-valued strings that are all distinct objects.

    Joining on '~' and splitting again builds a fresh str object for every
    element, so duplicates compare equal without being identical.  The
    input words must not contain '~' themselves.
    '''
    words = '~'.join(words).split('~')
    # Sanity check: no two list slots may share the same object.
    assert len(set(map(id, words))) == len(words)
    return words


def makeuniqints(ints):
    '''Return a list of equal-valued ints that are all distinct objects.

    Multiplying by one is used to obtain a new int object per element; the
    assertion below verifies that this actually produced distinct objects.
    '''
    ints = [i * 1 for i in ints]
    # Sanity check: no two list slots may share the same object.
    assert len(set(map(id, ints))) == len(ints)
    return ints


def makewordlist(size, redundancy=5, wsize=5, identical=True):
    '''Build a shuffled list of random words containing duplicates.

    A pool of ``size // redundancy`` random words, each ``wsize`` ASCII
    letters long, is repeated ``redundancy`` times and shuffled.

    With ``identical=True`` the duplicates are the very same str objects;
    with ``identical=False`` every element is a separate object (equal in
    value to its duplicates), see makeuniq().
    '''
    npool = size // redundancy
    pool = [''.join(choice(letters) for _ in range(wsize))
            for _ in range(npool)]
    words = pool * redundancy
    if not identical:
        words = makeuniq(words)
    shuffle(words)
    return words


def makeintlist(size, redundancy=5, wsize=9, identical=True):
    '''Build a shuffled list of random integers containing duplicates.

    A pool of ``size // redundancy`` integers drawn from
    ``randrange(1000, 10 ** wsize)`` is repeated ``redundancy`` times and
    shuffled.

    With ``identical=True`` the duplicates are the very same int objects;
    with ``identical=False`` every element is a separate object (equal in
    value to its duplicates), see makeuniqints().
    '''
    irange = 10 ** wsize
    npool = size // redundancy
    pool = [randrange(1000, irange) for _ in range(npool)]
    ints = pool * redundancy
    if not identical:
        ints = makeuniqints(ints)
    shuffle(ints)
    return ints


# Benchmark parameters: number of timing repetitions per measurement, and
# the total element budget (each measurement runs n // size set builds).
trials = 7
n = 10000000


def main():
    '''Time set construction for several input sizes and print a table.

    For each size, four inputs are timed: duplicated words, duplicated
    ints, and the same two with non-identical duplicates.  Each reported
    figure is the minimum over ``trials`` repetitions of ``n // size``
    set constructions.

    The timing setup code imports the generators from ``__main__``, so
    this function is meant to be called when this file is the script
    being run.
    '''
    print(' size dup words dup ints uniq words uniq ints')
    print('---------- ----------- ----------- ----------- -----------')
    for size in [100, 1000, 10000, 100000, 1000000]:
        # Every Timer runs this same setup, which rebuilds all four
        # input lists before the timed statement is executed.
        setup = 'from __main__ import makewordlist, makeintlist\n'
        setup += 's = makewordlist(%d, identical=True)\n' % size
        setup += 't = makeintlist(%d, identical=True)\n' % size
        setup += 'u = makewordlist(%d, identical=False)\n' % size
        setup += 'v = makeintlist(%d, identical=False)\n' % size
        setup += '_set = set\n'
        reps = n // size
        # Same order as the table columns: s, t, u, v.
        timings = [min(Timer(stmt, setup).repeat(trials, reps))
                   for stmt in ('_set(s)', '_set(t)', '_set(u)', '_set(v)')]
        print('%-10d %7.05f %7.05f %7.05f %7.05f' % ((size,) + tuple(timings)))


if __name__ == '__main__':
    main()