"""Micro-benchmark comparing several ways of counting items with a dict.

Each ``cN`` function takes an iterable of hashable items and returns a
mapping from item to the number of times it occurred.  When the file is run
as a script, every strategy (plus ``dict.fromkeys`` as a baseline) is timed
against four differently distributed data sets.
"""

from timeit import Timer
from random import randrange, seed, sample
from collections import defaultdict
from itertools import repeat


class Counter(dict):
    """dict subclass whose lookup of an absent key yields 0.

    ``__missing__`` only returns the default; it does not insert the key.
    """

    def __missing__(self, key):
        return 0


def c1(data):
    """Count via ``__missing__``: an absent key reads as 0, then is stored."""
    c = Counter()
    for elem in data:
        c[elem] += 1
    return c


def c2(data):
    """Count via a pre-bound ``dict.get`` with a literal default of 0."""
    c = Counter()
    c_get = c.get  # bind once to skip the attribute lookup on each iteration
    for elem in data:
        c[elem] = c_get(elem, 0) + 1
    return c


def c2a(data):
    """Same as ``c2`` but the default is held in a local variable."""
    c = Counter()
    c_get = c.get
    zero = 0
    for elem in data:
        c[elem] = c_get(elem, zero) + 1
    return c


def c3(data):
    """Count with an explicit membership test before updating."""
    c = Counter()
    for elem in data:
        if elem in c:
            c[elem] += 1
        else:
            c[elem] = 1
    return c


def c4(data, KeyError=KeyError):
    """Count with try/except around the increment.

    ``KeyError`` is bound as a default argument so the ``except`` clause
    resolves it as a local name.

    A plain ``dict`` is used on purpose: with ``Counter`` the ``__missing__``
    hook would supply 0 for absent keys, the exception would never be raised
    and this variant would measure exactly the same thing as ``c1``.
    """
    c = {}
    for elem in data:
        try:
            c[elem] += 1
        except KeyError:
            c[elem] = 1
    return c


def c5(data):
    """Count with a ``defaultdict`` whose factory always returns 0."""
    # repeat(0).__next__ is a zero-argument callable that yields 0 each call.
    c = defaultdict(repeat(0).__next__)
    for elem in data:
        c[elem] += 1
    return c


N = 10000
data_even = [randrange(N) for _ in range(N)]        # N draws from N possible values
data_set = sample(range(N*6), N)                    # no collisions
data_sparse = [randrange(N*6) for _ in range(N)]    # rare collisions
data_dense = [randrange(N//6) for _ in range(N)]    # many duplicates (collisions)

# Timer setup statement; it imports from __main__, so the benchmark is only
# meaningful when this file is executed as a script.
setup = 'from __main__ import c1, c2, c2a, c3, c4, c5, ' \
        'data_set, data_dense, data_sparse, data_even'


def main():
    """Time every strategy on every data set and print the best run."""
    for func in 'dict.fromkeys c1 c2 c2a c3 c4 c5'.split():
        for data in 'data_set data_sparse data_even data_dense'.split():
            stmt = '%s(%s)' % (func, data)
            # Best of 7 repeats of 30 executions each.
            best = min(Timer(stmt, setup).repeat(7, 30))
            print('%-25s\t%6.3f' % (stmt, best))
        print()


if __name__ == '__main__':
    main()