"""Micro-benchmark of several ways to count item occurrences in a sequence.

Each strategy (``test``, ``exception``, ``get``, ``default``, ``count``)
builds a mapping from item to number of occurrences.  Run as a script, the
module times every strategy on inputs with different proportions of
distinct items and prints the best time of each in milliseconds.
"""
from __future__ import print_function, division

from collections import defaultdict, Counter
import sys
import timeit


def test(data):
    """Count items with an explicit membership test before each update.

    Returns a plain ``dict`` mapping each item of ``data`` to its count.
    """
    d = {}
    for item in data:
        if item in d:
            d[item] += 1
        else:
            d[item] = 1
    return d


# The name matches pytest's default ``test*`` collection pattern; opt out so
# that importing this helper into a test module does not make pytest try to
# run it as a test case with a missing ``data`` fixture.
test.__test__ = False


def exception(data):
    """Count items by incrementing and handling ``KeyError`` on first sight.

    Returns a plain ``dict`` mapping each item of ``data`` to its count.
    """
    d = {}
    for item in data:
        try:
            d[item] += 1
        except KeyError:
            d[item] = 1
    return d


def get(data):
    """Count items using ``dict.get`` with a default of 0.

    Returns a plain ``dict`` mapping each item of ``data`` to its count.
    """
    d = {}
    for item in data:
        d[item] = d.get(item, 0) + 1
    return d


def default(data):
    """Count items using a ``defaultdict(int)``.

    Returns the ``defaultdict`` mapping each item of ``data`` to its count.
    """
    d = defaultdict(int)
    for item in data:
        d[item] += 1
    return d


def count(data):
    """Count items by handing ``data`` to ``collections.Counter``.

    Returns the ``Counter`` mapping each item of ``data`` to its count.
    """
    return Counter(data)


def bench(func, data):
    """Time ``func(data)`` and print the function name and best time in ms.

    Every trial run by ``timeit.repeat`` executes a single call
    (``number=1``); the minimum over the trials is reported, formatted with
    one decimal place.
    """
    best = min(timeit.repeat(lambda: func(data), number=1))
    print(func.__name__, '%.1f' % (best * 1000))


def benchs(data):
    """Print the uniqueness of ``data`` and benchmark every counting strategy.

    Uniqueness is the percentage of distinct items among all items.  Empty
    input is reported as 0.0% rather than dividing by zero.  A blank line is
    printed after the results.
    """
    total = len(data)
    # Guard the division: an empty input has no meaningful ratio.
    uniqueness = 100 * len(set(data)) / total if total else 0.0
    print('uniqueness = %.1f%%' % uniqueness)
    for func in (test, exception, get, default, count):
        bench(func, data)
    print()


def main():
    """Benchmark on 100000 items with progressively fewer distinct values."""
    data = ['Line #%d' % i for i in range(100000)]
    benchs(data)
    # Keep every step-th item and repeat the slice ``step`` times, so the
    # length stays the same while the number of distinct items shrinks.
    for step in (10, 100, 1000):
        benchs(data[::step] * step)


if __name__ == '__main__':
    main()