from __future__ import print_function import time import collections ## Example output: # a has 144900 words # j has 3035 words # m has 62626 words # amj takes 5.902/1000 (verify: 289) # ajm takes 0.292/1000 (verify: 289) # jma takes 0.132/1000 (verify: 289) # Get all of the words into a list of words. words = [line.strip().lower() for line in open("/usr/share/dict/words")] # Make an inverted index mapping character to a set of word indicies inverted_index = collections.defaultdict(set) for i, word in enumerate(words): for c in word: inverted_index[c].add(i) # Timing benchmark, which shows the order dependency def benchmark(letters): t1 = time.time() for i in range(1000): count = len(set.intersection(*(inverted_index[c] for c in letters))) return count, (time.time()-t1) for letter in "ajm": print(letter, "has", len(inverted_index[letter]), "words") for ordering in ("amj", "ajm", "jma"): count, dt = benchmark(ordering) print("{0} takes {1:.2f}2/1000 (verify: {2})".format(ordering, dt, count))