#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Benchmark dictionary insertion speed and memory use with random 'words'.

Random words are drawn from two 256-entry lookup tables (one for characters,
one for word lengths) and counted in a dict-like object. The elapsed time,
throughput and the process's VmPeak/VmSize lines are then printed.
"""

import collections
import os
import random
import re
import time

try:
    xrange
except NameError:  # Python 3: range is already lazy
    xrange = range


def _expand_runs(runs):
    """Expand (value, count) pairs into a flat list, repeating each value."""
    table = []
    for value, count in runs:
        table.extend([value] * count)
    return table


# Lookup tables of 256 entries each, based on character and word-length
# occurrence frequencies: picking a uniformly random index yields a value with
# roughly its observed frequency. They may be useful to anyone who needs to
# generate random 'words' really quickly.
_LETTER_RUNS = (
    ('e', 21), ('t', 14), ('o', 13), ('a', 13), ('r', 12), ('i', 12),
    ('s', 11), ('C', 11), ('F', 11), ('n', 11), ('K', 11), ('U', 10),
    ('c', 8), ('h', 8), ('l', 7), (']', 6), ('[', 5), ('u', 5),
    ('d', 5), ('m', 5), ('g', 5), ('p', 5), ('f', 4), ('y', 4),
    ('k', 3), ('w', 3), ('.', 3), ('V', 3), ('b', 3), ("'", 3),
    ('v', 2), ('1', 2), ('|', 2), ('=', 2), ('A', 2), ('/', 2),
    ('5', 2), ('!', 2), ('0', 2), ('S', 2), ('4', 1),
)

_LENGTH_RUNS = (
    (4, 32), (3, 29), (2, 27), (7, 26), (5, 25), (6, 24), (8, 21),
    (9, 15), (10, 12), (1, 8), (11, 8), (12, 7), (13, 5), (0, 5),
    (14, 3), (15, 3), (16, 2), (21, 1), (22, 1), (20, 1), (17, 1),
)

letters = _expand_runs(_LETTER_RUNS)
lengths = _expand_runs(_LENGTH_RUNS)


def _read_vm_stats():
    """Return the VmPeak/VmSize lines of this process's /proc status file.

    Returns an empty list when the status file cannot be opened (for example
    on systems without /proc), so the timing results are still reported.
    """
    try:
        with open('/proc/%d/status' % os.getpid()) as status_file:
            status = status_file.read()
    except (IOError, OSError):
        return []
    return re.findall("(VmPeak.*|VmSize.*)", status)


def benchmark_dict(d, N):
    """Insert N random words into ``d`` and print timing and memory figures.

    Args:
        d: Mapping used as a counter. It must accept ``d[word] += 1`` for
            words it has not seen yet (e.g. ``collections.defaultdict(int)``).
        N: Number of random words to generate and count.

    The timed section covers both word generation and the dictionary update.
    Prints one summary line; returns None.
    """
    last_length_index = len(lengths) - 1
    last_letter_index = len(letters) - 1

    start = time.time()
    for _ in xrange(N):
        length = lengths[random.randint(0, last_length_index)]
        word = ''.join([letters[random.randint(0, last_letter_index)]
                        for _ in xrange(length)])
        d[word] += 1
    dt = time.time() - start

    vm = _read_vm_stats()
    # A coarse clock can report zero elapsed time for very small N.
    rate = N / dt if dt > 0 else float('inf')
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("%d keys (%d unique), %s, %f seconds, %f keys per second"
          % (N, len(d), vm, dt, rate))


if __name__ == "__main__":
    d = collections.defaultdict(int)
    benchmark_dict(d, 1000000)