#!/usr/bin/python
# -*- coding: utf-8  -*-
import os, random, collections, time, re

# Two 256-element lookup tables, based on character and word-length
# occurrence frequencies. Indexing either table with a uniformly random
# integer in [0, 255] gives a frequency-weighted pick, which is useful
# for generating random 'words' really quickly:

# Frequency-weighted character table, written as run lengths: each
# character is repeated once per slot it occupies, most frequent first.
# The run lengths sum to 256, so any index in [0, 255] is valid.
letters = list(
    'e' * 21 + 't' * 14 + 'o' * 13 + 'a' * 13 + 'r' * 12 + 'i' * 12 +
    's' * 11 + 'C' * 11 + 'F' * 11 + 'n' * 11 + 'K' * 11 + 'U' * 10 +
    'c' * 8 + 'h' * 8 + 'l' * 7 + ']' * 6 + '[' * 5 + 'u' * 5 +
    'd' * 5 + 'm' * 5 + 'g' * 5 + 'p' * 5 + 'f' * 4 + 'y' * 4 +
    'k' * 3 + 'w' * 3 + '.' * 3 + 'V' * 3 + 'b' * 3 + "'" * 3 +
    'v' * 2 + '1' * 2 + '|' * 2 + '=' * 2 + 'A' * 2 + '/' * 2 +
    '5' * 2 + '!' * 2 + '0' * 2 + 'S' * 2 + '4'
)

# Frequency-weighted word-length table, written as run lengths: each
# length is repeated once per slot it occupies, most frequent first.
# The run lengths sum to 256, so any index in [0, 255] is valid.
# Note that length 0 (the empty word) is a possible outcome.
lengths = (
    [4] * 32 + [3] * 29 + [2] * 27 + [7] * 26 + [5] * 25 + [6] * 24 +
    [8] * 21 + [9] * 15 + [10] * 12 + [1] * 8 + [11] * 8 + [12] * 7 +
    [13] * 5 + [0] * 5 + [14] * 3 + [15] * 3 + [16] * 2 +
    [21, 22, 20, 17]
)


def benchmark_dict(d, N):
    """Time N random-word counter increments on the mapping `d`.

    Each iteration picks a word length from the module-level `lengths`
    table, joins that many characters picked from `letters`, and runs
    ``d[word] += 1``.  A one-line summary is then printed: N, the number
    of keys in `d`, the VmPeak/VmSize lines of /proc/<pid>/status, the
    elapsed wall-clock seconds and the resulting keys-per-second rate.

    d -- mapping that supplies a default for missing keys (for example
         collections.defaultdict(int)); it is mutated in place.
    N -- number of words to generate and count.

    Returns None.
    """
    # xrange exists only on Python 2, where range() would build a full
    # N-element list and inflate the memory figures reported below.
    try:
        counter = xrange
    except NameError:
        counter = range

    start = time.time()

    for _outer in counter(N):
        # random.choice samples the whole table, so the tables are no
        # longer required to hold exactly 256 entries.
        length = random.choice(lengths)
        word = ''.join([random.choice(letters) for _inner in counter(length)])
        d[word] += 1

    dt = time.time() - start

    # /proc is Linux-specific; without it, report an empty list rather
    # than discarding the timing that was just measured.
    try:
        with open('/proc/%d/status' % os.getpid()) as status:
            vm = re.findall("(VmPeak.*|VmSize.*)", status.read())
    except (IOError, OSError):
        vm = []

    if dt:
        rate = N / dt
    else:
        # The clock did not advance (typically N == 0); avoid dividing
        # by zero.
        rate = float('inf') if N else 0.0

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("%d keys (%d unique), %s, %f seconds, %f keys per second" % (N, len(d), vm, dt, rate))


# Script entry: count one million random words.  defaultdict(int) supplies
# the zero that `d[word] += 1` needs the first time a word is seen.
d = collections.defaultdict(int)
benchmark_dict(d, N=10 ** 6)