# -*- coding: utf-8 -*-
"""
Created on Tue Feb 12 16:03:43 2019

@author: Shawn

Weighted random sampling benchmark.

Builds a dataset of N events in which A and B are each twice as likely as
C, D or E, using two approaches, and prints the observed distribution and
the elapsed time of each:

1. a hand-rolled sampler that maps one ``random.random()`` draw per sample
   onto precomputed cumulative weight thresholds;
2. the standard library's ``random.choices()``, called once with ``k=N``.

NOTE: ``random.choices()`` is called a single time per dataset. Calling it
inside the per-sample loop with ``k=N+1`` and indexing the result draws
N+1 values for every sample kept, which makes the run quadratic in N; that
cost comes from the calling pattern, not from ``random.choices()`` itself.
"""

from bisect import bisect_right
from collections import Counter
from datetime import datetime
from itertools import accumulate
import random

EVENTS = ['A', 'B', 'C', 'D', 'E']
WEIGHTS = [2, 2, 1, 1, 1]


def sample_by_cumulative_weights(events, weights, n):
    """Return ``{1: event, ..., n: event}`` drawn with the given weights.

    Each sample uses one ``random.random()`` value ``r`` and selects the
    event ``j`` whose interval ``[cum[j-1], cum[j])`` of normalised
    cumulative weights contains ``r``.

    Args:
        events: sequence of possible outcomes.
        weights: non-negative relative weights, one per event.
        n: number of samples to draw; keys run from 1 to n.

    Raises:
        ValueError: if the lengths differ or the weights do not sum to a
            positive number.
    """
    if len(events) != len(weights):
        raise ValueError('events and weights must have the same length')
    total_weight = sum(weights)
    if total_weight <= 0:
        raise ValueError('total of weights must be greater than zero')

    # Computed once, outside the sampling loop. The last threshold is
    # total/total == 1.0 and random.random() is always < 1.0, so the
    # bisect index below never runs past the final event.
    thresholds = [c / total_weight for c in accumulate(weights)]

    # bisect_right gives the first threshold strictly greater than r,
    # i.e. the j with thresholds[j-1] <= r < thresholds[j].
    return {
        i: events[bisect_right(thresholds, random.random())]
        for i in range(1, n + 1)
    }


def sample_with_choices(events, weights, n):
    """Return ``{1: event, ..., n: event}`` drawn via ``random.choices()``.

    All ``n`` samples come from a single ``random.choices(..., k=n)`` call.
    """
    return dict(enumerate(random.choices(events, weights, k=n), start=1))


def report_distribution(dataset, n):
    """Print expected vs. observed counts of A-E in *dataset*.

    Args:
        dataset: mapping whose values are the sampled events.
        n: number of samples, used to compute the expected counts.

    Returns:
        A ``Counter`` of the observed events.
    """
    counts = Counter(dataset.values())
    print('* Generate dataset where events A and B each occur twice as frequently as events C, D, or E.')
    print('Distribution where A, B each occur at 2/7 frequency (' + str(round(2*n/7)),
          'times) and C, D, E each occur at 1/7 frequency (' + str(round(n/7)),
          'times).\nA occured:', counts['A'],
          '\nB occured:', counts['B'],
          '\nC occurred:', counts['C'],
          '\nD occurred:', counts['D'],
          '\nE occurred:', counts['E'])
    return counts


# Run the comparison for N = 10**3 and N = 10**4.
for k in range(3, 5):
    N = 10 ** k

    # --- Hand-rolled sampler (timed span includes counting and printing) ---
    time0 = datetime.today()
    dictTimestamp = sample_by_cumulative_weights(EVENTS, WEIGHTS, N)
    report_distribution(dictTimestamp, N)
    time1 = datetime.today()
    print('The elapsed time for this program with Shawn\'s code and N =', N, 'was', time1 - time0)

    # --- random.choices() (timed span includes counting and printing) ---
    time2 = datetime.today()
    print()
    dictTimestamp = sample_with_choices(EVENTS, WEIGHTS, N)
    report_distribution(dictTimestamp, N)
    time3 = datetime.today()

    # Guard against a zero-length baseline so a coarse clock cannot
    # raise ZeroDivisionError.
    baseline = time1 - time0
    ratio = round((time3 - time2) / baseline, 2) if baseline else float('inf')
    print('The elapsed time for this program with random.choices() and N =', N, 'was', time3 - time2, 'which is', ratio, 'times longer than Shawn\'s method')

    if k == 3:
        print('\n-------------------------------------------------------------------\n')