import random import timeit from collections.abc import Mapping as _Mapping from itertools import accumulate as _accumulate from bisect import bisect as _bisect from math import floor as _floor def weighted_choice_generator_rw(self, data): if isinstance(data, _Mapping): indices = list(data.keys()) weights = data.values() else: indices = None weights = data cumulative_dist = list(_accumulate(weights)) total_sum = cumulative_dist[-1] if any(w < 0 for w in weights): raise ValueError("All weights must be non-negative") if not total_sum: raise ValueError("At least one weight must be greater than zero") del weights if indices is None: for u in iter(self.random, None): yield _bisect(cumulative_dist, u * total_sum) else: for u in iter(self.random, None): yield indices[_bisect(cumulative_dist, u * total_sum)] def weighted_choice_generator_rw2(self, data): if isinstance(data, _Mapping): indices = list(data.keys()) weights = data.values() else: indices = None weights = data cumulative_dist = list(_accumulate(weights)) total_sum = cumulative_dist[-1] if any(w < 0 for w in weights): raise ValueError("All weights must be non-negative") if not total_sum: raise ValueError("At least one weight must be greater than zero") k = 1.0 / total_sum cumulative_dist = [k * s for s in cumulative_dist] del weights if indices is None: for u in iter(self.random, None): yield _bisect(cumulative_dist, u) else: for u in iter(self.random, None): yield indices[_bisect(cumulative_dist, u)] def weighted_choice_generator_va(self, data): if isinstance(data, _Mapping): indices = list(data.keys()) weights = data.values() else: indices = None weights = data total_sum = sum(weights) if any(w < 0 for w in weights): raise ValueError("All weights must be non-negative") if not total_sum: raise ValueError("At least one weight must be greater than zero") probabilities = [w / total_sum for w in weights] size = len(weights) probability = [0.0] * size alias = [None] * size average = 1.0 / size small = [] large = [] for i, p in enumerate(probabilities): if p >= average: large.append(i) else: small.append(i) while small and large: less = small.pop() more = large.pop() probability[less] = probabilities[less] * size alias[less] = more probabilities[more] += probabilities[less] - average if probabilities[more] >= average: large.append(more) else: small.append(more) while small: probability[small.pop()] = 1.0 while large: probability[large.pop()] = 1.0 if indices is None: while True: column = self.randrange(size) coinToss = self.random() < probability[column] yield column if coinToss else alias[column] else: while True: column = self.randrange(size) coinToss = self.random() < probability[column] yield indices[column if coinToss else alias[column]] def weighted_choice_generator_va2(self, data): if isinstance(data, _Mapping): indices = list(data.keys()) weights = data.values() else: indices = None weights = data total_sum = sum(weights) if any(w < 0 for w in weights): raise ValueError("All weights must be non-negative") if not total_sum: raise ValueError("At least one weight must be greater than zero") size = len(weights) k = size / total_sum probabilities = [k * w for w in weights] aliases = list(range(size)) small = [] large = [] large_append = large.append small_append = small.append for i in aliases: if probabilities[i] >= 1.0: large_append(i) else: small_append(i) for less, more in zip(small, large): aliases[less] = more probabilities[more] -= 1.0 - probabilities[less] if probabilities[more] >= 1.0: large_append(more) else: small_append(more) del weights, small, large, large_append, small_append if indices is None: while True: r = self.random() * size column = _floor(r) yield (column if (r - column < probabilities[column]) else aliases[column]) else: while True: r = self.random() * size column = _floor(r) yield indices[column if (r - column < probabilities[column]) else aliases[column]] g = weighted_choice_generator_va2(random.Random(), list(range(5))) s = [0]*5 for i in range(10000): s[next(g)] += 1 print(s) wcgs = [ ('Roulette Wheel', weighted_choice_generator_rw), ('Roulette Wheel 2', weighted_choice_generator_rw2), ("Vose's Alias", weighted_choice_generator_va), ("Vose's Alias 2", weighted_choice_generator_va2), ] self = random.Random() for size in 10, 100, 1000, 10000, 100000, 1000000: w = list(range(size)) t01 = t02 = None for name, wcg in wcgs: def test(): g = wcg(self, w) next(g) number = 10 t1 = timeit.timeit(test, number=number) / number samples = 1000 g = wcg(self, w) next(g) def test(): for i in range(samples): next(g) number = 10 t2 = timeit.timeit(test, number=number) / number / samples t1 -= t2 if t01 is None: t01 = t1 t02 = t2 n = 0 else: if t2 < t02: n = int((t1 - t01) / (t02 - t2)) else: n = '-' print('%-16s %8d %8.3f %8.3f %8s' % (name, size, 1e3 * t1, 1e6 * t2, n))