from math import floor, fsum from random import random from itertools import repeat, accumulate from collections import deque from bisect import bisect def choices(population, weights=None, *, cum_weights=None, k=1, ): # XXX Need better heuristic for choosing the inverse-cdf method. # XXX Add back the error checking code. n = len(weights) _random = random _floor = floor _bisect = bisect # Fast path for equal weights if weights is None or len(set(weights)) == 1: n += 0.0 return [population[_floor(_random() * n)] for i in repeat(None, k)] # Inverse CDF method when k is small relative to n if cum_weights is not None or k < 10 * n: if cum_weights is None: cum_weights = list(accumulate(weights)) total = cum_weights[-1] + 0.0 hi = n - 1 return [population[_bisect(cum_weights, _random() * total, 0, hi)] for i in repeat(None, k)] # Alias method needs normalized weights total = fsum(weights) weights = [w / total for w in weights] # Partition weights into groups that fit into a bin # or that need to be split. bin_width = 1.0 / n small = deque() large = deque() small_append, small_pop = small.append, small.popleft large_append, large_pop = large.append, large.popleft for pos, weight in enumerate(weights): if weight <= bin_width: small_append(pos) else: large_append(pos) # Fill undersized bins with the aliased weights from large bins K = list(range(len(weights))) while small and large: small_pos = small_pop() small_prob = weights[small_pos] if small_prob < bin_width: K[small_pos] = large_pos = large_pop() weights[large_pos] -= bin_width - small_prob if weights[large_pos] <= bin_width: small_append(large_pos) else: large_append(large_pos) # Convert aliased bin weights to cumulative weights U = [weights[i] + bin_width * i for i in range(n)] # Debugging code to show tables if True: for i in range(n): print(f'{i*bin_width:5.3f} {i=} {U[i]=:5.3f} {K[i]=}') # Make selections from aliased bins n += 0.0 return [population[i if (x := _random()) < U[(i := _floor(x * n))] else K[i]] for _ in repeat(None, k)] if __name__ == '__main__': from collections import Counter # Equal weighting result = choices('ABCD', (10, 10, 10, 10), k=10_000) print(sorted(Counter(result).items())) # Small k result = choices('ABCD', (10, 30, 40, 20), k=20) print(sorted(Counter(result).items())) # Large k result = choices('ABCDE', (10, 30, 0, 40, 20), k=10_000) print(sorted(Counter(result).items()))