from random import * from heapq import * cmps = 0 class Int(int): def __lt__(self, other): global cmps cmps += 1 return int.__lt__(self, other) def count_cmps(f, data, k): 'Count comparisons in a call to f(k, data)' global cmps data = data[:] shuffle(data) cmps = 0 result = f(k, data) print(cmps, f.__name__, result[:10]) def current_smallest(k, data): return nsmallest(k, data) def heapifying_smallest(k, data): heapify(data) result = [heappop(data) for j in range(k)] data.extend(result) return result def select_nth(data, n): if len(data) == 1: assert n == 0 return data[0] pivot = choice(data) lhs, rhs = [], [] for elem in data: (lhs if elem < pivot else rhs).append(elem) if len(lhs) >= n+1: return select_nth(lhs, n) else: return select_nth(rhs, n - len(lhs)) def selecting_smallest(k, data): assert 0 <= k < len(data) pivot = select_nth(data, k) return sorted(elem for elem in data if elem <= pivot)[:k] if __name__ == '__main__': # show that select_nth works data = list(range(20)) shuffle(data) print([select_nth(data, k) for k in range(len(data))]) # compare nsmallest implementations n = 100000 k = 100 data = list(map(Int, range(n))) for f in current_smallest, heapifying_smallest, selecting_smallest: for i in range(5): count_cmps(f, data, k)