""" Experiment with duplicate comparisons in Python's Timsort. Measure comparisons and time while sorting. """ import functools import itertools import random import timeit # A number of random elements to shuffle and sort RANDOM_ELEMENT_COUNT = 10 ** 5 # Reuse a random seed, for determinism RANDOM_SEED = 0 class Sort: """ Test sorting performance. """ def __init__(self, data, name): """ Sort data and print statistics. """ self.data = data self.name = name self.total_comparisons = 0 self.duplicates = [] def test(self, cmp_func=None): """ The main block of test code: sort and count. """ for perm in self.data: # Initialize. perm = list(perm) compares = Compares(cmp_func) # Sort. start = timeit.default_timer() perm.sort(key=functools.cmp_to_key(compares.compare)) #perm.sort() end = timeit.default_timer() # Update numbers. self.update_statistics(compares) self.check_if_sorted(perm) self.print_results(end - start) def update_statistics(self, compares): """ Increase counts of comparisons and duplicates. """ self.total_comparisons += compares.count self.duplicates += [ count for count in compares.pairs.values() if count > 1 ] def check_if_sorted(self, perm): """ Check that a list is in non-decreasing order. """ for i, item in enumerate(perm[:-1]): if item > perm[i + 1]: raise ValueError(i, item, perm[i + 1]) def print_results(self, time_difference): """ Print statistics to the screen. """ message = '{} comparisons in {} seconds, with {} duplicates, for {}' output = message.format( self.total_comparisons, time_difference, len(self.duplicates), self.name ) print(output) class Compares: """ Count and record comparisons made while sorting. """ def __init__(self, compare_func=None): self.count = 0 self.choices = [] self.pairs = {} if compare_func: self.compare_func = compare_func else: self.compare_func = lambda a, b: a < b def compare(self, a, b): """ A comparison function in the form of the old __cmp__. """ self.count += 1 self.choices.append('{}:{}'.format(a, b)) key = (a, b) try: self.pairs[key] += 1 except KeyError: self.pairs[key] = 1 if isinstance(a, tuple) and isinstance(b, tuple): return a[0] - b[0] else: return a - b def random_data(count, seed=RANDOM_SEED): """ Generate a random list of distinct elements. """ data = list(range(count)) r = random.Random(seed) r.shuffle(data) return [data] def all_permutations(n): """ Return all permutations of a given length. """ return itertools.permutations(range(n)) def stable_sort_data(count): """ Test stable sorting. """ for i in range(count): data = [(0, i) for i in range(count)] data.insert(i, (0, i)) yield data def main(): """ Run test functions. """ print('Starting tests.') # Sort random data. Sort( random_data(RANDOM_ELEMENT_COUNT), '{} randomly ordered elements'.format(RANDOM_ELEMENT_COUNT) ).test() # Sort all permutations of a particular length. for length in [3, 8]: Sort( all_permutations(length), 'all permutations of length {}'.format(length) ).test() # Test stable sorting. Sort( stable_sort_data(length), 'stable sorting {} identical elements'.format(length) ).test(cmp_func=lambda a, b: a[0] - b[0]) if __name__ == '__main__': main()