def merge_at(s, i): s[i] += s[i+1] del s[i+1] return s[i] def merge_force_collapse(s): cost = 0 while len(s) > 1: n = len(s) - 2 if n > 0 and s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) return cost def sort(runs, merge_collapse): stack = [] maxstack = 0 cost = 0 for x in runs: stack.append(x) maxstack = max(maxstack, len(stack)) cost += merge_collapse(stack) cost += merge_force_collapse(stack) return cost, maxstack def timsort(s): cost = 0 while len(s) > 1: n = len(s) - 2 if ((n > 0 and s[n-1] <= s[n] + s[n+1]) or (n > 1 and s[n-2] <= s[n-1] + s[n])): if s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) elif s[n] <= s[n+1]: cost += merge_at(s, n) else: break return cost def twomerge(s): cost = 0 while len(s) > 1: n = len(s) - 2 if s[n] < 2 * s[n+1]: if n > 0 and s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) else: break if len(s) > 1: assert s[-2] >= 2 * s[-1] if len(s) > 2: assert s[-3] >= 2 * s[-2] return cost if 1: from random import random, randrange print("all the same") # identical stats runs = [32] * 1000 print(sort(runs, timsort)) print(sort(runs, twomerge)) print("ascending") # timsort a little better runs = range(1, 2000) print(sort(runs, timsort)) print(sort(runs, twomerge)) print("descending") # twomerge significantly better print(sort(reversed(runs), timsort)) print(sort(reversed(runs), twomerge)) # "Random" run-length distributions are largely irrelevant, since on # randomly ordered input the actual sort is most likely to _force_ # (via local binary insertion sorts) all runs to length `minrun`. # Nevertheless ... there's no clear overall winner in this # particular made-up distribution. On some runs timsort "wins" in # the end, on others twomerge, but the total costs are typically # within 2% of each other. print("randomized trials") totals = [0, 0] for trial in range(20): runs = [] for i in range(1000): switch = random() if switch < 0.80: x = randrange(1, 100) else: x = randrange(1000, 10000) runs.append(x) for i, which in enumerate([timsort, twomerge]): cost, depth = sort(runs, which) print(f"{which.__name__:8s}", cost, depth) totals[i] += cost print("sofar", totals, "timsort excess", f"{(totals[0] - totals[1]) / totals[1]:.2%}")