def merge_at(s, i): s[i] += s[i+1] del s[i+1] return s[i] def merge_force_collapse(s): cost = 0 while len(s) > 1: n = len(s) - 2 if n > 0 and s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) return cost def sort(runs, merge_collapse): stack = [] maxstack = 0 cost = 0 for x in runs: stack.append(x) maxstack = max(maxstack, len(stack)) cost += merge_collapse(stack) cost += merge_force_collapse(stack) return cost, maxstack def timsort(s): cost = 0 while len(s) > 1: n = len(s) - 2 if ((n > 0 and s[n-1] <= s[n] + s[n+1]) or (n > 1 and s[n-2] <= s[n-1] + s[n])): if s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) elif s[n] <= s[n+1]: cost += merge_at(s, n) else: break return cost def twomerge(s): cost = 0 while len(s) > 1: n = len(s) - 2 if s[n] < 2 * s[n+1]: if n > 0 and s[n-1] < s[n+1]: n -= 1 cost += merge_at(s, n) else: break if len(s) > 1: assert s[-2] >= 2 * s[-1] if len(s) > 2: assert s[-3] >= 2 * s[-2] return cost # Minimal cost across all possible ways of merging runs. # Takes time cubic in len(runs). def ideal(runs): runs = list(runs) n = len(runs) # c[i, w] is minimal cost of merging slice runs[i : i+w]. c = {(i, 1) : 0 for i in range(n)} prefixsum = runs[0] for width in range(2, n+1): prefixsum += runs[width - 1] total = prefixsum for start in range(n - width + 1): c[start, width] = total + min(c[start, w] + c[start + w, width - w] for w in range(1, width)) if start + width < n: total += runs[start + width] - runs[start] return c[0, n] def greedy(runs): s = list(runs) cost = 0 while len(s) > 1: i = min(range(len(s) - 1), key=lambda i: s[i] + s[i+1]) cost += merge_at(s, i) return cost # Bad sequence of run lengths for timsort, summing to `n`. def rtim(n): if n <= 3: return [n] n2 = n >> 1 return rtim(n2) + rtim(n2 - 1) + [(n & 1) + 1] def nodePower(left, right, startA, startB, endB): twoN = (right - left + 1) << 1 # 2*n l = startA + startB - (left << 1) r = startB + endB + 1 - (left << 1) a = (l << 31) // twoN b = (r << 31) // twoN return 32 - (a ^ b).bit_length() def powersort(runs): cost = 0 maxstack = 0 s = [] runs = list(runs) n = sum(runs) s1, n1 = 0, runs[0] for i in range(1, len(runs)): s2, n2 = s1 + n1, runs[i] p = nodePower(0, n-1, s1, s2, s2 + n2 - 1) while s and s[-1][-1] > p: s0, n0, _ = s.pop() assert s0 + n0 == s1 n1 += n0 cost += n1 s1 = s0 assert s1 + n1 == s2 if s: assert s[-1][-1] < p # never equal! s.append((s1, n1, p)) maxstack = max(maxstack, len(s)) s1, n1 = s2, n2 while s: s0, n0, _ = s.pop() assert s0 + n0 == s1 n1 += n0 cost += n1 s1 = s0 assert (s1, n1) == (0, n), (s1, n1, 0, n) return cost, maxstack if 1: from random import random, randrange print("all the same") # identical stats runs = [32] * 1000 print(sort(runs, timsort)) print(sort(runs, twomerge)) print(powersort(runs)) print("ascending") # timsort a little better runs = range(1, 2000) print(sort(runs, timsort)) print(sort(runs, twomerge)) print(powersort(runs)) print("descending") # twomerge significantly better print(sort(reversed(runs), timsort)) print(sort(reversed(runs), twomerge)) print(powersort(reversed(runs))) print("bad timsort case") runs = rtim(100000) print(sort(runs, timsort)) print(sort(runs, twomerge)) print(powersort(runs)) # "Random" run-length distributions are largely irrelevant, since on # randomly ordered input the actual sort is most likely to _force_ # (via local binary insertion sorts) all runs to length `minrun`. # Nevertheless ... there's no clear overall winner in this # particular made-up distribution. On some runs timsort "wins" in # the end, on others twomerge, but the total costs are typically # within 2% of each other. # Later: powersort almost always dominates both. print("randomized trials") totals = [0, 0, 0] for trial in range(20): runs = [] for i in range(10000): switch = random() if switch < 0.80: x = randrange(1, 100) else: x = randrange(1000, 10000) runs.append(x) for i, which in enumerate([timsort, twomerge]): cost, depth = sort(runs, which) print(f"{which.__name__:8s}", cost, depth) totals[i] += cost #cost = ideal(runs) #print(" ideal", cost) #totals[2] += cost #cost = greedy(runs) #print(" greedy", cost) #totals[3] += cost cost, depth = powersort(runs) print("power ", cost, depth) totals[2] += cost print("sofar", totals, "timsort excess", f"{(totals[0] - totals[1]) / totals[1]:.2%}")