def merge_at(s, i):
s[i] += s[i+1]
del s[i+1]
return s[i]
def merge_force_collapse(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if n > 0 and s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
return cost
def sort(runs, merge_collapse):
stack = []
maxstack = 0
cost = 0
for x in runs:
stack.append(x)
maxstack = max(maxstack, len(stack))
cost += merge_collapse(stack)
cost += merge_force_collapse(stack)
return cost, maxstack
def timsort(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if ((n > 0 and s[n-1] <= s[n] + s[n+1]) or
(n > 1 and s[n-2] <= s[n-1] + s[n])):
if s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
elif s[n] <= s[n+1]:
cost += merge_at(s, n)
else:
break
return cost
def twomerge(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if s[n] < 2 * s[n+1]:
if n > 0 and s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
else:
break
if len(s) > 1:
assert s[-2] >= 2 * s[-1]
if len(s) > 2:
assert s[-3] >= 2 * s[-2]
return cost
# Minimal cost across all possible ways of merging runs.
# Takes time cubic in len(runs).
def ideal(runs):
runs = list(runs)
n = len(runs)
# c[i, w] is minimal cost of merging slice runs[i : i+w].
c = {(i, 1) : 0 for i in range(n)}
prefixsum = runs[0]
for width in range(2, n+1):
prefixsum += runs[width - 1]
total = prefixsum
for start in range(n - width + 1):
c[start, width] = total + min(c[start, w] + c[start + w, width - w]
for w in range(1, width))
if start + width < n:
total += runs[start + width] - runs[start]
return c[0, n]
def greedy(runs):
s = list(runs)
cost = 0
while len(s) > 1:
i = min(range(len(s) - 1), key=lambda i: s[i] + s[i+1])
cost += merge_at(s, i)
return cost
# Bad sequence of run lengths for timsort, summing to `n`.
def rtim(n):
if n <= 3:
return [n]
n2 = n >> 1
return rtim(n2) + rtim(n2 - 1) + [(n & 1) + 1]
def nodePower(left, right, startA, startB, endB):
twoN = (right - left + 1) << 1 # 2*n
l = startA + startB - (left << 1)
r = startB + endB + 1 - (left << 1)
a = (l << 31) // twoN
b = (r << 31) // twoN
return 32 - (a ^ b).bit_length()
def powersort(runs):
cost = 0
maxstack = 0
s = []
runs = list(runs)
n = sum(runs)
s1, n1 = 0, runs[0]
for i in range(1, len(runs)):
s2, n2 = s1 + n1, runs[i]
p = nodePower(0, n-1, s1, s2, s2 + n2 - 1)
while s and s[-1][-1] > p:
s0, n0, _ = s.pop()
assert s0 + n0 == s1
n1 += n0
cost += n1
s1 = s0
assert s1 + n1 == s2
if s:
assert s[-1][-1] < p # never equal!
s.append((s1, n1, p))
maxstack = max(maxstack, len(s))
s1, n1 = s2, n2
while s:
s0, n0, _ = s.pop()
assert s0 + n0 == s1
n1 += n0
cost += n1
s1 = s0
assert (s1, n1) == (0, n), (s1, n1, 0, n)
return cost, maxstack
if 1:
from random import random, randrange
print("all the same") # identical stats
runs = [32] * 1000
print(sort(runs, timsort))
print(sort(runs, twomerge))
print(powersort(runs))
print("ascending") # timsort a little better
runs = range(1, 2000)
print(sort(runs, timsort))
print(sort(runs, twomerge))
print(powersort(runs))
print("descending") # twomerge significantly better
print(sort(reversed(runs), timsort))
print(sort(reversed(runs), twomerge))
print(powersort(reversed(runs)))
print("bad timsort case")
runs = rtim(100000)
print(sort(runs, timsort))
print(sort(runs, twomerge))
print(powersort(runs))
# "Random" run-length distributions are largely irrelevant, since on
# randomly ordered input the actual sort is most likely to _force_
# (via local binary insertion sorts) all runs to length `minrun`.
# Nevertheless ... there's no clear overall winner in this
# particular made-up distribution. On some runs timsort "wins" in
# the end, on others twomerge, but the total costs are typically
# within 2% of each other.
# Later: powersort almost always dominates both.
print("randomized trials")
totals = [0, 0, 0]
for trial in range(20):
runs = []
for i in range(10000):
switch = random()
if switch < 0.80:
x = randrange(1, 100)
else:
x = randrange(1000, 10000)
runs.append(x)
for i, which in enumerate([timsort, twomerge]):
cost, depth = sort(runs, which)
print(f"{which.__name__:8s}", cost, depth)
totals[i] += cost
#cost = ideal(runs)
#print(" ideal", cost)
#totals[2] += cost
#cost = greedy(runs)
#print(" greedy", cost)
#totals[3] += cost
cost, depth = powersort(runs)
print("power ", cost, depth)
totals[2] += cost
print("sofar", totals, "timsort excess",
f"{(totals[0] - totals[1]) / totals[1]:.2%}")