def merge_at(s, i):
s[i] += s[i+1]
del s[i+1]
return s[i]
def merge_force_collapse(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if n > 0 and s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
return cost
def sort(runs, merge_collapse):
stack = []
maxstack = 0
cost = 0
for x in runs:
stack.append(x)
maxstack = max(maxstack, len(stack))
cost += merge_collapse(stack)
cost += merge_force_collapse(stack)
return cost, maxstack
def timsort(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if ((n > 0 and s[n-1] <= s[n] + s[n+1]) or
(n > 1 and s[n-2] <= s[n-1] + s[n])):
if s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
elif s[n] <= s[n+1]:
cost += merge_at(s, n)
else:
break
return cost
def twomerge(s):
cost = 0
while len(s) > 1:
n = len(s) - 2
if s[n] < 2 * s[n+1]:
if n > 0 and s[n-1] < s[n+1]:
n -= 1
cost += merge_at(s, n)
else:
break
if len(s) > 1:
assert s[-2] >= 2 * s[-1]
if len(s) > 2:
assert s[-3] >= 2 * s[-2]
return cost
if 1:
from random import random, randrange
print("all the same") # identical stats
runs = [32] * 1000
print(sort(runs, timsort))
print(sort(runs, twomerge))
print("ascending") # timsort a little better
runs = range(1, 2000)
print(sort(runs, timsort))
print(sort(runs, twomerge))
print("descending") # twomerge significantly better
print(sort(reversed(runs), timsort))
print(sort(reversed(runs), twomerge))
# "Random" run-length distributions are largely irrelevant, since on
# randomly ordered input the actual sort is most likely to _force_
# (via local binary insertion sorts) all runs to length `minrun`.
# Nevertheless ... there's no clear overall winner in this
# particular made-up distribution. On some runs timsort "wins" in
# the end, on others twomerge, but the total costs are typically
# within 2% of each other.
print("randomized trials")
totals = [0, 0]
for trial in range(20):
runs = []
for i in range(1000):
switch = random()
if switch < 0.80:
x = randrange(1, 100)
else:
x = randrange(1000, 10000)
runs.append(x)
for i, which in enumerate([timsort, twomerge]):
cost, depth = sort(runs, which)
print(f"{which.__name__:8s}", cost, depth)
totals[i] += cost
print("sofar", totals, "timsort excess",
f"{(totals[0] - totals[1]) / totals[1]:.2%}")