def sum_up(n):
    """Return the sum 0 + 1 + ... + n, computed recursively."""
    # Guard: a negative n would recurse forever.
    assert n >= 0
    if n:
        return n + sum_up(n - 1)
    return 0
# quick sanity check: 3 + 2 + 1 + 0 = 6
print(sum_up(3))
def sum_elements(li):
    """Return the sum of the elements of li, computed recursively."""
    # Empty list is the base case of the recursion.
    if li:
        head, *tail = li
        return head + sum_elements(tail)
    return 0
# quick sanity check: 1 + 1 + 2 + 3 = 7
print(sum_elements([1, 1, 2, 3]))
def quicksort(li):
    """Return a sorted copy of li (quicksort, first element as pivot).

    Elements equal to the pivot go to the left partition, so duplicates
    are handled correctly.
    """
    if not li:
        return []
    pivot, *rest = li
    smaller = [x for x in rest if x <= pivot]
    larger = [x for x in rest if x > pivot]
    return quicksort(smaller) + [pivot] + quicksort(larger)
import random

# Stress test: sort many random permutations and compare against the
# built-in sorted as the reference implementation.
for i in range(1000):
    if i % 100 == 0:
        print("x", end=" ")  # progress indicator every 100 iterations
    # also include duplicate elements:
    data = list(range(100)) + list(range(50))
    random.shuffle(data)
    assert quicksort(data) == sorted(data)
import time
import numpy as np
def measure_10_times(algorithm, data, do_shuffle):
    """Run algorithm(data) ten times and return the list of run times.

    algorithm: callable taking data as its single argument.
    data: input list; NOTE: shuffled in place (mutated) before every run
          when do_shuffle is True.
    do_shuffle: if True, randomly shuffle data before each measurement.
    Returns: list of 10 wall-clock run times in seconds.
    """
    times = []
    for i in range(10):
        print(i, end=" ")  # progress indicator
        if do_shuffle:
            random.shuffle(data)
        # perf_counter is the right clock for benchmarking: monotonic and
        # high resolution. time.time can jump (NTP sync) and has coarse
        # resolution on some platforms.
        started = time.perf_counter()
        algorithm(data)
        needed = time.perf_counter() - started
        times.append(needed)
    return times
def benchmark(algorithm, sizes, do_shuffle):
    """Measure run times of `algorithm` for different input data sizes.

    For every size the algorithm is run multiple times and the minimum
    time is kept, to compensate for measurement bias caused by
    background processes on the computer.
    do_shuffle=True benchmarks randomly shuffled data; otherwise the
    data is passed in increasing (already sorted) order.
    Returns: np.array with one minimum run time per entry of sizes.
    """
    minima = []
    for size in sizes:
        samples = list(range(size))
        print(size, end=" ")
        per_run = measure_10_times(algorithm, samples, do_shuffle)
        print()
        minima.append(min(per_run))
    return np.array(minima)
# Benchmark quicksort on randomly shuffled data of increasing size.
sizes = np.array((1000, 10000, 30000, 50000, 100000, 200000, 300000, 400000))
all_times = benchmark(quicksort, sizes, True)
from matplotlib import pyplot
import math
# Plot the measured quicksort run times against the input sizes.
pyplot.plot(sizes, all_times, "o", label="qsort")
pyplot.legend()
pyplot.show()
Theory shows that runtime of quicksort for $n$ elements is on average proportional to $n \log{n}$ for large $n$, which grows only slightly faster than $n$.
from scipy.optimize import curve_fit
def nlogn(x, a, c):
    """Model function a * x * log(x) + c for curve fitting."""
    return c + a * x * np.log(x)
# Fit the n*log(n) model to the measured times and overlay the fit.
params, __ = curve_fit(nlogn, sizes, all_times)
print("a=", params[0])
print("c=", params[1])
pyplot.plot(sizes, all_times, "o", label="qsort")
pyplot.plot(sizes, nlogn(sizes, *params), ":", label="n log(n) fit")
pyplot.legend()
pyplot.show()
The situation is different for already sorted or reversely sorted data. In this case either `left` or `right` is the full list except the pivot element, and thus we don't divide our problem into significantly smaller sub-problems.
import time  # NOTE(review): redundant, time is already imported above
# Much smaller sizes here: without shuffling the data is already sorted,
# which is quicksort's worst case (quadratic run time).
sizes = np.array((10, 100, 500, 1000, 1500, 2000))
all_times = benchmark(quicksort, sizes, False)
def quadratic(x, a, b):
    """Model function a * x^2 + b for curve fitting."""
    return b + a * x * x
# Fit a quadratic model: on (reverse-)sorted input quicksort degrades to O(n^2).
params, __ = curve_fit(quadratic, sizes, all_times)
print(params)
pyplot.plot(sizes, all_times, "o", label="qsort worst case")
fitted = quadratic(sizes, *params)
# removed dead code: `fac = all_times[0] / sizes[0]` was computed but never used
pyplot.plot(sizes, fitted, ":", label="quadratic fit")
pyplot.legend()
pyplot.show()
Now we benchmark the built-in `sorted` function:
# Benchmark the built-in sorted (Timsort), also O(n log n) on average.
sizes = np.array((1000, 5000, 8000, 10000, 25000, 50000, 100000, 500000))
all_times = benchmark(sorted, sizes, True)
params, __ = curve_fit(nlogn, sizes, all_times)
print(params)
pyplot.plot(sizes, all_times, "o", label="sorted")
# BUG FIX: the original called an undefined `linear` here (NameError);
# the model fitted above is nlogn, so that is what must be plotted.
pyplot.plot(sizes, nlogn(sizes, *params), ":", label="n log(n) fit")
pyplot.legend()
pyplot.show()