def sum_up(n):
    """Return the sum 0 + 1 + ... + n, computed recursively."""
    # Guard: a negative n would recurse forever.
    assert n >= 0
    if n:
        return n + sum_up(n - 1)
    return 0
# quick sanity check: 3 + 2 + 1 + 0 = 6
print(sum_up(3))
def sum_elements(li):
    """Return the sum of the elements of li, computed recursively."""
    # Empty list is the base case of the recursion.
    if li:
        head, *tail = li
        return head + sum_elements(tail)
    return 0
# quick sanity check: 1 + 1 + 2 + 3 = 7
print(sum_elements([1, 1, 2, 3]))
def quicksort(li):
    """Return a sorted copy of li (quicksort, first element as pivot).

    Elements equal to the pivot go to the left partition, so duplicates
    are handled correctly.
    """
    if not li:
        return []
    pivot, *rest = li
    smaller = [x for x in rest if x <= pivot]
    larger = [x for x in rest if x > pivot]
    return quicksort(smaller) + [pivot] + quicksort(larger)
import random

# Stress test: sort many random permutations and compare against the
# built-in sorted as the reference implementation.
for i in range(1000):
    if i % 100 == 0:
        print("x", end=" ")  # progress indicator every 100 iterations
    # also include duplicate elements:
    data = list(range(100)) + list(range(50))
    random.shuffle(data)
    assert quicksort(data) == sorted(data)
import time
import numpy as np
def measure_10_times(algorithm, data, do_shuffle):
    """Run algorithm(data) ten times and return the list of run times.

    algorithm: callable taking data as its single argument.
    data: input list; NOTE: shuffled in place (mutated) before every run
          when do_shuffle is True.
    do_shuffle: if True, randomly shuffle data before each measurement.
    Returns: list of 10 wall-clock run times in seconds.
    """
    times = []
    for i in range(10):
        print(i, end=" ")  # progress indicator
        if do_shuffle:
            random.shuffle(data)
        # perf_counter is the right clock for benchmarking: monotonic and
        # high resolution. time.time can jump (NTP sync) and has coarse
        # resolution on some platforms.
        started = time.perf_counter()
        algorithm(data)
        needed = time.perf_counter() - started
        times.append(needed)
    return times
def benchmark(algorithm, sizes, do_shuffle):
    """Measure run times of `algorithm` for different input data sizes.

    For every size the algorithm is run multiple times and the minimum
    time is kept, to compensate for measurement bias caused by
    background processes on the computer.
    do_shuffle=True benchmarks randomly shuffled data; otherwise the
    data is passed in increasing (already sorted) order.
    Returns: np.array with one minimum run time per entry of sizes.
    """
    minima = []
    for size in sizes:
        samples = list(range(size))
        print(size, end=" ")
        per_run = measure_10_times(algorithm, samples, do_shuffle)
        print()
        minima.append(min(per_run))
    return np.array(minima)
# Benchmark quicksort on randomly shuffled data of increasing size.
sizes = np.array((1000, 10000, 30000, 50000, 100000, 200000, 300000, 400000))
all_times = benchmark(quicksort, sizes, True)
from matplotlib import pyplot
import math
# Plot the measured quicksort run times against the input sizes.
pyplot.plot(sizes, all_times, "o", label="qsort")
pyplot.legend()
pyplot.show()
Theory shows that runtime of quicksort for $n$ elements is on average proportional to $n \log{n}$ for large $n$, which grows only slightly faster than $n$.
from scipy.optimize import curve_fit
def nlogn(x, a, c):
    """Model function a * x * log(x) + c for curve fitting."""
    return c + a * x * np.log(x)
# Fit the n*log(n) model to the measured times and overlay the fit.
params, __ = curve_fit(nlogn, sizes, all_times)
print("a=", params[0])
print("c=", params[1])
pyplot.plot(sizes, all_times, "o", label="qsort")
pyplot.plot(sizes, nlogn(sizes, *params), ":", label="n log(n) fit")
pyplot.legend()
pyplot.show()
The situation is different for already sorted or reversely sorted data. In this case either `left` or `right` is the full list except the pivot element, and thus we don't divide our problem into significantly smaller sub-problems.
import time  # NOTE(review): redundant, time is already imported above
# Much smaller sizes here: without shuffling the data is already sorted,
# which is quicksort's worst case (quadratic run time).
sizes = np.array((10, 100, 500, 1000, 1500, 2000))
all_times = benchmark(quicksort, sizes, False)
def quadratic(x, a, b):
    """Model function a * x^2 + b for curve fitting."""
    return b + a * x * x
# Fit a quadratic model: on (reverse-)sorted input quicksort degrades to O(n^2).
params, __ = curve_fit(quadratic, sizes, all_times)
print(params)
pyplot.plot(sizes, all_times, "o", label="qsort worst case")
fitted = quadratic(sizes, *params)
# removed dead code: `fac = all_times[0] / sizes[0]` was computed but never used
pyplot.plot(sizes, fitted, ":", label="quadratic fit")
pyplot.legend()
pyplot.show()
Now we benchmark the built-in `sorted` function:
# Benchmark the built-in sorted (Timsort), also O(n log n) on average.
sizes = np.array((1000, 5000, 8000, 10000, 25000, 50000, 100000, 500000))
all_times = benchmark(sorted, sizes, True)
params, __ = curve_fit(nlogn, sizes, all_times)
print(params)
pyplot.plot(sizes, all_times, "o", label="sorted")
# BUG FIX: the original called an undefined `linear` here (NameError);
# the model fitted above is nlogn, so that is what must be plotted.
pyplot.plot(sizes, nlogn(sizes, *params), ":", label="n log(n) fit")
pyplot.legend()
pyplot.show()