# ignore this line, it's for showing plots within this script.
%pylab inline
import random
import math
def random_walk(n, x0=0, y0=0):
    """Simulate a 2-D random walk of n points starting at (x0, y0).

    Every step goes in a uniformly random direction with a uniformly
    random length in [0, 1).  Returns two lists holding the x and y
    coordinates of all n visited points (start point included).
    """
    xs = [x0]
    ys = [y0]
    for _ in range(n - 1):
        # draw the direction first, then the step length (this order is
        # what makes runs reproducible under a fixed random seed)
        direction = 2 * math.pi * random.random()
        distance = random.random()
        xs.append(xs[-1] + math.cos(direction) * distance)
        ys.append(ys[-1] + math.sin(direction) * distance)
    return xs, ys
# Run a 10,000-step walk and draw its path.
# NOTE(review): `plt` is not imported explicitly here — it is injected by
# the `%pylab inline` magic at the top of the notebook.
x_values, y_values = random_walk(10000)
plt.plot(x_values, y_values)
# Rough timing of the pure-Python implementation.
import time

# time.perf_counter() is a monotonic clock intended for benchmarking;
# time.time() is a wall clock that can jump (NTP adjustments, DST).
started = time.perf_counter()
random_walk(1000000)
needed = time.perf_counter() - started
print(needed)
NumPy is optimized for array and matrix operations, so an addition like:
# Element-wise addition of two arrays: a single vectorized expression.
a = np.arange(10)
b = np.arange(10)
c = np.add(a, b)  # equivalent to the operator form `a + b`
is not only shorter, but also faster than the equivalent manual loop:
# The explicit element-by-element version that the vectorized form replaces.
c = np.zeros(10)
for i, (ai, bi) in enumerate(zip(a, b)):
    c[i] = ai + bi
So our goal is to get rid of the `for` loop. The tricky part is how to implement the updates of x and y without iterating in Python.
`np.cumsum` (cumulative sum), which transforms an array [x0, x1, x2, ...] into [x0, x0 + x1, x0 + x1 + x2, ...], helps us here:
print(np.cumsum([1, 2, 1, 3]))
import numpy as np
# Fix: this was imported as `p`, but every call site in this notebook
# uses the conventional alias `plt` (e.g. plt.plot further down).
import matplotlib.pyplot as plt
def random_walk_fast(n, x0=0, y0=0):
    """Vectorized 2-D random walk of n points starting at (x0, y0).

    Instead of looping, we draw all n - 1 angles and step lengths in
    one go, turn them into per-step x/y increments, and let np.cumsum
    accumulate the running coordinates.
    """
    # Draw order (angles first, lengths second) matches the loop version,
    # so seeded runs stay comparable.
    angles = np.random.rand(n - 1) * 2 * np.pi
    dx = np.cos(angles)
    dy = np.sin(angles)
    lengths = np.random.random(n - 1)

    # First entry is the start coordinate, the rest are the increments;
    # the cumulative sum then yields the absolute positions.
    steps_x = np.concatenate(([x0], lengths * dx))
    steps_y = np.concatenate(([y0], lengths * dy))
    return np.cumsum(steps_x), np.cumsum(steps_y)
# Rough timing of the NumPy implementation (same size as the loop version).
import time

# perf_counter() is the monotonic benchmarking clock; time.time() is a
# wall clock and can jump while we measure.
started = time.perf_counter()
x_values, y_values = random_walk_fast(1000000)
needed = time.perf_counter() - started
print(needed)
So the optimized function is about 10x faster, but at the cost of decreased readability for non-NumPy experts.
https://shreevatsa.wordpress.com/2008/05/16/premature-optimization-is-the-root-of-all-evil/
This does not mean you should never optimize, but optimization often reduces code readability, and sometimes its structure. So: don't guess which parts are slow — measure! Most such guesses are wrong.
plt.plot(x_values, y_values)
def plot_random_walk(x_values, y_values, block_size=2000, colors="RGBKY"):
    """Plot a walk in consecutive segments of block_size points each,
    cycling through the given one-letter color codes so the time
    progression of the walk is visible.
    """
    n = len(x_values)
    assert len(y_values) == n
    for idx, start in enumerate(range(0, n, block_size)):
        stop = start + block_size
        # wrap around when there are more segments than colors
        pylab.plot(
            x_values[start:stop],
            y_values[start:stop],
            colors[idx % len(colors)],
        )
# Generate a shorter walk and show it with the color-segmented plotter.
x_values, y_values= random_walk_fast(10000)
plot_random_walk(x_values, y_values)
By the way: slice indices beyond the length of an array are allowed — the slice is clipped to the array's end:
x = np.arange(8)
# Slicing past the end of an array is safe: the slice is clipped to the
# array's length, so x[5:10] simply yields the last three elements.
tail = x[5:10]
print(tail)