Optimization
Optimization consists of maximizing or minimizing a real function by systematically choosing input values from an allowed set and computing the value of the function. By this definition it is a wide field, with equally diverse applications.
In this notebook, we will cover two of the most common optimization methods: Nelder-Mead and Levenberg-Marquardt. Although these methods can optimize fairly general functions, we will restrict ourselves to fitting a sinusoidal model.
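As a warm-up, here is a minimal sketch of what "systematically choosing input values" looks like in code. This is an illustrative addition; the toy quadratic f below is not part of the sine-fitting example.
from scipy.optimize import minimize_scalar
# toy example (assumption): minimize f(x) = (x - 3)**2 + 2, whose minimum is at x = 3
f = lambda xi: (xi - 3)**2 + 2
result = minimize_scalar(f)
print(result.x, result.fun)  # should print values close to 3 and 2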
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline
Let's define a simple sine function, a * sin(b*x + c), and plot it for a given set of parameters.
true = [1, 1, 0]  # true parameters: amplitude a=1, angular frequency b=1, phase c=0
x = np.linspace(0, 10, 1000)
def simple_sine(theta, x):
    a, b, c = theta
    return a * np.sin(b * x + c)
plt.plot(x, simple_sine(true, x))
plt.ylim([-4,4]);
Now let's artificially add noise to mimic real data, and superpose the noisy data on the perfect sine curve.
y_noisy = simple_sine(true, x) + np.random.randn(x.size)
plt.plot(x, y_noisy, 'o')
plt.plot(x, simple_sine(true, x),'r-', lw=3)
plt.ylim([-4,4]);
Let's look at the histogram of the residuals (i.e., observed minus true).
res = y_noisy - simple_sine(true, x)  # residuals
plt.hist(res, bins=30);
The residuals are fairly normally distributed, i.e. the histogram is bell-shaped, as expected since the noise was drawn from a standard normal distribution.
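As a quick quantitative check (a small sketch added here, not part of the original notebook), we can compare the sample mean and standard deviation of the residuals with the values expected for standard normal noise (0 and 1), and run a normality test from scipy.stats.
from scipy import stats
# the noise was drawn with np.random.randn, so mean ~0 and std ~1 are expected
print("mean = {:.3f}, std = {:.3f}".format(res.mean(), res.std()))
stat, p_value = stats.normaltest(res)  # D'Agostino-Pearson test of normality
print("normality test p-value = {:.3f}".format(p_value))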
Nelder-Mead Method
Let's start with a general-purpose optimization method called Nelder-Mead, also known as the downhill simplex method.
import scipy.optimize as opt
opt.minimize?
# From the scipy docs: Nelder-Mead uses the Simplex algorithm. This algorithm
# is robust in many applications. However, if numerical computation of
# derivatives can be trusted, other algorithms using the first and/or second
# derivative information might be preferred for their better performance in
# general.
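To illustrate the docstring's point (a standalone sketch, not part of the sine fit), we can compare the derivative-free Nelder-Mead method with the gradient-based BFGS method on scipy's built-in Rosenbrock test function and look at how many function evaluations each needs.
# illustrative comparison on the Rosenbrock function shipped with scipy
x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
for method in ['nelder-mead', 'BFGS']:
    result = opt.minimize(opt.rosen, x0, method=method)
    print("{:12s} nfev={:4d} fun={:.2e}".format(method, result.nfev, result.fun))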
The minimization routines in scipy.optimize.minimize require an objective function as input, called objective below. The minimizer then searches for the parameter values that yield the smallest squared error between the model and the data.
def objective(theta, xi, yi):
    model = simple_sine(theta, xi)
    return np.sum((model - yi)**2)  # sum of squared residuals
Let's evaluate the objective at the true parameters, given our noisy data.
objective(true, x, y_noisy)
This is just a number, but it gives us a reference for the error level to expect at the optimum. We can use scipy.optimize.minimize to search for the parameters theta that minimize the objective.
init_guess = [0.5] * 3  # [0.5, 0.5, 0.5]
optimize = opt.minimize(objective, init_guess, args=(x, y_noisy), method='nelder-mead')
optimize
The result object optimize has many fields, but we only need x, which holds the best estimate of the parameters theta (ideally close to true).
print('Success={}'.format(optimize.success))
for i in optimize.x:  # optimize.x holds the optimized parameters
    print("parameter optimum: {}".format(i))
Let's see if the squared error at the optimum is indeed close to the ~980 computed earlier.
objective(optimize.x, x, y_noisy)
Let's see the percent difference relative to the error at the true parameters.
(1 - objective(optimize.x, x, y_noisy) / objective(true, x, y_noisy)) * 100
Let's see the difference between the computed optimum values and the true values.
np.array(true) - optimize.x  # difference from the true values
Levenberg-Marquardt Algorithm
The next optimization method we will discuss is the Levenberg-Marquardt algorithm, also known as LMA.
We will use scipy.optimize.curve_fit, which by default (for unbounded problems) uses the Levenberg-Marquardt algorithm, to find the parameter values that minimize the squared error between the model function (h, a re-parameterized simple_sine) and the data (x, y_noisy). It returns p_opt, the best estimate of true = [1, 1, 0] = a, b, c, and p_cov, the covariance matrix from which the parameter variances and standard deviations can be computed.
opt.curve_fit?
# curve_fit expects the model as f(x, a, b, c), so wrap simple_sine
# in a lambda with that call signature
h = lambda x, a, b, c: a * np.sin(b * x + c)
p_opt, p_cov = opt.curve_fit(h, x, y_noisy, p0=init_guess)
var = np.diag(p_cov)        # parameter variances
std = np.sqrt(var)          # parameter standard deviations (a.k.a. sigma)
for i, j in zip(p_opt, std):
    print("parameter optimum: {} +/- {}".format(i, j))
# the above is equivalent to
# for i in range(len(p_opt)):
#     print("parameter optimum: {} +/- {}".format(p_opt[i], std[i]))
p_opt  # a numpy array holding the best-fit a, b, c
plt.plot(x, y_noisy, 'o')
plt.plot(x, h(x, *p_opt), 'r-', lw=3);  # *p_opt unpacks the array into the a, b, c arguments of h
Levenberg-Marquardt is apparently not quite as robust to bad initial guesses. So let's try an initial guess that is close to the true values. In a real-life situation, of course, such a good guess is often hard to come by.
better_guess = 0.9, 0.9, 0.1
p_opt, p_cov = opt.curve_fit(h, x, y_noisy, p0=better_guess)
std = np.sqrt(np.diag(p_cov))
plt.plot(x, y_noisy, 'o')
plt.plot(x, h(x, *p_opt),'r-', lw=3);
p_opt
std
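Since a good initial guess is hard to come by in practice, one simple workaround (a sketch added here, not from the original notebook) is a multi-start strategy: try several random starting points and keep the fit with the smallest sum of squared residuals.
# multi-start sketch (illustrative): random starting points, keep the best fit
best_sse, best_popt = np.inf, None
for trial in range(20):
    guess = np.random.uniform(0.1, 2.0, size=3)  # random starting values for a, b, c
    try:
        popt, _ = opt.curve_fit(h, x, y_noisy, p0=guess)
    except RuntimeError:  # curve_fit raises RuntimeError if it fails to converge
        continue
    sse = np.sum((h(x, *popt) - y_noisy)**2)
    if sse < best_sse:
        best_sse, best_popt = sse, popt
print("best multi-start estimate: {}".format(best_popt))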
# res_fit = y_noisy - h(x, *p_opt)           # residuals of the fitted model
# mu, sigma = res_fit.mean(), res_fit.std()  # residual scatter (not the parameter std above)
# count, bins, ignore = plt.hist(res_fit, 30, density=True)
# # superpose the normal probability density function (pdf)
# plt.plot(bins, 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins - mu)**2/(2*sigma**2)), lw=2, c='r')
# plt.show()
# visualize the parameter uncertainty: draw 100 parameter sets from a normal
# distribution centered on the best fit and overplot the resulting curves
plt.plot(x, y_noisy, 'ko', ms=2)
for i in range(100):
    theta = p_opt + np.random.randn(p_opt.size) * std
    plt.plot(x, h(x, *theta), 'r-', lw=3, alpha=0.1)
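To wrap up, here is a short side-by-side summary (added here; the exact numbers vary from run to run because the noise is random) of both estimates against the true parameters.
# compare both estimates against the true parameters [a, b, c]
print("true:                {}".format(np.array(true, dtype=float)))
print("Nelder-Mead:         {}".format(optimize.x))
print("Levenberg-Marquardt: {}".format(p_opt))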