I am trying to implement a gradient descent algorithm for simple linear regression. For some reason it doesn't seem to be working.
from __future__ import division
import random
def error(x_i,z_i, theta0,theta1):
return z_i - theta0 - theta1 * x_i
def squared_error(x_i,z_i,theta0,theta1):
return error(x_i,z_i,theta0,theta1)**2
def mse_fn(x, z, theta0,theta1):
m = 2 * len(x)
return sum(squared_error(x_i,z_i,theta0,theta1) for x_i,z_i in zip(x,z)) / m
def mse_gradient(x, z, theta0,theta1):
m = 2 * len(x)
grad_0 = sum(error(x_i,z_i,theta0,theta1) for x_i,z_i in zip(x,z)) / m
grad_1 = sum(error(x_i,z_i,theta0,theta1) * x_i for x_i,z_i in zip(x,z)) / m
return grad_0, grad_1
def minimize_batch(x, z, mse_fn, mse_gradient_fn, theta0,theta1,tolerance=0.000001):
step_sizes = 0.01
theta0 = theta0
theta1 = theta1
value = mse_fn(x,z,theta0,theta1)
while True:
grad_0, grad_1 = mse_gradient(x,z,theta0,theta1)
next_theta0 = theta0 - step_sizes * grad_0
next_theta1 = theta1 - step_sizes * grad_1
next_value = mse_fn(x,z,next_theta0,theta1)
if abs(value - next_value) < tolerance:
return theta0, theta1
else:
theta0, theta1, value = next_theta0, next_theta1, next_value
#The data
x = [i + 1 for i in range(40)]
y = [random.randrange(1,30) for i in range(40)]
z = [2*x_i + y_i + (y_i/7) for x_i,y_i in zip(x,y)]
theta0, theta1 = [random.randint(-10,10) for i in range(2)]
q = minimize_batch(x,z,mse_fn, mse_gradient, theta0,theta1,tolerance=0.000001)
When I run I get the following error: return error(x_i,z_i,theta0,theta1)**2 OverflowError: (34, 'Result too large')