I was trying to build a three layer neural network with each having one neuron using numpy. One was the input layer, one was the hidden layer and one was the output layer. The hidden layer had relu activation and the output layer had sigmoid activation. I tried to make the gradient descent and mean squared error all in numpy, where the gradient descent had an array as input where all the data was. The data was actually random numbers where if the input is more than 18 then the ouput will be 1.But the problem is that the weights and biases are initialized to 1 and after any amount of epoch given, the weights and biases wont change but the loss will decrease. The structure if it helps: (x)---((y1 = w1 * x + b1)|(z1 = relu(y1)))----((y2 = w2 * z1 + b2)|(z2 = sigmoid(y2))). The code:
import numpy as np
import math
import random as r
# dataset generation
x = []
y = []
for i in range (100):
age = r.randint(0, 80)
x.append(age)
if age > 18:
y.append(1)
else:
y.append(0)
# forward pass functions
def sig(x):
y = []
for i in x:
y.append(1/(1+math.exp(-i)))
return y
def relu(x):
y = []
for i in x:
if i > 0:
y.append(i)
else:
y.append(0)
return y
def mse(ytrue,ypred):
total = 0
for yt,yp in zip(ytrue,ypred):
total += (yt - yp)**2
return total/len(ytrue)
# backward pass functions
def dmse(ytrue,ypred):
total = []
for yt,yp in zip(ytrue,ypred):
total.append((yt - yp)*2)
return total
def d_of_relu(x):
y = []
for i in x:
if i > 0:
y.append(1)
else:
y.append(0)
return y
def d_of_sig(x):
y = []
for i in y:
y.append(y*(1-y))
return y
# additional functions
def y_calculation(x,w,b):
y = []
for i in x:
y.append(i*w+b)
return y
def chain_addition_second_layer_with_weight(dcdz2,dz2dy2,dy2dw2):
total = 0
for a,b in zip(dcdz2,dz2dy2):
total += a*b*dy2dw2
return total/len(dcdz2)
def chain_addition_second_layer_with_bias(dcdz2,dz2dy2):
total = 0
for a,b in zip(dcdz2,dz2dy2):
total += a*b*1
return total/len(dcdz2)
def chain_addition_first_layer_with_bias(dcdz2,dz2dy2,dy2dz1,dz1dy1):
total = 0
for a,b,d in zip(dcdz2,dz2dy2,dz1dy1):
total += a*b*dy2dz1*d*1
return total/len(dcdz2)
def chain_addition_first_layer_with_weight(dcdz2,dz2dy2,dy2dz1,dz1dy1,dy1dw1):
total = 0
for a,b,d in zip(dcdz2,dz2dy2,dz1dy1):
total += a*b*dy2dz1*d*dy1dw1
return total/len(dcdz2)
# actual gradient descend
def gradient_descend(x,y,lr,epoch):
w1 = w2 = b1 = b2 = 2
for i in range(epoch):
y1 = y_calculation(x,w1,b1)
z1 = relu(y1)
y2 = y_calculation(z1,w2,b2)
z2 = sig(y2)
cost = mse(y,z2)
dcdz2 = dmse(y,z2)
dz2dy2 = d_of_sig(z2)
dy2dw2 = z1
dy2db2 = 1
dy2dz1 = w2
dz1dy1 = d_of_relu(z1)
dy1dw1 = x
w2 = w2 - lr * chain_addition_second_layer_with_weight(dcdz2,dz2dy2,dy2dw2)
b2 = b2 - lr * chain_addition_second_layer_with_bias(dcdz2,dz2dy2)
w1 = w1 - lr * chain_addition_first_layer_with_weight(dcdz2,dz2dy2,dy2dz1,dz1dy1,dy1dw1)
b1 = b1 - lr * chain_addition_first_layer_with_bias(dcdz2,dz2dy2,dy2dz1,dz1dy1)
return w2,b2,w1,b1,cost
print(gradient_descend(x,y,0.1,300))
If anyone replies the necessary changes, it wil be very helpful. Thanks in advance