I am currently attempting to implement a feed-forward neural network with 1 hidden layer using numpy as a project for university. We are to use hyperbolic tangent activation functions for the hidden layer, and the function for the output layer is unspecified. The NN is to regress a sine wave.

I have already implemented this code for the XOR classification problem and it seemed to work fine. The sine wave regression code only has minor alterations.

I have been tinkering with the hyper parameters for hours now, added momentum (which only seems to make things worse), and have seen little success in getting it working.

For the project, we are to run regression with 3 hidden units, and then with 20. Both of these options seem to dramatically underfit the data. About half the time, it will yield just a horizontal line, and sometimes it will plot what appears to be a single tanh function.

The only thing that seems to make this thing work is to add an excessive amount of units to the hidden layer (Hn in the learn function). A learning rate of .1-.2, epoch count of 2000-5000, and 100-200 hidden units seems to regress the sine wave sort of well.

I've been messing with this for hours and frankly am out of ideas. Any help would be greatly appreciated.

Here's the code:

```
import numpy as np
import matplotlib.pyplot as plt
__name__ = "partB"
class partB:
def __init__(self):
pass
def propegate(self, Xs):
self.A1 = np.dot(self.W1, Xs)
self.Z1 = np.tanh(self.A1) + self.b1
self.A2 = np.dot(self.W2, self.Z1)
self.Z2 = np.power(1 + np.exp(-1 * self.A2), -1) + self.b2
predictions = self.Z2
return predictions
def backProp(self, momentumFac):
# Get gradients
dA2 = self.Z2 - self.targets
dW2 = (1 / self.m) * np.dot(dA2, self.A1.T)
db2 = (1 / self.m) * np.sum(dA2, axis = 1, keepdims = True)
dA1 = np.multiply(np.dot(self.W2.T, dA2), 1 - np.power(self.Z1, 2))
dW1 = (1 / self.m) * np.dot(dA1, self.data.T)
db1 = (1 / self.m) * np.sum(dA1, axis = 1, keepdims = True)
# print("dW2: ", dW2, " dA1: ", dA1)
self.W1 = momentumFac * self.W1 - self.learnRate * dW1
self.W2 = momentumFac * self.W2 - self.learnRate * dW2
self.b1 = self.b1 - self.learnRate * db1
self.b2 = self.b2 - self.learnRate * db2
def logLoss(self):
#prob = np.multiply(np.log(self.Z2), self.targets) + np.multiply(np.log(1 - self.Z2), (1 - self.targets))
cost = (1 / self.m) * np.sqrt(np.sum(np.square(self.Z2 - self.targets)))
#cost = - np.sum(prob) / self.m
return cost
def sigmoid(self, Z):
return np.float64(1) / (np.float64(1) + np.exp(-Z))
def learnBitch(self, learnRate, epochs, moment):
# Make data
perm = np.random.permutation(50)
dat = 2 * np.random.random_sample((1, 50)) - 1
targ = .5 * np.sin(2 * np.pi * dat) + .5 + .3 * np.random.random_sample((1, 50))
self.data = dat[:,perm]
self.targets = targ[:,perm]
# Make Ns
Xn = self.data.shape[0]
Yn = self.targets.shape[0]
Hn = 200
self.m = self.data.shape[1]
# Initialize Ws and bias vectors
cost = np.float64(0)
while (cost == 0):
self.W1 = np.random.random_sample((Hn, Xn))
self.W2 = np.random.random_sample((Yn, Hn))
self.b1 = np.zeros([Hn, 1])
self.b2 = np.zeros([Yn, 1])
self.learnRate = learnRate
itt = np.intc(0)
costArr = []
costXs = []
plt.clf()
costGraph = plt.figure()
costGraph.show()
costGraph.canvas.draw()
for itt in range(epochs):
self.propegate(self, self.data)
cost = self.logLoss(self)
self.backProp(self, moment)
print('cost: ', cost)
if itt % 25 == 0:
costArr.append(cost)
costXs.append(itt)
plt.plot(costXs, costArr)
costGraph.canvas.draw()
self.xVals = [np.linspace(-1, 1, 50)]
self.yVals = self.propegate(self, self.xVals)
print(self.xVals)
print(self.yVals)
plt.clf()
plt.scatter(self.data, self.targets)
plt.plot(np.squeeze(self.xVals), np.squeeze(self.yVals))
plt.show()
#self.plot_decision_boundary(self, lambda x: self.propegate(self, x), self.data, self.targets)
```