Backpropagation in Neural Network for XOR data


I need to implement a neural network using only NumPy. It takes two inputs, has one hidden layer that uses ReLU as its activation function, and one output layer that uses sigmoid as its activation. The loss I need to use is binary cross-entropy. When I train my NN, the output for every input is around 0.5. I think my problem is the backpropagation and I'm unsure whether I implemented it correctly, or whether the error is somewhere else in my code. Maybe someone can help me.

import numpy as np
from tqdm import tqdm

X = np.array([[0, 0],
             [0, 1],
             [1, 0],
             [1, 1]])

y = np.array([[0], 
             [1],
             [1],
             [0]])

def ReLU(x):
    return np.maximum(0, x)


def d_ReLU(x):
    return np.where(x > 0, 1, 0)


def sigmoid(x):
    return 1/(1 + np.exp(-x))


def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))


def binary_cross_entropy(y, y_pred):
    loss = np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1-y_pred)))
    return loss

def d_binary_cross_entropy(y, y_pred):
    loss = np.where(y == 1, -1/y_pred, 1/(1 - y_pred))
    return loss

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.weights_hidden = np.random.uniform(size=(input_size, hidden_size))
        self.weights_output = np.random.uniform(size=(hidden_size, output_size))
        

    def forward_pass(self, X):
        self.output_hidden = ReLU(np.dot(X, self.weights_hidden))
        output = sigmoid(np.dot(self.output_hidden, self.weights_output))
        return output
    
    
    def backward_pass(self, X, y, y_pred):
        output_delta = d_binary_cross_entropy(y, y_pred) *  d_sigmoid(y_pred)
    
        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_ReLU(self.output_hidden)

        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)
        
        
    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")
        print(self.forward_pass(X))
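
Since the question is about whether the backward pass is correct, one neutral way to test it is a finite-difference gradient check: nudge each weight, re-evaluate the loss, and compare the resulting numerical gradient against the analytic one used in backward_pass. The sketch below is not part of the original post; the helper name numerical_grad_output is only illustrative, and it assumes the NeuralNetwork class and binary_cross_entropy defined above (here only the output-layer weights are checked).

def numerical_grad_output(nn, X, y, eps=1e-5):
    # Central finite-difference estimate of d(mean BCE) / d(weights_output).
    grads = np.zeros_like(nn.weights_output)
    for i in range(nn.weights_output.shape[0]):
        for j in range(nn.weights_output.shape[1]):
            original = nn.weights_output[i, j]
            nn.weights_output[i, j] = original + eps
            loss_plus = binary_cross_entropy(y, nn.forward_pass(X))
            nn.weights_output[i, j] = original - eps
            loss_minus = binary_cross_entropy(y, nn.forward_pass(X))
            nn.weights_output[i, j] = original  # restore the weight
            grads[i, j] = (loss_plus - loss_minus) / (2 * eps)
    return grads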

1 Answer

Answer by Harun Cetin:

Some corrections are needed in your code:

The first is that if you return 0 directly in your ReLU function for negative inputs, you lose their gradients. You should instead multiply those inputs by a relatively small number, e.g. 0.0001 (a leaky ReLU), to avoid this issue.
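
For illustration, a matching forward/derivative pair for such a leaky ReLU might look like the sketch below; the names leaky_ReLU, d_leaky_ReLU and LEAK are just for this example, and the corrected code below instead writes the forward pass as np.maximum(x * 0.00001, x):

import numpy as np

LEAK = 0.0001  # small negative-side slope so negative inputs keep a nonzero gradient

def leaky_ReLU(x):
    return np.where(x > 0, x, LEAK * x)

def d_leaky_ReLU(x):
    return np.where(x > 0, 1.0, LEAK)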

Second, you should choose a smaller learning rate and more epochs to get proper results (see the call at the bottom of the corrected code: a learning rate of 0.001 over 10000 epochs).

The corrected and working code is as follows:

import numpy as np
from tqdm import tqdm

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

y = np.array([[0], 
              [1],
              [1],
              [0]])

def ReLU(x):
    # leaky variant: negative inputs are scaled by 0.00001 instead of being zeroed out
    return np.maximum(x * 0.00001, x)


def d_ReLU(x):
    return np.where(x > 0, 1, 0)


def sigmoid(x):
    return 1/(1 + np.exp(-x))


def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))


def binary_cross_entropy(y, y_pred):
    loss = np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1-y_pred)))
    return loss

def d_binary_cross_entropy(y, y_pred):
    loss = np.where(y == 1, -1/y_pred, 1/(1 - y_pred))
    return loss

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.weights_hidden = np.random.uniform(size=(input_size, hidden_size))
        self.weights_output = np.random.uniform(size=(hidden_size, output_size))
    

    def forward_pass(self, X):
        self.output_hidden = ReLU(np.dot(X, self.weights_hidden))
        output = sigmoid(np.dot(self.output_hidden, self.weights_output))
        return output


    def backward_pass(self, X, y, y_pred):
        output_delta = d_binary_cross_entropy(y, y_pred) *  d_sigmoid(y_pred)

        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_ReLU(self.output_hidden)

        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)
    
    
    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")
        r = self.forward_pass(X)
        x = [0 if i <= 0.5 else 1 for i in r]
        print(r)
        print(x)
    
nn = NeuralNetwork(2, 10, 1, 0.001)  # 2 inputs, 10 hidden units, 1 output, learning rate 0.001

nn.train(X, y, 10000)
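
Because the weights are drawn with np.random.uniform, individual runs can converge differently. If a run needs to be reproducible, NumPy's global RNG can be seeded before constructing the network (optional, not part of the original answer):

np.random.seed(0)  # fix the RNG so the uniform weight initialization is repeatable
nn = NeuralNetwork(2, 10, 1, 0.001)
nn.train(X, y, 10000)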