Implement SGD Classifier with Logloss and L2 regularization Using SGD without using sklearn

1.2k views Asked by At
X, y = make_classification(n_samples=50000, n_features=15, n_informative=10, n_redundant=5,
                           n_classes=2, weights=[0.7], class_sep=0.7, random_state=15)

initialize weights

def initialize_weights(dim):
    ''' In this function, we will initialize our weights and bias''' 
    w = np.zeros_like(1,dim)
    b = 0
    return w,b

compute sigmoid

def sigmoid(z):
    ''' In this function, we will return sigmoid of z'''
    sig = 1/(1 + np.exp(-z))
    return sig

computeing log-loss

def logloss(y_true,y_pred):
    '''In this function, we will compute log loss '''
    n = (len(y_true))
    log_loss = (-1/n) * ((y_true * np.log10(y_pred)) + (1-y_true) * np.log10(1-y_pred)).sum()
    return log_loss

computing gradient w.r.t w

def gradient_dw(x,y,w,b,alpha,N):
    '''In this function, we will compute the gardient w.r.to w '''
    dw = (x*(y - sigmoid(np.dot(w.T,x) + b)) - ((alpha)*(1/N) * w)).sum()
    return dw

computing gradiend w.r.t b

def gradient_db(x,y,w,b):
    '''In this function, we will compute gradient w.r.to b '''
    db = y - sigmoid(np.dot(w.T,x) + b)
    return db

implementing logistic rgression

def train(X_train,y_train,X_test,y_test,epochs,alpha,eta0, tol = 1e-3):
    ''' In this function, we will implement logistic regression'''
    #Here eta0 is learning rate
    #implement the code as follows
    # initalize the weights (call the initialize_weights(X_train[0]) function)
    # for every epoch
        # for every data point(X_train,y_train)
           #compute gradient w.r.to w (call the gradient_dw() function)
           #compute gradient w.r.to b (call the gradient_db() function)
           #update w, b
        # predict the output of x_train[for all data points in X_train] using w,b
        #compute the loss between predicted and actual values (call the loss function)
        # store all the train loss values in a list
        # predict the output of x_test[for all data points in X_test] using w,b
        #compute the loss between predicted and actual values (call the loss function)
        # store all the test loss values in a list
        # you can also compare previous loss and current loss, if loss is not updating then stop the process and return w,b
    
    w,b = initialize_weights(X_train[0])
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for x,y in zip(X_train,y_train):
            dw = gradient_dw(x,w,y,b,alpha,N)
            db = gradient_db(x,y,w,b)
            w = w + (eta0 * dw)
            b = b + (eta0 * db)
        for i in X_train:
            y_pred = sigmoid(np.dot(w, i) + b)
            train_loss.append(logloss(y_train,y_pred))
        for j in X_test:
            y_pred_test = sigmoid(np.dot(w, j) + b)
            test_loss.append(logloss(y_test, y_pred_test))
    return w,b,train_loss,test_loss
alpha=0.0001
eta0=0.0001
N=len(X_train)
epochs=50
w,b,train_loss_arr,test_loss_arr = train(X_train,y_train,X_test,y_test,epochs,alpha,eta0)

Ploting epoch number vs train , test loss

plt.plot(range(epochs),train_loss_arr, 'g', label = 'Training loss')
plt.plot(range(epochs),test_loss_arr, 'g', label = 'Test loss')
plt.title('Epoch vs Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

Error while plotting the epoch vs train_loss/test_loss

ValueError                                Traceback (most recent call last)
<ipython-input-138-7d80027b5139> in <module>
      1 import matplotlib.pyplot as plt
----> 2 plt.plot(range(epochs),train_loss_arr, 'g', label = 'Training loss')
      3 plt.plot(range(epochs),test_loss_arr, 'g', label = 'Test loss')
      4 plt.title('Epoch vs Training Loss')
      5 plt.xlabel('Epoch')

~\anaconda3\lib\site-packages\matplotlib\pyplot.py in plot(scalex, scaley, data, *args, **kwargs)
   2838 @_copy_docstring_and_deprecators(Axes.plot)
   2839 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2840     return gca().plot(
   2841         *args, scalex=scalex, scaley=scaley,
   2842         **({"data": data} if data is not None else {}), **kwargs)

~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
   1741         """
   1742         kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1743         lines = [*self._get_lines(*args, data=data, **kwargs)]
   1744         for line in lines:
   1745             self.add_line(line)

~\anaconda3\lib\site-packages\matplotlib\axes\_base.py in __call__(self, data, *args, **kwargs)
    271                 this += args[0],
    272                 args = args[1:]
--> 273             yield from self._plot_args(this, kwargs)
    274 
    275     def get_next_color(self):

~\anaconda3\lib\site-packages\matplotlib\axes\_base.py in _plot_args(self, tup, kwargs)
    397 
    398         if x.shape[0] != y.shape[0]:
--> 399             raise ValueError(f"x and y must have same first dimension, but "
    400                              f"have shapes {x.shape} and {y.shape}")
    401         if x.ndim > 2 or y.ndim > 2:

ValueError: x and y must have same first dimension, but have shapes (50,) and (1875000,)

I am confused about the code for logistic regression, if its correct then how should I plot the epoch vs train_loss/test_loss. For every epoch there should be one loss and don't know what changes should i make in my code to plot it.

1

There are 1 answers

0
Yash Vyas On

Try this: I added 2 lists to add the training predicted values and testing predicted values to help in iterating, rest looks good. Also while iterating for y_pred (in your code) you need to consider len(X_train) and not just X_train :

 for i in range(epochs):
        train_pred = []
        test_pred = []
        for j in range(N):
            dw = gradient_dw(X_train[j],y_train[j],w,b,alpha,N)
            db = gradient_db(X_train[j],y_train[j],w,b)
            w = w + (eta0 * dw)
            b = b + (eta0 * db)
        for val in range(N):
            train_pred.append(sigmoid(np.dot(w, X_train[val]) + b))
            
        loss1 = logloss(y_train, train_pred)
        train_loss.append(loss1)
            
        for val in range(len(X_test)):
            test_pred.append(sigmoid(np.dot(w, X_test[val]) + b))
            
        loss2 = logloss(y_test, test_pred)
        test_loss.append(loss2)
        
    return w,b,train_loss,test_loss