from sklearn.datasets import make_classification
import numpy as np

X, y = make_classification(n_samples=50000, n_features=15, n_informative=10, n_redundant=5,
                           n_classes=2, weights=[0.7], class_sep=0.7, random_state=15)
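The splits used below (X_train, X_test, y_train, y_test) are not shown in the post; presumably something like sklearn's train_test_split produced them. A sketch that is consistent with the traceback further down (the default test_size=0.25 leaves 37500 training points, and 50 epochs × 37500 = 1,875,000, the length reported in the error):

from sklearn.model_selection import train_test_split

# assumption: the split is not shown in the post; the default test_size=0.25
# leaves 37500 training points, consistent with the traceback below, and
# random_state=15 merely mirrors the dataset seed
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=15)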
Initialize weights
def initialize_weights(dim):
    '''Initialize the weight vector and the bias.'''
    w = np.zeros_like(dim)  # one zero weight per feature (dim is one sample row)
    b = 0
    return w, b
Compute sigmoid
def sigmoid(z):
    '''Return the sigmoid of z.'''
    sig = 1 / (1 + np.exp(-z))
    return sig
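One hedge worth adding here, not part of the original code: for large negative z, np.exp(-z) overflows float64 and numpy emits a RuntimeWarning (the result still rounds to 0.0). If the weights grow during training, a clipped variant avoids the warning:

def sigmoid_stable(z):
    '''Sigmoid with z clipped so np.exp(-z) cannot overflow float64.'''
    z = np.clip(z, -500, 500)  # np.exp overflows around exp(710)
    return 1 / (1 + np.exp(-z))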
Computing log-loss
def logloss(y_true, y_pred):
    '''Compute the mean log loss between true labels and predicted probabilities.'''
    n = len(y_true)
    log_loss = (-1 / n) * ((y_true * np.log10(y_pred)) + (1 - y_true) * np.log10(1 - y_pred)).sum()
    return log_loss
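A related guard, also an addition rather than part of the post: if the sigmoid saturates, y_pred can be exactly 0 or 1 and np.log10 returns -inf. Clipping the probabilities keeps the loss finite:

def logloss_safe(y_true, y_pred, eps=1e-15):
    '''Same log loss as above, with predictions clipped away from exactly 0 and 1.'''
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    n = len(y_true)
    return (-1 / n) * ((y_true * np.log10(y_pred)) + (1 - y_true) * np.log10(1 - y_pred)).sum()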
Computing gradient w.r.t. w
def gradient_dw(x, y, w, b, alpha, N):
    '''Compute the gradient w.r.t. w for one data point.'''
    # dw must keep the same shape as w; summing it into a scalar would apply
    # one identical update to every weight
    dw = x * (y - sigmoid(np.dot(w.T, x) + b)) - (alpha / N) * w
    return dw
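Since part of the question is whether the code is correct, a finite-difference check is a quick way to validate this gradient. The sketch below is my addition; note gradient_dw is the negative gradient of the regularized per-point loss (hence the w + eta0 * dw update later), and it corresponds to the natural-log loss even though logloss above uses log10:

def check_gradient_dw(x, y, w, b, alpha, N, h=1e-6):
    '''Compare gradient_dw against a central-difference estimate.'''
    def point_loss(w_):
        p = sigmoid(np.dot(w_, x) + b)
        return -(y * np.log(p) + (1 - y) * np.log(1 - p)) + (alpha / (2 * N)) * np.dot(w_, w_)
    numeric = np.array([(point_loss(w + h * e) - point_loss(w - h * e)) / (2 * h)
                        for e in np.eye(len(w))])
    # gradient_dw should equal MINUS the numeric gradient of point_loss
    return np.allclose(gradient_dw(x, y, w, b, alpha, N), -numeric, atol=1e-5)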
Computing gradient w.r.t. b
def gradient_db(x, y, w, b):
    '''Compute the gradient w.r.t. b for one data point.'''
    db = y - sigmoid(np.dot(w.T, x) + b)
    return db
Implementing logistic regression
def train(X_train, y_train, X_test, y_test, epochs, alpha, eta0, tol=1e-3):
    '''Implement logistic regression with per-sample gradient updates.'''
    # Here eta0 is the learning rate.
    # Implement the code as follows:
    # initialize the weights (call the initialize_weights(X_train[0]) function)
    # for every epoch:
    #     for every data point in (X_train, y_train):
    #         compute gradient w.r.t. w (call the gradient_dw() function)
    #         compute gradient w.r.t. b (call the gradient_db() function)
    #         update w, b
    #     predict the output for all data points in X_train using w, b
    #     compute the loss between predicted and actual values (call the logloss function)
    #     store the train loss value in a list
    #     predict the output for all data points in X_test using w, b
    #     compute the loss between predicted and actual values (call the logloss function)
    #     store the test loss value in a list
    # you can also compare previous and current loss; if the loss stops
    # updating (within tol), stop the process and return w, b
    w, b = initialize_weights(X_train[0])
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for x, y in zip(X_train, y_train):
            dw = gradient_dw(x, y, w, b, alpha, N)  # N is defined below, before train() is called
            db = gradient_db(x, y, w, b)
            w = w + (eta0 * dw)
            b = b + (eta0 * db)
        for i in X_train:
            y_pred = sigmoid(np.dot(w, i) + b)
            train_loss.append(logloss(y_train, y_pred))
        for j in X_test:
            y_pred_test = sigmoid(np.dot(w, j) + b)
            test_loss.append(logloss(y_test, y_pred_test))
    return w, b, train_loss, test_loss
alpha=0.0001
eta0=0.0001
N=len(X_train)
epochs=50
w,b,train_loss_arr,test_loss_arr = train(X_train,y_train,X_test,y_test,epochs,alpha,eta0)
Plotting epoch number vs train/test loss
import matplotlib.pyplot as plt
plt.plot(range(epochs), train_loss_arr, 'g', label='Training loss')
plt.plot(range(epochs), test_loss_arr, 'r', label='Test loss')
plt.title('Epoch vs Train/Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
Error while plotting epoch vs train_loss/test_loss
ValueError Traceback (most recent call last)
<ipython-input-138-7d80027b5139> in <module>
1 import matplotlib.pyplot as plt
----> 2 plt.plot(range(epochs),train_loss_arr, 'g', label = 'Training loss')
3 plt.plot(range(epochs),test_loss_arr, 'g', label = 'Test loss')
4 plt.title('Epoch vs Training Loss')
5 plt.xlabel('Epoch')
~\anaconda3\lib\site-packages\matplotlib\pyplot.py in plot(scalex, scaley, data, *args, **kwargs)
2838 @_copy_docstring_and_deprecators(Axes.plot)
2839 def plot(*args, scalex=True, scaley=True, data=None, **kwargs):
-> 2840 return gca().plot(
2841 *args, scalex=scalex, scaley=scaley,
2842 **({"data": data} if data is not None else {}), **kwargs)
~\anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
1741 """
1742 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D)
-> 1743 lines = [*self._get_lines(*args, data=data, **kwargs)]
1744 for line in lines:
1745 self.add_line(line)
~\anaconda3\lib\site-packages\matplotlib\axes\_base.py in __call__(self, data, *args, **kwargs)
271 this += args[0],
272 args = args[1:]
--> 273 yield from self._plot_args(this, kwargs)
274
275 def get_next_color(self):
~\anaconda3\lib\site-packages\matplotlib\axes\_base.py in _plot_args(self, tup, kwargs)
397
398 if x.shape[0] != y.shape[0]:
--> 399 raise ValueError(f"x and y must have same first dimension, but "
400 f"have shapes {x.shape} and {y.shape}")
401 if x.ndim > 2 or y.ndim > 2:
ValueError: x and y must have same first dimension, but have shapes (50,) and (1875000,)
I am confused about the code for logistic regression: if it is correct, then how should I plot epoch vs train_loss/test_loss? For every epoch there should be one loss value, and I don't know what changes I should make in my code to plot it.
Try this: I added 2 lists to collect the training predicted values and the testing predicted values, to help with the iterating; the rest looks good. Also, while iterating for y_pred (in your code) you need to consider len(X_train) and not just X_train:
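The answer's code itself was not included; a sketch of the corrected loop along the lines it describes (the list names are mine) would be:

def train(X_train, y_train, X_test, y_test, epochs, alpha, eta0, tol=1e-3):
    w, b = initialize_weights(X_train[0])
    N = len(X_train)
    train_loss = []
    test_loss = []
    for e in range(epochs):
        for x, y_point in zip(X_train, y_train):
            dw = gradient_dw(x, y_point, w, b, alpha, N)
            db = gradient_db(x, y_point, w, b)
            w = w + (eta0 * dw)
            b = b + (eta0 * db)
        # collect predicted values for ALL points in lists, iterating over
        # range(len(...)) rather than the arrays themselves
        y_pred_train = [sigmoid(np.dot(w, X_train[i]) + b) for i in range(len(X_train))]
        y_pred_test = [sigmoid(np.dot(w, X_test[i]) + b) for i in range(len(X_test))]
        # append ONE loss per epoch, not one per data point
        train_loss.append(logloss(y_train, np.array(y_pred_train)))
        test_loss.append(logloss(y_test, np.array(y_pred_test)))
    return w, b, train_loss, test_loss

With this version, train_loss and test_loss each hold exactly epochs values (50 here), so range(epochs) and the loss lists have matching lengths and the plotting cell runs as written.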