I am trying to plot macro-average ROC curves for multiclass data, but the curves do not start from (0, 0). I am fairly sure the rest of the code works, because the micro-average ROC curves do not show this problem. Below are example macro- and micro-average plots for the same dataset and classifier (Random Forest). The three curves compared are the baseline and two oversampling methods applied to the same dataset. I notice that the problem occurs most often with Random Forest, less often with Naive Bayes, and never with Decision Trees.
Macro-average ROC (Random Forest): [image]
Micro-average ROC (Random Forest): [image]
This is the code I use for a multi-class dataset to plot classifier performance on three differently oversampled datasets; d_probabilities is a dictionary mapping model names to their predicted probabilities.
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import label_binarize

def result_render_multiclass(d_probabilities, d_accuracies, y_test, title_set, model):
    table_multi_micro = pd.DataFrame(columns=['Classifier', 'fpr', 'tpr', 'auc'])
    table_multi_macro = pd.DataFrame(columns=['Classifier', 'fpr', 'tpr', 'auc'])
    imcp_scores, auc_micro, auc_macro, accuracy = [], [], [], []
    list_metrics = []
    n_classes = len(np.unique(y_test))
    y_test_binarize = label_binarize(y_test, classes=np.arange(n_classes))
    scores = {}
    for model_name, model_proba in d_probabilities.items():  # iterate over the predicted probabilities of the 3 models
        y_pred = model_proba
        scores[model_name] = model_proba
        fpr, tpr, roc_auc, thresholds = dict(), dict(), dict(), dict()
        # per-class (one-vs-rest) ROC curves
        for i in range(n_classes):
            fpr[i], tpr[i], thresholds[i] = roc_curve(y_test_binarize[:, i], y_pred[:, i], drop_intermediate=False)
            roc_auc[i] = metrics.auc(fpr[i], tpr[i])
        # compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_test_binarize.ravel(), y_pred.ravel())
        roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])
        # aggregate all false positive rates
        # all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        fpr_grid = np.linspace(0.0, 1.0, 1000)
        # then interpolate all ROC curves at these points
        # mean_tpr = np.zeros_like(all_fpr)
        mean_tpr = np.zeros_like(fpr_grid)
        for i in range(n_classes):
            mean_tpr += np.interp(fpr_grid, fpr[i], tpr[i])
        # finally average it and compute the AUC
        mean_tpr /= n_classes
        fpr["macro"] = fpr_grid
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])
        # store micro-average fpr, tpr, auc
        row_micro = {'Classifier': model_name, 'fpr': fpr['micro'], 'tpr': tpr['micro'], 'auc': roc_auc['micro']}
        table_multi_micro.loc[len(table_multi_micro)] = row_micro
        # store macro-average fpr, tpr, auc
        row_macro = {'Classifier': model_name, 'fpr': fpr['macro'], 'tpr': tpr['macro'], 'auc': roc_auc['macro']}
        table_multi_macro.loc[len(table_multi_macro)] = row_macro
        # append AUC(ROC) for micro and macro average; the micro score is computed on the
        # binarized labels because roc_auc_score's multiclass mode only accepts 'macro'/'weighted'
        auc_micro.append(roc_auc_score(y_test_binarize, y_pred, average='micro'))
        auc_macro.append(roc_auc_score(y_test, y_pred, multi_class='ovr', average='macro'))
    # appending imcp for (raw, smote, kde)
    for acc in d_accuracies.values():  # append average accuracies (over 10 runs) for raw, smote, kde: 3 accuracies
        accuracy.append(acc)
    # build a flat list containing acc, auc_micro, auc_macro for each method sequentially
    for acc_score, auc_mi, auc_ma, imcp_s in zip(accuracy, auc_micro, auc_macro, imcp_scores):
        list_metrics.append(float(f'{acc_score:.3f}'))
        list_metrics.append(float(f'{auc_mi:.3f}'))
        list_metrics.append(float(f'{auc_ma:.3f}'))
    return list_metrics, table_multi_macro, table_multi_micro
def multi_class_roc_save(title_set, table, model, save_folder, name=str()):
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    plt.figure(figsize=(8, 6))
    table.set_index('Classifier', inplace=True)
    colors = ['navy', 'orange', 'green']
    for i, color in zip(table.index, colors):
        plt.plot(table.loc[i]['fpr'],
                 table.loc[i]['tpr'],
                 label="{}, AUC={:.3f}".format(i, table.loc[i]['auc']), color=color)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel("False Positive Rate", fontsize=15)
    plt.ylabel("True Positive Rate", fontsize=15)
    plt.title('{}-average ROC curve - {}'.format(name, title_set), fontweight='bold', fontsize=15)
    plt.legend(prop={'size': 13}, loc='lower right')
    file_name_macro = os.path.join(save_folder, '{}_{}_{}'.format(title_set, model, name))
    plt.savefig(file_name_macro)
    plt.close()
I was expecting the macro-average ROC curves to start from (0, 0).
An example with Naive Bayes:
Macro-average ROC (Naive Bayes): [image]
Micro-average ROC (Naive Bayes): [image]
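A quick check along these lines (an ad-hoc snippet, not part of my pipeline) makes the symptom concrete: for the Random Forest results the first interpolated point of the macro curve is of the form (0.0, t) with t clearly above 0 instead of (0.0, 0.0), which matches the plots. It has to run right after result_render_multiclass returns and before plotting, because multi_class_roc_save replaces the table index in place.

# Ad-hoc check on the macro table returned by result_render_multiclass:
# if the macro curve passed through the origin, the interpolated TPR at the
# first grid point (FPR = 0.0) would also be 0.0.
for _, row in table_macro.iterrows():
    print(row['Classifier'], '-> first macro point:', (row['fpr'][0], row['tpr'][0]))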
How can I fix this problem?