I'm working on a binary classification problem with a pre-trained ResNet50 model. Using model.evaluate I get good training and validation accuracy:

Training data: loss: 0.0271, accuracy: 0.9851
Validation data: Validation Loss: 0.027066389098763466, Validation Accuracy: 0.9850746393203735

But when I compute the validation accuracy with classification_report from sklearn.metrics, I get:

validation accuracy = 0.5522388059701493

On the test set I also get poor results: Test Loss: 4.279370307922363, Test Accuracy: 0.5

Why do the two evaluation methods report different validation accuracies, and which one should I use in my case? Any suggestions? Thank you for your help.
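To make the comparison concrete, here is a condensed sketch of the two computations I am comparing (it reuses the model, validation_generator, and numpy import defined in the full code below):

# Method 1: Keras' built-in evaluation on the validation generator
validation_generator.reset()
val_loss, val_acc = model.evaluate(validation_generator)  # reports accuracy ~0.985

# Method 2: manual labels from predict(), as fed to classification_report
validation_generator.reset()
probs = model.predict(validation_generator, steps=len(validation_generator))
pred_labels = np.argmax(probs, axis=1)
manual_acc = np.mean(pred_labels == validation_generator.classes)  # gives ~0.552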
This is my full code:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import os
import numpy as np
import matplotlib.pyplot as plt
"""Global Constants"""
NUM_CLASSES = 1
IMAGE_RESIZE = 224
RESNET50_POOLING_AVERAGE = 'avg'
DENSE_LAYER_ACTIVATION = 'sigmoid'
OBJECTIVE_FUNCTION = 'binary_crossentropy'
LOSS_METRICS = ['accuracy']
NUM_EPOCHS = 10
EARLY_STOP_PATIENCE = 3
BATCH_SIZE_TRAINING = 10
BATCH_SIZE_VALIDATION = 10
BATCH_SIZE_TESTING = 1
"""Resnet50 model"""
#Define Transfer Learning Network Model Consisting of 2 Layers
model = Sequential()
model.add(ResNet50(include_top = False, pooling = RESNET50_POOLING_AVERAGE, weights = 'imagenet'))
model.add(Dense(NUM_CLASSES, activation = DENSE_LAYER_ACTIVATION))
model.layers[0].trainable = False
model.summary()
#Compile Transfer Learning Model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer = sgd, loss=OBJECTIVE_FUNCTION, metrics=LOSS_METRICS)
image_size = IMAGE_RESIZE
#Prepare Keras Data Generators
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = data_generator.flow_from_directory(
    'splitDefectData/train',
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_TRAINING,
    shuffle=True,
    class_mode='binary')
validation_generator = data_generator.flow_from_directory(
    'splitDefectData/valid',
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_VALIDATION,
    shuffle=False,
    class_mode='binary')
# Show batch sizes and the number of batches per epoch
print(BATCH_SIZE_TRAINING, len(train_generator), BATCH_SIZE_VALIDATION, len(validation_generator))
"""Train Our Model With Defectueux & NonDefectueux Train (splitted) Data Set"""
cb_early_stopper = EarlyStopping(monitor = 'val_loss', patience = EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath='best_model_1.hdf5', monitor='val_loss',
                                  save_best_only=True, mode='auto')
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=NUM_EPOCHS,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[cb_checkpointer, cb_early_stopper]
)
model.load_weights("best_model_1.hdf5")
#Training Metrics
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')
plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()
#Model evaluation on validation data
validation_generator.reset()
validation_pred = model.predict(validation_generator, steps=len(validation_generator))
# Get predicted labels
predicted_labels = np.argmax(validation_pred, axis=1)
# Evaluate the model on the validation set
validation_loss, validation_accuracy = model.evaluate(validation_generator)
# Get true labels from the validation generator
true_labels = validation_generator.classes
# Calculate classification report
report = classification_report(true_labels, predicted_labels, target_names=validation_generator.class_indices.keys())
print("Validation Loss:", validation_loss)
print("Validation Accuracy:", validation_accuracy)
print("Classification Report:\n", report)
# Plot confusion matrix
# Get class indices
class_indices = validation_generator.class_indices
# Get class labels
class_labels = list(class_indices.keys())
conf_matrix = confusion_matrix(true_labels, predicted_labels)
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)
for i in range(len(class_labels)):
    for j in range(len(class_labels)):
        plt.text(j, i, conf_matrix[i, j], ha="center", va="center",
                 color="white" if conf_matrix[i, j] > conf_matrix.max() / 2 else "black")
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
plt.show()
# Calculate accuracy
accuracy = np.mean(predicted_labels == true_labels)
print("Accuracy:", accuracy)
"""# Test Data"""
test_generator = data_generator.flow_from_directory(
    directory='splitDefectData/test',
    target_size=(image_size, image_size),
    batch_size=BATCH_SIZE_TESTING,
    shuffle=False,
    seed=123
)
#Predict on test data
test_pred = model.predict(test_generator, steps=len(test_generator))
# Get predicted labels
predicted_labels = np.argmax(test_pred, axis=1)
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator)
# Get true labels from the test generator
true_labels = test_generator.classes
# Calculate classification report
report = classification_report(true_labels, predicted_labels, target_names=test_generator.class_indices.keys())
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)
print("Classification Report:\n", report)
# Plot confusion matrix
# Get class indices
class_indices = test_generator.class_indices
# Get class labels
class_labels = list(class_indices.keys())
conf_matrix = confusion_matrix(true_labels, predicted_labels)
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)
for i in range(len(class_labels)):
    for j in range(len(class_labels)):
        plt.text(j, i, conf_matrix[i, j], ha="center", va="center",
                 color="white" if conf_matrix[i, j] > conf_matrix.max() / 2 else "black")
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
plt.show()
# Calculate accuracy
accuracy = np.mean(predicted_labels == true_labels)
print("Accuracy:", accuracy)
#Display the results
filenames = test_generator.filenames
true_labels = test_generator.classes
class_labels = test_generator.class_indices
# Get the actual classes corresponding to the labels.
class_names = list(class_labels.keys())
# Get the model predictions on test data
predicted_labels = np.argmax(test_pred, axis=1)
num_rows = len(filenames) // 2 + 1
num_cols = 2
# Display the images, predictions, and actual labels.
plt.figure(figsize=(10, 40))
for i in range(len(filenames)):
    img = plt.imread(os.path.join(test_generator.directory, filenames[i]))
    plt.subplot(num_rows, num_cols, i+1)
    plt.imshow(img)
    plt.title('Predicted: {}, Actual: {}'.format(class_names[predicted_labels[i]], class_names[true_labels[i]]))
    plt.axis('off')
plt.tight_layout()
plt.show()
To summarize: validation_loss, validation_accuracy = model.evaluate(validation_generator) reports a validation accuracy of ~0.985, while the accuracy derived from the classification_report pipeline on the same validation data is ~0.552. I expected the two evaluation methods to give the same result.