Fine-tuning ResNet50 for a binary classification problem: different validation accuracy from classification_report and model.evaluate


I'm working on a binary classification problem using the pre-trained ResNet50 model. I get good training and validation accuracy with model.evaluate:

Training data: loss: 0.0271, accuracy: 0.9851

Validation data: loss: 0.027066389098763466, accuracy: 0.9850746393203735

But when I use classification_report from sklearn.metrics, I get: validation accuracy = 0.5522388059701493.

And on the test set I get poor results: test loss: 4.279370307922363, test accuracy: 0.5.

I wonder why the validation accuracy differs between the two evaluation methods, and which method I should rely on in my case.
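
For reference, here is a minimal sketch of the two ways class labels can be read off a single-unit sigmoid output (the probabilities below are made up for illustration, not my actual data):

import numpy as np

# Dummy sigmoid outputs of shape (num_samples, 1), the shape model.predict
# returns for a Dense(1, activation='sigmoid') head
probs = np.array([[0.1], [0.9], [0.7], [0.2]])

# Thresholding at 0.5 maps each probability to a 0/1 label
labels_threshold = (probs > 0.5).astype(int).ravel()  # [0, 1, 1, 0]

# argmax over axis=1 of a (num_samples, 1) array can only ever return 0,
# because each row has a single column
labels_argmax = np.argmax(probs, axis=1)  # [0, 0, 0, 0]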

Any suggestions? Thank you for your help.

This is my code:

from tensorflow.keras.applications import ResNet50
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import os
import numpy as np
import matplotlib.pyplot as plt

"""Global Constants"""

NUM_CLASSES = 1

IMAGE_RESIZE = 224
RESNET50_POOLING_AVERAGE = 'avg'
DENSE_LAYER_ACTIVATION = 'sigmoid'
OBJECTIVE_FUNCTION = 'binary_crossentropy'

LOSS_METRICS = ['accuracy']

NUM_EPOCHS = 10
EARLY_STOP_PATIENCE = 3
BATCH_SIZE_TRAINING = 10
BATCH_SIZE_VALIDATION = 10
BATCH_SIZE_TESTING = 1


"""Resnet50 model"""
#Define Transfer Learning Network Model Consisting of 2 Layers
model = Sequential()
model.add(ResNet50(include_top = False, pooling = RESNET50_POOLING_AVERAGE, weights = 'imagenet'))
model.add(Dense(NUM_CLASSES, activation = DENSE_LAYER_ACTIVATION))
model.layers[0].trainable = False  # freeze the pre-trained ResNet50 base so only the Dense head is trained

model.summary()

#Compile Transfer Learning Model

sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer = sgd, loss=OBJECTIVE_FUNCTION, metrics=LOSS_METRICS)



image_size = IMAGE_RESIZE

#Prepare Keras Data Generators
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)


train_generator = data_generator.flow_from_directory(
        'splitDefectData/train',
        target_size=(image_size, image_size),
        batch_size=BATCH_SIZE_TRAINING,
        shuffle=True,
        class_mode='binary')

validation_generator = data_generator.flow_from_directory(
        'splitDefectData/valid',
        target_size=(image_size, image_size),
        batch_size=BATCH_SIZE_VALIDATION,
        shuffle=False,
        class_mode='binary')

print(BATCH_SIZE_TRAINING, len(train_generator), BATCH_SIZE_VALIDATION, len(validation_generator))


"""Train Our Model With Defectueux & NonDefectueux Train (splitted) Data Set"""


# Stop training when val_loss stops improving and keep the best epoch's weights on disk
cb_early_stopper = EarlyStopping(monitor = 'val_loss', patience = EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath = 'best_model_1.hdf5', monitor = 'val_loss',
                                  save_best_only = True, mode = 'auto')



history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs = NUM_EPOCHS,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=[cb_checkpointer, cb_early_stopper]
)



model.load_weights("best_model_1.hdf5")

#Training Metrics
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

[Figure: training and validation accuracy and loss curves]

#Model evaluation on validation data
validation_generator.reset()

validation_pred = model.predict(validation_generator, steps=len(validation_generator))

# Get predicted labels
predicted_labels = np.argmax(validation_pred, axis=1)
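# Note: validation_pred has shape (num_samples, 1) for the single sigmoid unit,
# so np.argmax along axis=1 always returns 0 here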

# Evaluate the model on the validation set
validation_loss, validation_accuracy = model.evaluate(validation_generator)

# Get true labels from the validation generator (file order, which matches the
# predictions above because the generator was created with shuffle=False)
true_labels = validation_generator.classes

# Calculate classification report
report = classification_report(true_labels, predicted_labels, target_names=validation_generator.class_indices.keys())

print("Validation Loss:", validation_loss)
print("Validation Accuracy:", validation_accuracy)
print("Classification Report:\n", report)

# Plot confusion matrix
# Get class indices
class_indices = validation_generator.class_indices

# Get class labels
class_labels = list(class_indices.keys())

conf_matrix = confusion_matrix(true_labels, predicted_labels)
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)
for i in range(len(class_labels)):
    for j in range(len(class_labels)):
        plt.text(j, i, conf_matrix[i, j], ha="center", va="center", color="white" if conf_matrix[i, j] > conf_matrix.max() / 2 else "black")
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
plt.show()

# Calculate accuracy
accuracy = np.mean(predicted_labels == true_labels)
print("Accuracy:", accuracy)

"""# Test Data"""

test_generator = data_generator.flow_from_directory(
    directory = 'splitDefectData/test',
    target_size = (image_size, image_size),
    batch_size = BATCH_SIZE_TESTING,
    shuffle = False,
    seed = 123
)
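# Note: class_mode is not set here, so flow_from_directory falls back to its
# default 'categorical', unlike the 'binary' train/validation generators above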


#Predict on test data
test_pred = model.predict(test_generator, steps=len(test_generator))

# Get predicted labels
predicted_labels = np.argmax(test_pred, axis=1)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator)


# Get true labels from the test generator
true_labels = test_generator.classes

# Calculate classification report
report = classification_report(true_labels, predicted_labels, target_names=test_generator.class_indices.keys())

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)
print("Classification Report:\n", report)

# Plot confusion matrix
# Get class indices
class_indices = test_generator.class_indices

# Get class labels
class_labels = list(class_indices.keys())

conf_matrix = confusion_matrix(true_labels, predicted_labels)
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)
for i in range(len(class_labels)):
    for j in range(len(class_labels)):
        plt.text(j, i, conf_matrix[i, j], ha="center", va="center", color="white" if conf_matrix[i, j] > conf_matrix.max() / 2 else "black")
plt.xlabel('Predicted')
plt.ylabel('True')
plt.tight_layout()
plt.show()

# Calculate accuracy
accuracy = np.mean(predicted_labels == true_labels)
print("Accuracy:", accuracy)


#Display the results
filenames = test_generator.filenames
true_labels = test_generator.classes
class_labels = test_generator.class_indices

# Get the actual classes corresponding to the labels.
class_names = list(class_labels.keys())

# Get the model predictions on test data
predicted_labels = np.argmax(test_pred, axis=1)

num_rows = len(filenames) // 2 + 1
num_cols = 2

# Display the images, predictions, and actual labels.
plt.figure(figsize=(10, 40))
for i in range(len(filenames)):
    img = plt.imread(os.path.join(test_generator.directory, filenames[i]))
    plt.subplot(num_rows, num_cols, i+1)
    plt.imshow(img)
    plt.title('Predicted: {}, Actual: {}'.format(class_names[predicted_labels[i]], class_names[true_labels[i]]))
    plt.axis('off')

plt.tight_layout()
plt.show()

The accuracy and loss for the validation data using validation_loss, validation_accuracy = model.evaluate(validation_generator):

[Screenshot: validation loss and accuracy reported by model.evaluate]

Using classification_report from sklearn.metrics, I got these results:

[Screenshot: classification_report output on the validation data]

I expected to get the same results from the different evaluation methods.
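
For what it's worth, this is a minimal sketch of the consistency check I would expect to hold, assuming an un-shuffled generator like validation_generator above (Keras' built-in binary accuracy thresholds the sigmoid output at 0.5):

import numpy as np

# Run both evaluation paths on the same generator, reset to the start each time
validation_generator.reset()
probs = model.predict(validation_generator, steps=len(validation_generator))

# Derive labels the same way the built-in binary accuracy metric does
manual_labels = (probs > 0.5).astype(int).ravel()
manual_accuracy = np.mean(manual_labels == validation_generator.classes)

validation_generator.reset()
_, keras_accuracy = model.evaluate(validation_generator)

# The two numbers should agree up to floating-point noise
print(manual_accuracy, keras_accuracy)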
