When training a Keras model, the training loss converges quickly, but the validation loss stays around 300. Why?

30 views Asked by At

I am working with a VGG-16 on an image regression task, and I got this weird result: enter image description here

and I can show you my code:

dataset part:

from keras.preprocessing.image import ImageDataGenerator
import pandas as pd


# Intended mini-batch size for the data generators.
BATCH_SIZE = 32

# Resolution (in pixels) that every image is resized to when loaded.
IMG_HEIGHT = 280
IMG_WIDTH = 280

# Training split: label spreadsheet and the directory holding its images.
train_path = '/db/psxrd3/resize(1024^2)/train.xlsx'
train_image_path = "/db/psxrd3/resize(1024^2)/train(2800)/"

# Test split: label spreadsheet and the directory holding its images.
test_path = '/db/psxrd3/resize(1024^2)/test.xlsx'
test_image_path = "/db/psxrd3/resize(1024^2)/test(2800)/"


def append_ext(index):
    """Return the image file name that belongs to a feature index.

    The index is converted with ``str()`` and embedded between the fixed
    ``Pattern_FeatureIdx_`` prefix and the ``_TopoUnit.png`` suffix.
    """
    # The one-element tuple keeps %-formatting correct even if index is a tuple.
    return 'Pattern_FeatureIdx_%s_TopoUnit.png' % (index,)

# Load the label spreadsheets for the training and test splits.
traindf = pd.read_excel(train_path, engine='openpyxl')
testdf = pd.read_excel(test_path, engine='openpyxl')

# Replace each feature id by the file name of its image on disk.
traindf["FeatID"] = traindf["FeatID"].apply(append_ext)
testdf["FeatID"] = testdf["FeatID"].apply(append_ext)

# Pixel values are scaled to [0, 1]; datagen additionally reserves 25 % of
# traindf for validation.
# NOTE(review): validation_split appears to carve the validation subset out of
# the dataframe in row order, without shuffling first. If train.xlsx is sorted
# (e.g. by target value), the validation targets can follow a different
# distribution than the training targets - confirm, and shuffle traindf once
# with a fixed seed before this point if so.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.25)
gen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three generators.
# class_mode='raw' passes the values of y_col through unchanged, which is what
# a regression target needs: the numbers in the column are the final labels.
_common_flow_args = dict(
    x_col='FeatID',
    y_col='Attachment',
    class_mode='raw',
    color_mode='grayscale',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    interpolation='nearest',
)

train_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_image_path,
    batch_size=BATCH_SIZE,
    shuffle=True,
    subset='training',
    **_common_flow_args
)

# Validation batches are served in a fixed order so that epochs are comparable.
valid_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory=train_image_path,
    batch_size=BATCH_SIZE,
    shuffle=False,
    subset='validation',
    **_common_flow_args
)

# shuffle=False keeps the i-th prediction aligned with the i-th row of testdf;
# with shuffling enabled the output of predict() cannot be matched to labels.
test_generator = gen.flow_from_dataframe(
    dataframe=testdf,
    directory=test_image_path,
    batch_size=1,
    shuffle=False,
    **_common_flow_args
)

the model part:

def VGG_16(input_shape=(280, 280, 1)):
    """Build an uncompiled VGG-16 style network with one linear output.

    The network has five convolutional stages, each followed by 2x2 max
    pooling, then two 4096-unit dense layers with dropout and a single
    linear unit for regression.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to the 280x280 single-channel images used so far.

    Returns:
        The assembled ``Sequential`` model.

    Note:
        Every convolution uses a 3x3 kernel. The first convolution of the
        second stage used to be (3, 2); weight files saved with that kernel
        do not match this architecture and need to be retrained.
    """
    # (number of filters, number of stacked convolutions) per stage.
    conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    conv_kwargs = dict(strides=(1, 1), padding='same', activation='relu',
                       kernel_initializer='uniform')

    model = Sequential()
    is_first_layer = True
    for filters, depth in conv_stages:
        for _ in range(depth):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(Conv2D(filters, (3, 3), input_shape=input_shape,
                                 **conv_kwargs))
                is_first_layer = False
            else:
                model.add(Conv2D(filters, (3, 3), **conv_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))

    # Regression head: one unit with a linear (identity) activation.
    model.add(Dense(1))
    model.add(Activation('linear'))

    return model

the training part:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import keras
import torch
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras import applications
from dataset_balance import train_generator,test_generator,valid_generator
from VGG_16 import VGG_16
import matplotlib.pyplot as plt
from keras import callbacks
import tensorflow as tf

def _steps_per_epoch(generator):
    """Return the number of batches needed to see every sample exactly once."""
    # Ceiling division: a trailing partial batch counts as one step, and no
    # extra step is added when n is an exact multiple of the batch size.
    return (generator.n + generator.batch_size - 1) // generator.batch_size


def main():
    """Train the VGG-16 regressor and save checkpoints plus the final model.

    Trains on ``train_generator`` for 150 epochs with MSE loss and RMSprop,
    validates on ``valid_generator`` after every epoch, writes the weights of
    each epoch to ``model_<epoch>.h5`` and finally saves the whole model.
    """
    # NOTE(review): this only selects the device for PyTorch; the Keras model
    # is not affected by it - confirm whether it is still needed.
    torch.cuda.set_device(0)

    model = VGG_16()

    # NOTE(review): `decay` is only accepted by the legacy optimizers in
    # recent Keras releases - confirm against the installed version.
    opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

    # Regression task, so the loss is the mean squared error.
    model.compile(loss='mse', optimizer=opt)

    STEP_SIZE_TRAIN = _steps_per_epoch(train_generator)
    STEP_SIZE_VALID = _steps_per_epoch(valid_generator)

    # Checkpoints and the final model share one output directory.
    model_dir = '/home/psxrd3/CNN Regressor/practice'
    filepath = "model_{epoch:02d}.h5"
    checkpoint = keras.callbacks.ModelCheckpoint(
        os.path.join(model_dir, filepath),
        save_weights_only=True,
        save_freq='epoch')

    # The generators already yield batches, so batch_size must not be passed
    # to fit() as well.
    history = model.fit(x=train_generator,
                        epochs=150,
                        validation_data=valid_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_steps=STEP_SIZE_VALID,
                        callbacks=[checkpoint])

    # Print the architecture, then save the model together with its weights.
    model.summary()
    model.save(model_dir)
    print('Saved trained model at %s ' % model_dir)


if __name__ == '__main__':
    main()

test part:

model_path = '/home/psxrd3/CNN Regressor/model_unbalance2/model_100.h5'

# Rebuild the architecture and restore the saved weights (.h5 file).
model = VGG_16()
model.load_weights(model_path)

# Compiling only attaches the MSE loss for evaluate(); nothing is trained here.
model.compile(loss='mse')

# Number of batches needed to cover the whole test set once (ceiling division).
STEP_SIZE_TEST = (
    (test_generator.n + test_generator.batch_size - 1)
    // test_generator.batch_size
)

# NOTE(review): the predictions are only row-aligned with the test dataframe
# if test_generator was created with shuffle=False - confirm.
pred = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=0)

# Mean squared error over the test set.
scores = model.evaluate(test_generator)
print(scores)

Could you help me figure out which part has the problem and how I should solve it? Thank you in advance!

0

There are 0 answers