I am training an image classification model on a dataset of about 21,000 images. I built the input pipeline with TensorFlow's tf.data API, but training is still too slow. I have the GPU version of TensorFlow installed. At first I thought Keras' ImageDataGenerator was what slowed training down, but after switching to a tf.data pipeline the GPU is still not being utilized. Please help me out. My whole code is below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50, EfficientNetB3, InceptionV3, DenseNet121
from tensorflow.keras.optimizers import Adam
# ignoring warnings
import warnings
warnings.simplefilter("ignore")
import os,cv2
# Dataset location; every other path below is derived from it.
base_dir = "D:/cassava-leaf-disease-classification/"
train_csv = pd.read_csv(os.path.join(base_dir, "train.csv"))
df_sample = pd.read_csv(os.path.join(base_dir, "sample_submission.csv"))

# Full path of every training image: base_dir + "train_images/" + image_id.
train_images = base_dir + "train_images/" + train_csv['image_id']
train_labels = pd.read_csv(os.path.join(base_dir, "train.csv"))

BATCH_SIZE = 16
EPOCHS = 25
TARGET_SIZE = 300
# Keras expects an integer number of steps: count only the full batches that
# fit in the 80% training share instead of passing a fractional value.
STEPS_PER_EPOCH = int(len(train_labels) * 0.8) // BATCH_SIZE

# Targets are taken from the last column of train.csv
# (presumably the `label` column -- confirm against the CSV header).
labels = train_labels.iloc[:, -1].values
def build_decoder(with_labels=True, target_size=(TARGET_SIZE, TARGET_SIZE), ext='jpg'):
    """Build a function that loads an image file into a float32 tensor.

    The returned function reads the file at ``img_path``, decodes it to three
    channels, divides the pixel values by 255 and resizes the result to
    ``target_size``.

    Args:
        with_labels: if truthy, the returned function takes ``(img_path, label)``
            and returns ``(image, label)``; otherwise it takes ``img_path`` only
            and returns the image.
        target_size: ``(height, width)`` passed to ``tf.image.resize``.
        ext: image format, one of ``'png'``, ``'jpg'`` or ``'jpeg'``.

    Raises:
        ValueError: if ``ext`` is not a supported extension. This is raised
            here, when the decoder is built, rather than later when the
            returned function is first called.
    """
    # Pick the decode op once, up front, so a bad extension fails fast.
    if ext == 'png':
        decode_bytes = tf.image.decode_png
    elif ext in ('jpg', 'jpeg'):
        decode_bytes = tf.image.decode_jpeg
    else:
        raise ValueError("Image extension not supported")

    def img_decode(img_path):
        file_bytes = tf.io.read_file(img_path)
        img = decode_bytes(file_bytes, channels=3)
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)
        return img

    def decode_with_labels(img_path, label):
        # The label is passed through untouched.
        return img_decode(img_path), label

    return decode_with_labels if with_labels else img_decode
def build_augmenter(with_labels=True):
    """Build a function that applies random augmentations to an image.

    The augmentation applies, in order: random horizontal flip, random
    vertical flip, random brightness (max delta 0.1), random contrast
    (factor in [0.9, 1.1]) and random saturation (factor in [0.9, 1.1]).

    Args:
        with_labels: if truthy, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)``; otherwise it takes and
            returns the image only.

    Returns:
        The augmentation callable, suitable for ``tf.data.Dataset.map``.
    """
    def augment(img):
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
        img = tf.image.random_saturation(img, 0.9, 1.1)
        return img

    def augment_with_labels(img, label):
        # Only the image is altered; the label is passed through untouched.
        return augment(img), label

    return augment_with_labels if with_labels else augment
def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Pipeline order: slice -> shuffle -> decode -> augment -> repeat -> batch
    -> prefetch.

    Args:
        paths: image file paths.
        labels: optional labels aligned with ``paths``; when given, the
            dataset yields ``(image, label)`` pairs, otherwise images only.
        bsize: batch size.
        cache: NOTE: currently has no effect on the pipeline itself (the
            ``dset.cache`` step is disabled, see below); it only controls
            whether ``cache_dir`` is created.
        decode_fn: element decoder; defaults to ``build_decoder(...)``.
        augment_fn: element augmenter; defaults to ``build_augmenter(...)``.
        augment: whether to apply ``augment_fn``.
        repeat: whether to repeat the dataset indefinitely.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: directory created when non-empty and ``cache is True``.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)
    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)
    dset = tf.data.Dataset.from_tensor_slices(slices)

    # Shuffle the lightweight (path, label) slices *before* decoding, so the
    # shuffle buffer holds path strings rather than decoded float32 images.
    # Shuffling also comes before repeat() so that samples from different
    # epochs are not mixed together.
    if shuffle:
        dset = dset.shuffle(shuffle)

    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    # NOTE: the caching step is disabled; `cache` is accepted but unused here.
    # dset = dset.cache(cache_dir) if cache else dset
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()

    # Prefetch so input preparation overlaps with the training step.
    return dset.batch(bsize).prefetch(AUTO)
# Hold out 20% of the samples for validation; the seed is fixed so the split
# is the same on every run.
train_img, valid_img, train_labels, valid_labels = train_test_split(
    train_images,
    labels,
    train_size=0.8,
    random_state=0,
)

# Training pipeline: relies on build_dataset's defaults for augmentation,
# repetition and shuffling.
train_df = build_dataset(
    train_img,
    train_labels,
    bsize=BATCH_SIZE,
    cache=True,
)

# Validation pipeline: a single, ordered, un-augmented pass over the data.
valid_df = build_dataset(
    valid_img,
    valid_labels,
    bsize=BATCH_SIZE,
    cache=True,
    augment=False,
    repeat=False,
    shuffle=False,
)
def create_model():
    """Build and compile the classifier.

    The network is an ImageNet-pretrained EfficientNetB3 backbone (without its
    classification head), followed by global average pooling and a 5-way
    softmax layer. It is compiled with Adam (learning rate 0.001), sparse
    categorical cross-entropy and an accuracy metric.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # NOTE(review): the Keras EfficientNet documentation states that these
    # models expect pixel values in the [0, 255] range, while the decoder in
    # this file divides images by 255 -- confirm the intended input scale.
    backbone = EfficientNetB3(include_top=False, weights='imagenet',
                              input_shape=(TARGET_SIZE, TARGET_SIZE, 3))

    model = models.Sequential()
    model.add(backbone)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(5, activation='softmax'))

    # `learning_rate` is the supported argument name; the `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = create_model()
model.summary()

# All three callbacks watch validation accuracy, where higher is better.

# Write the weights to disk whenever validation accuracy reaches a new best.
model_save = ModelCheckpoint(
    'C:/Users/rosha/PycharmProjects/CLDD/saved_Models/EffNetB3_300_16_best_weights.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop after 5 epochs without an improvement of at least 0.001.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0.001,
    patience=5,
    verbose=1,
)

# Multiply the learning rate by 0.3 after 2 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.3,
    patience=2,
    min_delta=0.001,
    verbose=1,
)

history = model.fit(
    train_df,
    validation_data=valid_df,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=[model_save, early_stop, reduce_lr],
    verbose=1,
)
# Plot the training curves: loss on the top axis, accuracy on the bottom one.
plt.rcParams.update({'font.size': 16})
hist = pd.DataFrame(history.history)
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))

# (axis, training column, training label, validation column, validation label)
curves = (
    (ax1, 'loss', 'training loss', 'val_loss', 'validation loss'),
    (ax2, 'accuracy', 'training accuracy', 'val_accuracy', 'validation accuracy'),
)
for axis, train_col, train_label, val_col, val_label in curves:
    hist[train_col].plot(ax=axis, c='k', label=train_label)
    hist[val_col].plot(ax=axis, c='r', linestyle='--', label=val_label)
    axis.legend()

plt.show()

# Persist the full model once the plots have been shown.
model.save('./EffNetB3_300_16.h5')
So here is a small checklist I like to go over:
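First, check whether TensorFlow can see your GPU at all by running `print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))`. If the output is "Num GPUs Available: 0", then you should check that you do indeed have tensorflow-gpu installed; you might also want to check that the supporting libraries you use are installed in their GPU versions as well.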
If your libraries are correct, you will need to check whether your CUDA driver installation is correct. This step is somewhat OS-dependent, but there are many tutorials online for each operating system. My favourite for TF can be found on the official website: https://www.tensorflow.org/install/gpu