I am using a custom data generator and want to apply a weighted average ensemble. The training set has 1042 samples and the validation set has 298 samples. The batch size is 64. The following is the custom data generator:
class CustomVoiceAugmentation(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of augmented mel-spectrograms.

    For every batch the audio files named in the ``"VoiceName"`` column of
    ``data_df`` are loaded from ``dir_data``, passed through a randomised
    chain of ``naa`` augmenters, converted to min-max-scaled, right-padded
    mel-spectrograms, and returned together with the one-hot encoded
    ``"label"`` column.
    """

    def __init__(self, dir_data, data_df, num_classes, batch_size,
                 shuffle=None, drop_remainder=True):
        """Store the configuration and prepare the first epoch.

        Args:
            dir_data: Directory that contains the audio files.
            data_df: DataFrame with a ``"VoiceName"`` and a ``"label"`` column.
            num_classes: Number of classes; used as the one-hot width.
            batch_size: Number of samples per batch.
            shuffle: When truthy, rows are reshuffled at every epoch end.
            drop_remainder: When true (the default, matching the previous
                behaviour) a trailing partial batch is not counted by
                ``len()``; pass ``False`` to serve every sample.
        """
        # Harmless on plain Sequence, required by newer Keras dataset bases.
        super().__init__()
        self.dir_data = dir_data
        self.data_frame = data_df
        self.data_len = len(data_df)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.drop_remainder = drop_remainder
        self.sr = 44100
        self.mono = True
        self.shuffle = shuffle
        # Fit the encoder ONCE on all labels so the label -> index mapping is
        # the same for every batch. Re-fitting per batch maps a batch that
        # happens to contain a single class to index 0 whatever that class is.
        self._label_encoder = LabelEncoder().fit(np.asarray(data_df["label"]))
        self.n = 0  # cursor used by __next__
        self.max = len(self)
        self.on_epoch_end()
        print(f"Found {self.data_frame.shape[0]} voices belonging to {self.num_classes} classes")

    def on_epoch_end(self):
        """Reshuffle the rows of the data frame when shuffling is enabled."""
        if self.shuffle:
            self.data_frame = self.data_frame.sample(frac=1).reset_index(drop=True)

    def __len__(self):
        """Return the number of batches per epoch."""
        full_batches, leftover = divmod(self.data_len, self.batch_size)
        if leftover and not self.drop_remainder:
            return full_batches + 1
        return full_batches

    def voice_augmentation(self, signalvoice):
        """Apply loudness, mask, noise and pitch augmentation in that order.

        The augmenters are created and applied one after another (not built
        up-front) so the sequence of random draws matches the original code.
        """
        augmented_voice = naa.LoudnessAug(
            zone=(random.uniform(0, 0.5), random.uniform(0.5, 1)),
            factor=(random.uniform(0, 1), random.uniform(1, 2)),
        ).augment(signalvoice)
        augmented_voice = naa.MaskAug(
            sampling_rate=self.sr,
            zone=(random.uniform(0.4, 0.5), random.uniform(0.5, 0.6)),
            mask_with_noise=True,
        ).augment(augmented_voice)
        augmented_voice = naa.NoiseAug(
            zone=(random.uniform(0, 0.2), random.uniform(0.8, 1)),
            coverage=random.uniform(0, 1),
        ).augment(augmented_voice)
        augmented_voice = naa.PitchAug(
            sampling_rate=self.sr,
            zone=(0, 1),
            factor=(random.uniform(-10, 0), random.uniform(0, 10)),
        ).augment(augmented_voice)
        return augmented_voice

    # --------------------------------------------------------------------------
    def get_voice(self, index, batch_x, batch_y, dir_data):
        """Load the audio signals of one batch.

        ``index`` and ``dir_data`` are kept for interface compatibility; as
        before, files are resolved against ``self.dir_data``.

        Returns:
            ``(signals, labels)`` as two parallel lists.
        """
        signals = []
        labels = []
        for name, label in zip(batch_x, batch_y):
            pathvoice = os.path.join(self.dir_data, name)
            signalvoice, _ = librosa.load(pathvoice, sr=self.sr, mono=self.mono)
            signals.append(signalvoice)
            labels.append(label)
        return signals, labels

    # --------------------------------------------------------------------------
    def scale_minmax(self, melspec):
        """Min-max scale a 2-D spectrogram with ``MinMaxScaler``.

        ``MinMaxScaler`` works per feature, so each column of the input is
        scaled independently of the others.
        """
        melspec = np.asarray(melspec, dtype=np.float32)
        return MinMaxScaler().fit_transform(melspec)

    def pad_image(self, img_array, target_width=450):
        """Right-pad axis 1 of ``img_array`` up to ``target_width`` columns.

        The padding value is the mean along the padded axis
        (``np.pad(..., mode='mean')``).

        Raises:
            ValueError: If the array already has more than ``target_width``
                columns.
        """
        width = img_array.shape[1]
        if width > target_width:
            raise ValueError(
                f"cannot pad array of width {width} to target_width={target_width}"
            )
        return np.pad(img_array, [(0, 0), (0, target_width - width)], mode='mean')

    def to_melspectrogram(self, augmented_voice_signal):
        """Convert each signal to a scaled, padded dB mel-spectrogram."""
        images = []
        for signal in augmented_voice_signal:
            # Pass the real sampling rate: without it librosa assumes 22050 Hz
            # and fmin/fmax would be applied to the wrong frequencies.
            melspec = librosa.feature.melspectrogram(
                y=np.asarray(signal), sr=self.sr, n_mels=80, fmin=20, fmax=5000
            )
            melspec = librosa.power_to_db(melspec, ref=np.max)
            scaled = self.scale_minmax(melspec)
            # pad_image, not __pad_image: the double-underscore form is
            # name-mangled inside the class and raised AttributeError.
            images.append(self.pad_image(scaled))
        return images

    def __getitem__(self, index):
        """Return batch ``index`` as ``(images, one_hot_labels)`` tensors."""
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_x = self.data_frame["VoiceName"].iloc[start:stop]
        batch_y = self.data_frame["label"].iloc[start:stop]
        signals, labels = self.get_voice(index, batch_x, batch_y, self.dir_data)
        augmented_voice = self.voice_augmentation(signals)
        images = self.to_melspectrogram(augmented_voice)
        encoded = self._label_encoder.transform(np.asarray(labels))
        one_hot = to_categorical(encoded, num_classes=self.num_classes)
        return tf.convert_to_tensor(images), tf.convert_to_tensor(one_hot)

    def __next__(self):
        """Return the next batch, wrapping around after the last one.

        Note that this yields ONE ``(images, labels)`` tuple per call, not
        the whole dataset.
        """
        if self.n >= self.max:
            self.n = 0
        result = self.__getitem__(self.n)
        self.n += 1
        return result
This is the next part of the script:
# Directories holding the pre-processed train / validation / test audio.
dir_train_processed = 'C:/.../NewTrain'
dir_val_processed = 'C:/.../NewVal'
dir_test_processed = 'C:/.../NewTest'

# One DataFrame (plus class count) per split.
new_train_df, num_classes_tr = DatatoDF(dir_train_processed, labels)
new_val_df, num_classes_val = DatatoDF(dir_val_processed, labels)
new_test_df, num_classes_test = DatatoDF(dir_test_processed, labels)

# ------------------------------------------------------------------------------
# Batch generators for the training and validation splits.
batch_size = 64
train_aug_gen = CustomVoiceAugmentation(
    dir_train_processed,
    data_df=new_train_df,
    num_classes=num_classes_tr,
    batch_size=batch_size,
    shuffle=True,
)
val_aug_gen = CustomVoiceAugmentation(
    dir_val_processed,
    data_df=new_val_df,
    num_classes=num_classes_val,
    batch_size=batch_size,
    shuffle=True,
)

# next() returns ONE batch as an (images, labels) tuple, so each of these
# names holds a single batch of up to `batch_size` samples, not the full split.
train_batch = next(train_aug_gen)
val_batch = next(val_aug_gen)

# Ensemble members, all built for the same input shape.
input_shape = (80, 87, 3)
model_1 = cnn(input_shape)
model_2 = inceptionv3(input_shape)
model_3 = resnet50(input_shape)
model_4 = xception(input_shape)
When I run this:
# NOTE(review): train_batch / val_batch are the single (images, labels) tuples
# produced by next(...), so only that one batch reaches KerasMember, not the
# whole generator. Presumably the "2/2" progress is a prediction over those 64
# samples in chunks of 32 (Keras' default predict batch size) - confirm
# against KerasMember's implementation.
member1 = KerasMember(name="model1", keras_model=model_1, train_batches=train_batch, val_batches=val_batch)
2/2 [==============================] - 0s 48ms/step
2/2 [==============================] - 0s 65ms/step
It shows 2/2 and 2/2, which I think is incorrect; it should be 16/16 for the training set.
I checked `train_batch = next(train_aug_gen)` and `val_batch = next(val_aug_gen)`.
`len(train_aug_gen)` is 16 and `len(val_aug_gen)` is 4.
Why is `len(train_batch)` equal to 2, while `train_batch` is:
(<tf.Tensor: shape=(64, 80, 87, 3), dtype=float32, numpy=array([[[[..........]],
[[.....]] ,......
]]]], dtype=float32)>,
<tf.Tensor: shape=(64, 2), dtype=float32, numpy=
array([[1., 0.],
[1., 0.],
[1., 0.],
[0., 1.],
[1., 0.],
[0., 1.],
[1., 0.],
....
....
[1., 0.]], dtype=float32)>)
I would appreciate it if anyone could help me fix this mistake. Thanks.