I am using a custom data generator and want to apply a weighted-average ensemble. The training set has 1042 samples and the validation set has 298 samples; the batch size is 64, so I expect 1042 // 64 = 16 training batches and 298 // 64 = 4 validation batches per epoch. The following is the custom data generator:

import os
import random

import librosa
import numpy as np
import tensorflow as tf
import nlpaug.augmenter.audio as naa
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical


class CustomVoiceAugmentation(tf.keras.utils.Sequence):
    def __init__(self, dir_data, data_df, num_classes, batch_size,
                 shuffle=False):
        
        self.dir_data = dir_data
        self.data_frame = data_df
        self.data_len = len(data_df)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.sr = 44100
        self.mono = True
        self.shuffle = shuffle
        self.n = 0
        self.max = self.__len__()
        self.on_epoch_end()
        
        print(f"Found {self.data_frame.shape[0]} voices belonging to {self.num_classes} classes") 
       
    def on_epoch_end(self):
        if self.shuffle:
            self.data_frame = self.data_frame.sample(frac=1).reset_index(drop=True)    
       
    def __len__(self):
        # Integer division drops the final partial batch (1042 // 64 = 16).
        return self.data_len // self.batch_size
    
    def voice_augmentation(self, signalvoice):
        # Chain of nlpaug audio augmenters: loudness, masking, noise, then pitch.
        augmented_voice = naa.LoudnessAug(zone=(random.uniform(0, 0.5), random.uniform(0.5, 1)),
                                          factor=(random.uniform(0, 1), random.uniform(1, 2))).augment(signalvoice)
        augmented_voice = naa.MaskAug(sampling_rate=self.sr,
                                      zone=(random.uniform(0.4, 0.5), random.uniform(0.5, 0.6)),
                                      mask_with_noise=True).augment(augmented_voice)
        augmented_voice = naa.NoiseAug(zone=(random.uniform(0, 0.2), random.uniform(0.8, 1)),
                                       coverage=random.uniform(0, 1)).augment(augmented_voice)
        augmented_voice = naa.PitchAug(sampling_rate=self.sr, zone=(0, 1),
                                       factor=(random.uniform(-10, 0), random.uniform(0, 10))).augment(augmented_voice)
        return augmented_voice
    
#------------------------------------------------------------------------------    
    def get_voice(self, index, batch_x, batch_y, dir_data):
        X = []
        Y = []
        for i, name in enumerate(batch_x):
            label = batch_y.iloc[i]
            # os.path.join expects separate arguments, not a pre-concatenated string.
            pathvoice = os.path.join(self.dir_data, name)
            signalvoice, _ = librosa.load(pathvoice, sr=self.sr)
            X.append(signalvoice)
            Y.append(label)
        return X, Y
    
    
#------------------------------------------------------------------------------
    def scale_minmax(self, melspec):
        # MinMaxScaler scales each column (time frame) independently to [0, 1].
        scaler = MinMaxScaler()
        melspec = np.asarray(melspec, dtype=np.float32)
        minmax_melspec = scaler.fit_transform(melspec)
        return minmax_melspec
 
    def pad_image(self, img_array):
        # img_array.shape[1] is the time axis: pad each spectrogram out to 450 frames.
        height = img_array.shape[1]
        padded_array = np.pad(img_array, [(0, 0), (0, 450 - height)], mode='mean')
        return padded_array
    
    def to_melspectrogram(self, augmented_voice_signal):
        image = []
        for signal in augmented_voice_signal:
            melspec = librosa.feature.melspectrogram(y=np.asarray(signal), n_mels=80, fmin=20, fmax=5000)
            melspec = librosa.power_to_db(melspec, ref=np.max)
            img = self.scale_minmax(melspec)
            padded_array = self.pad_image(img)  # the method is named pad_image, not __pad_image
            image.append(padded_array)
        # Return after the loop so the whole batch is converted, not just the first signal.
        return image
    
    def __getitem__(self, index):
        batch_x = self.data_frame["VoiceName"][index * self.batch_size:(index + 1) * self.batch_size]
        batch_y = self.data_frame["label"][index * self.batch_size:(index + 1) * self.batch_size]
        X, Y = self.get_voice(index, batch_x, batch_y, self.dir_data)
        augmented_voice = self.voice_augmentation(X)
        image = self.to_melspectrogram(augmented_voice)
        cell = tf.convert_to_tensor(image)
        # Caution: fitting a fresh LabelEncoder per batch assumes every batch
        # contains both classes; otherwise the label mapping can vary between batches.
        enc = LabelEncoder()
        Y = enc.fit_transform(np.array(Y))
        new_Y = to_categorical(Y, num_classes=self.num_classes)

        return cell, tf.convert_to_tensor(new_Y)
    
    def __next__(self):
        if self.n >= self.max:
            self.n = 0
        result = self.__getitem__(self.n)
        self.n += 1
        return result
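
A quick way to sanity-check a Sequence like this (a minimal sketch; gen stands for any CustomVoiceAugmentation instance, and the prints are mine):

def check_generator(gen):
    print("batches per epoch:", len(gen))  # calls __len__
    X, y = gen[0]                          # calls __getitem__(0)
    print("X batch shape:", X.shape)
    print("y batch shape:", y.shape)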

This is the part that uses the generator:

dir_train_processed= 'C:/.../NewTrain'
dir_val_processed = 'C:/.../NewVal'
dir_test_processed ='C:/.../NewTest'

new_train_df, num_classes_tr = DatatoDF(dir_train_processed, labels)
new_val_df, num_classes_val = DatatoDF(dir_val_processed, labels)   
new_test_df, num_classes_test = DatatoDF(dir_test_processed, labels) 
  
#------------------------------------------------------------------------------
batch_size = 64
train_aug_gen = CustomVoiceAugmentation(dir_train_processed, data_df=new_train_df,
                                        num_classes=num_classes_tr,
                                        batch_size=batch_size,
                                        shuffle=True)

val_aug_gen = CustomVoiceAugmentation(dir_val_processed, data_df=new_val_df,
                                      num_classes=num_classes_val,
                                      batch_size=batch_size,
                                      shuffle=True)

train_batch = next(train_aug_gen)
val_batch = next(val_aug_gen)

model_1 = cnn((80, 87, 3))
model_2 = inceptionv3((80, 87, 3))
model_3 = resnet50((80, 87, 3))
model_4 = xception((80, 87, 3))
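
For the weighted-average ensemble itself, the idea I am working toward is combining the per-model class probabilities with fixed weights, roughly like this sketch (the weights below are placeholders, not tuned values):

import numpy as np

models = [model_1, model_2, model_3, model_4]
weights = np.array([0.25, 0.25, 0.25, 0.25])  # placeholders; would be tuned on validation data

def ensemble_predict(x_batch):
    # Stack each model's softmax output: shape (n_models, batch, n_classes).
    preds = np.stack([m.predict(x_batch) for m in models])
    # Contract the model axis against the weights: shape (batch, n_classes).
    weighted = np.tensordot(weights, preds, axes=1)
    return weighted.argmax(axis=1)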
                

When I run this:

member1 = KerasMember(name="model1", keras_model=model_1, train_batches=train_batch, val_batches=val_batch)

2/2 [==============================] - 0s 48ms/step
2/2 [==============================] - 0s 65ms/step

it shows 2/2 and 2/2, which I think is incorrect; it should be 16/16 for the training set.

I checked train_batch = next(train_aug_gen) and val_batch = next(val_aug_gen):
len(train_aug_gen) is 16 and len(val_aug_gen) is 4.
Why is len(train_batch) 2?

Meanwhile, train_batch is:

 (<tf.Tensor: shape=(64, 80, 87, 3), dtype=float32, numpy=array([[[[..........]],
 [[.....]] ,...... 

]]]], dtype=float32)>,
 <tf.Tensor: shape=(64, 2), dtype=float32, numpy=
 array([[1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        ....
        ....
        [1., 0.]], dtype=float32)>)
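
For completeness, this is how I printed the numbers above (a minimal check; the variable names are mine):

X_batch, y_batch = train_batch                 # the tuple shown above
print(len(train_aug_gen), len(val_aug_gen))    # 16 and 4, i.e. 1042 // 64 and 298 // 64
print(X_batch.shape, y_batch.shape)            # (64, 80, 87, 3) and (64, 2)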

I would appreciate it if anyone could help me solve this mistake. Thanks.
