import time

import tensorflow as tf
from tensorflow.keras.layers import Activation, BatchNormalization, Dense, Dropout
from tqdm import tqdm


class BaseModel(tf.keras.Model):
    def __init__(self):
        super(BaseModel, self).__init__()
        self.dense1 = Dense(256, input_shape=(152,))
        self.batch_norm1 = BatchNormalization()
        self.activation1 = Activation(tf.keras.layers.LeakyReLU(alpha=0.3))
        self.dropout1 = Dropout(0.2)
        self.dense2 = Dense(128, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.dropout2 = Dropout(0.2)
        self.dense3 = Dense(512, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.batch_norm2 = BatchNormalization()
        self.dropout3 = Dropout(0.2)
        self.dense4 = Dense(512, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.batch_norm3 = BatchNormalization()
        self.dropout4 = Dropout(0.3)
        self.dense5 = Dense(2048, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.batch_norm4 = BatchNormalization()
        self.dropout5 = Dropout(0.3)
        self.dense6 = Dense(1024, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.batch_norm5 = BatchNormalization()
        self.dropout6 = Dropout(0.3)
        self.dense7 = Dense(1024, activation=tf.keras.layers.LeakyReLU(alpha=0.3))
        self.batch_norm6 = BatchNormalization()
        self.dropout7 = Dropout(0.3)
        self.dense8 = Dense(18211, activation="linear")

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.batch_norm1(x)
        x = self.activation1(x)
        x = self.dropout1(x)
        x = self.dense2(x)
        x = self.dropout2(x)
        x = self.dense3(x)
        x = self.batch_norm2(x)
        x = self.dropout3(x)
        x = self.dense4(x)
        x = self.batch_norm3(x)
        x = self.dropout4(x)
        x = self.dense5(x)
        x = self.batch_norm4(x)
        x = self.dropout5(x)
        x = self.dense6(x)
        x = self.batch_norm5(x)
        x = self.dropout6(x)
        x = self.dense7(x)
        x = self.batch_norm6(x)
        x = self.dropout7(x)
        return self.dense8(x)
# Instantiate the model
base_model = BaseModel()
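# (Illustrative sanity check, not part of my training script:) a forward pass
# on random data shows the model maps (batch, 152) inputs to (batch, 18211) outputs.
import numpy as np
dummy_x = tf.convert_to_tensor(np.random.rand(4, 152).astype("float32"))
print(base_model(dummy_x).shape)  # expected: (4, 18211)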
class MAMLTrainer:
    def __init__(self, model, loss=loss_function, lr_inner=0.01, log_steps=1000):
        self.model = model
        self.loss_function = loss
        self.lr_inner = lr_inner
        self.log_steps = log_steps
        self.optimizer = tf.keras.optimizers.Adam()

    def copy_model(self, model, x):
        copied_model = tf.keras.models.clone_model(self.model)
        copied_model(x)  # Run a forward pass so the copied model builds its weights
        copied_model.set_weights(model.get_weights())
        return copied_model

    def train_maml(self, x_train, y_train, epochs=1, batch_size=1):
        for _ in range(epochs):
            total_loss = 0
            losses = []
            start = time.time()
            # Split x and y into batches
            x_batches = [x_train[i:i + batch_size] for i in range(0, len(x_train), batch_size)]
            y_batches = [y_train[i:i + batch_size] for i in range(0, len(y_train), batch_size)]
            # Use tqdm to create a progress bar
            for x_batch, y_batch in tqdm(zip(x_batches, y_batches), total=len(x_batches), unit="batch"):
                x = tf.convert_to_tensor(x_batch, dtype=tf.float32)
                y = tf.convert_to_tensor(y_batch, dtype=tf.float32)
                with tf.GradientTape() as test_tape:
                    with tf.GradientTape() as train_tape:
                        y_pred = self.model(x)
                        train_loss = self.loss_function(y, y_pred)
                    gradients = train_tape.gradient(train_loss, self.model.trainable_variables)
                    model_copy = self.copy_model(self.model, x)
                    for layer, gradient in zip(model_copy.layers, gradients):
                        if isinstance(layer, tf.keras.layers.BatchNormalization):
                            continue  # Skip BatchNormalization layers
                        if isinstance(layer, (tf.keras.layers.Activation, tf.keras.layers.LeakyReLU)):
                            continue  # Skip Activation layers
                        if isinstance(layer, tf.keras.layers.Dropout):
                            continue  # Skip Dropout layers
                        # Scale the gradients to match the shape of the layer's weights
                        scaled_gradients = gradient * layer.trainable_weights[0]  # Assuming the first trainable weight is the kernel
                        layer.trainable_weights[0].assign(layer.trainable_weights[0] - self.lr_inner * scaled_gradients)
                    test_loss = self.loss_function(y, model_copy(x))
                gradients = test_tape.gradient(test_loss, self.model.trainable_variables)
                self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
                total_loss += test_loss
                loss = total_loss / len(x_batches)  # Use the batch count instead of (i+1.0)
                losses.append(loss)
                if len(losses) % self.log_steps == 0 and len(losses) > 0:
                    print('Step {}: loss = {}, Time to run {} steps = {}'.format(
                        len(losses), loss, self.log_steps, time.time() - start))
                    start = time.time()
        return self.model
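# Roughly how I invoke the trainer (illustrative sketch only: my real loss_function
# is defined earlier in the notebook, and x_train/y_train are the arrays whose
# shapes are given below; the stand-ins here just match those shapes).
import numpy as np
loss_function = tf.keras.losses.MeanSquaredError()
x_train = np.random.rand(429, 152).astype("float32")
y_train = np.random.rand(429, 18211).astype("float32")

trainer = MAMLTrainer(base_model, loss=loss_function, lr_inner=0.01, log_steps=100)
trained_model = trainer.train_maml(x_train, y_train, epochs=1, batch_size=32)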
This is the code I'm using to implement meta-learning with the MAML algorithm on the base model above, but it gives me an incompatibility error. The strange thing is that training works for the first three layers of the base model (it breaks once a 1024-neuron Dense layer is encountered).
Please ignore the complexity of the base model; I was just experimenting with it. I also tried keeping only the first three layers, with the same neuron configuration as in the code above, but the output layer has 18211 neurons and cannot be removed (it is the expected output). With just the first three layers plus the output layer, I get the error on the last layer; a rough reconstruction of that trimmed variant is shown below.
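For clarity, the trimmed model I tested looks roughly like this (my reconstruction, written as a Sequential only to keep it short; the layer widths are the same as in the code above):

trimmed_model = tf.keras.Sequential([
    Dense(256, input_shape=(152,)),
    BatchNormalization(),
    Activation(tf.keras.layers.LeakyReLU(alpha=0.3)),
    Dropout(0.2),
    Dense(128, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
    Dropout(0.2),
    Dense(512, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(18211, activation="linear"),  # the output layer that cannot be removed
])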
InvalidArgumentError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [512] vs. [512,18211] [Op:Mul]
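Just to illustrate the message itself: the two shapes in the error cannot be broadcast against each other, so a bare element-wise multiply of tensors with those shapes fails the same way (this is only my illustration of the message, not necessarily where it originates in my code):

a = tf.ones([512])          # broadcasting aligns trailing dimensions: 512 ...
b = tf.ones([512, 18211])   # ... vs. 18211, which are incompatible
c = a * b                   # raises InvalidArgumentError: Incompatible shapes: [512] vs. [512,18211]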
I don't know why this happens only on the layers with a large number of neurons. I suspect the problem is in the meta-learning class. Could you please suggest a solution?
Training data shapes: features (429, 152), labels (429, 18211).
Thank you in advance!