I am currently working on a project where a model predicts the facial landmark coordinates of one side of the face (e.g., the right side) given the landmark coordinates of the other side as input. I proceeded to use a Conditional GAN, but all my efforts to arrive at a working model have been unsuccessful so far.
Here is my current state:
def build_generator():
    """Build the conditional generator network.

    The generator maps a latent noise vector concatenated with a
    conditioning landmark vector to a generated landmark array of shape
    ``landmark_shape`` (final tanh squashes outputs into [-1, 1], so the
    training targets are presumably normalized to that range — TODO confirm).

    Relies on module-level globals: ``latent_dim``, ``landmark_dim``,
    ``landmark_shape``, ``layers``, ``models``, ``np``.
    """
    noise_in = layers.Input(shape=(latent_dim,))
    condition_in = layers.Input(shape=(landmark_dim,))

    h = layers.Concatenate()([noise_in, condition_in])
    # Widening MLP trunk: Dense+ReLU followed by BatchNorm at each stage.
    for units in (128, 256, 512):
        h = layers.Dense(units, activation='relu')(h)
        h = layers.BatchNormalization()(h)

    flat_out = layers.Dense(np.prod(landmark_shape), activation='tanh')(h)
    generated = layers.Reshape(landmark_shape)(flat_out)

    return models.Model([noise_in, condition_in], generated, name='generator')
# Define the Discriminator network
def build_discriminator():
    """Build and compile the conditional discriminator network.

    Scores a candidate landmark array (real or generated) together with its
    conditioning landmark vector, emitting a single sigmoid validity score.
    Compiled here with binary cross-entropy, Adam(0.0002, 0.5), and an
    accuracy metric, so ``train_on_batch`` returns ``[loss, accuracy]``.

    Relies on module-level globals: ``landmark_shape``, ``landmark_dim``,
    ``layers``, ``models``, ``np``, ``Adam``.
    """
    candidate_in = layers.Input(shape=landmark_shape)
    condition_in = layers.Input(shape=(landmark_dim,))

    # Flatten the candidate and fuse it with the condition vector.
    h = layers.Reshape(target_shape=(np.prod(landmark_shape),))(candidate_in)
    h = layers.Concatenate()([h, condition_in])

    # Narrowing MLP with dropout regularization on the wider layers.
    for units in (512, 256):
        h = layers.Dense(units, activation='relu')(h)
        h = layers.Dropout(0.4)(h)
    h = layers.Dense(128, activation='relu')(h)

    validity = layers.Dense(1, activation='sigmoid')(h)

    critic = models.Model([candidate_in, condition_in], validity,
                          name='discriminator')
    critic.compile(loss='binary_crossentropy',
                   optimizer=Adam(0.0002, 0.5),
                   metrics=['accuracy'])
    return critic
# Combine Generator and Discriminator into a GAN
def build_cgan(generator, discriminator):
    """Stack generator and (frozen) discriminator into one trainable model.

    The combined model takes [noise, condition] and outputs the
    discriminator's validity score for the generated sample, so training it
    against "real" labels pushes only the generator's weights.

    NOTE(review): setting ``trainable = False`` here mutates the shared
    discriminator object. The standalone ``discriminator.train_on_batch``
    calls keep updating it only because Keras captures trainability at
    compile time (the discriminator was compiled earlier) — verify this
    holds for the Keras version in use.
    """
    discriminator.trainable = False

    noise_in = layers.Input(shape=(latent_dim,))
    condition_in = layers.Input(shape=(landmark_dim,))

    fake_sample = generator([noise_in, condition_in])
    score = discriminator([fake_sample, condition_in])

    combined = models.Model([noise_in, condition_in], score, name='cgan')
    combined.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))
    return combined
# Create left-to-right and right-to-left generators
generator_left_to_right = build_generator()
generator_right_to_left = build_generator()
# Build discriminator
# NOTE(review): a single discriminator is shared by BOTH direction cGANs and
# is trained on (right|left) and (left|right) pairs alike — confirm this
# weight sharing is intentional rather than one discriminator per direction.
discriminator = build_discriminator()
# Create left-to-right and right-to-left cGAN models
# (build_cgan freezes the shared discriminator inside each combined model)
cgan_left_to_right = build_cgan(generator_left_to_right, discriminator)
cgan_right_to_left = build_cgan(generator_right_to_left, discriminator)
# Example training loop
epochs = 10000
batch_size = 16
for epoch in range(epochs):
    # Sample independent batches of left-side inputs and right-side targets.
    # NOTE(review): the two batches are drawn with independent random
    # indices, so left/right pairs are NOT aligned — if x_train_left and
    # y_train_right are paired per subject, sample ONE index array and use
    # it for both; unpaired conditioning is a likely reason training stalls.
    left_batch = x_train_left[np.random.randint(0, x_train_left.shape[0], batch_size)]
    right_batch = y_train_right[np.random.randint(0, y_train_right.shape[0], batch_size)]

    # --- Discriminator updates, left -> right direction ---
    latent_variables = np.random.normal(size=(batch_size, latent_dim))
    generated_left_to_right = generator_left_to_right.predict([latent_variables, left_batch])
    d_loss_left_to_right_real = discriminator.train_on_batch(
        [right_batch, left_batch], np.ones((batch_size, 1)))
    d_loss_left_to_right_fake = discriminator.train_on_batch(
        [generated_left_to_right, left_batch], np.zeros((batch_size, 1)))
    d_loss_left_to_right = 0.5 * np.add(d_loss_left_to_right_real, d_loss_left_to_right_fake)

    # --- Discriminator updates, right -> left direction ---
    d_loss_right_to_left_real = discriminator.train_on_batch(
        [left_batch, right_batch], np.ones((batch_size, 1)))
    d_loss_right_to_left_fake = discriminator.train_on_batch(
        [generator_right_to_left.predict([latent_variables, right_batch]), right_batch],
        np.zeros((batch_size, 1)))
    d_loss_right_to_left = 0.5 * np.add(d_loss_right_to_left_real, d_loss_right_to_left_fake)

    # --- Generator updates: label generated samples as real ---
    g_loss_left_to_right = cgan_left_to_right.train_on_batch(
        [latent_variables, left_batch], np.ones((batch_size, 1)))
    g_loss_right_to_left = cgan_right_to_left.train_on_batch(
        [latent_variables, right_batch], np.ones((batch_size, 1)))

    # Print progress.
    # BUG FIX: the original f-string had an unclosed '[' (malformed output)
    # and reported only the left-to-right discriminator numbers; report all
    # four losses so both directions and both generators can be monitored.
    if epoch % 100 == 0:
        print(
            f"{epoch} "
            f"[D L->R loss: {d_loss_left_to_right[0]:.4f}, acc: {100 * d_loss_left_to_right[1]:.2f}%] "
            f"[D R->L loss: {d_loss_right_to_left[0]:.4f}, acc: {100 * d_loss_right_to_left[1]:.2f}%] "
            f"[G L->R loss: {g_loss_left_to_right:.4f}] "
            f"[G R->L loss: {g_loss_right_to_left:.4f}]"
        )

    # Optionally, visualize generated images.
    if epoch % 500 == 0:
        visualize_generated_images(generator_left_to_right, latent_dim, x_val_left[:3],
                                   landmark_shape, num_samples=3)