Non-frontal Facial landmark prediction

21 views Asked by At

I am currently working on a project in which a model should predict the landmark coordinates of one side of the face (e.g. the right side), given the landmark coordinates of the other side (e.g. the left side) as input. I tried using a conditional GAN (cGAN), but so far all my efforts to arrive at a working model have failed.

Here is my current state:

def build_generator():
    """Assemble the conditional generator network.

    The model takes two inputs, a latent vector of size ``latent_dim`` and a
    conditioning landmark vector of size ``landmark_dim``, concatenates them,
    and passes the result through three Dense(ReLU) + BatchNormalization
    stages of width 128, 256 and 512. A final tanh Dense layer with
    ``np.prod(landmark_shape)`` units is reshaped to ``landmark_shape``.

    Returns:
        An uncompiled Keras model named ``'generator'`` mapping
        ``[latent, landmarks]`` to an output of shape ``landmark_shape``.
    """
    noise_in = layers.Input(shape=(latent_dim,))
    condition_in = layers.Input(shape=(landmark_dim,))

    hidden = layers.Concatenate()([noise_in, condition_in])
    # Progressively wider fully connected stages, each followed by batch norm.
    for width in (128, 256, 512):
        hidden = layers.Dense(width, activation='relu')(hidden)
        hidden = layers.BatchNormalization()(hidden)

    # tanh bounds every output coordinate to [-1, 1]; this assumes the target
    # landmarks are normalised to that range -- TODO confirm.
    flat_out = layers.Dense(np.prod(landmark_shape), activation='tanh')(hidden)
    landmarks_out = layers.Reshape(landmark_shape)(flat_out)

    return models.Model([noise_in, condition_in], landmarks_out, name='generator')


# Define the Discriminator network
def build_discriminator():
    """Assemble and compile the conditional discriminator network.

    The model takes a candidate landmark set of shape ``landmark_shape`` and a
    conditioning landmark vector of size ``landmark_dim``. The candidate is
    flattened, concatenated with the condition, and passed through Dense(ReLU)
    layers of width 512, 256 and 128 (dropout 0.4 after the first two). A
    single sigmoid unit produces the validity score.

    Returns:
        A Keras model named ``'discriminator'``, compiled with binary
        cross-entropy loss, ``Adam(0.0002, 0.5)`` and an accuracy metric.
    """
    candidate_in = layers.Input(shape=landmark_shape)
    condition_in = layers.Input(shape=(landmark_dim,))

    flat = layers.Reshape(target_shape=(np.prod(landmark_shape),))(candidate_in)
    hidden = layers.Concatenate()([flat, condition_in])
    # Two Dense + Dropout stages, then a final Dense stage without dropout.
    for width in (512, 256):
        hidden = layers.Dense(width, activation='relu')(hidden)
        hidden = layers.Dropout(0.4)(hidden)
    hidden = layers.Dense(128, activation='relu')(hidden)
    score = layers.Dense(1, activation='sigmoid')(hidden)

    net = models.Model([candidate_in, condition_in], score, name='discriminator')
    net.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])
    return net


# Combine Generator and Discriminator into a GAN
def build_cgan(generator, discriminator):
    """Stack a generator and a discriminator into one compiled model.

    Args:
        generator: Model mapping ``[latent, landmarks]`` to generated output.
        discriminator: Model mapping ``[candidate, landmarks]`` to a validity
            score. Its ``trainable`` attribute is set to ``False`` here, which
            mutates the object passed in.

    Returns:
        A Keras model named ``'cgan'`` mapping ``[latent, landmarks]`` to the
        discriminator's score for the generator's output, compiled with binary
        cross-entropy loss and ``Adam(0.0002, 0.5)``.
    """
    # Freeze the discriminator before wiring it into the stacked model, so that
    # training the stacked model is intended to update the generator only.
    discriminator.trainable = False

    noise_in = layers.Input(shape=(latent_dim,))
    condition_in = layers.Input(shape=(landmark_dim,))
    stacked_inputs = [noise_in, condition_in]

    # The same conditioning vector feeds both the generator and the discriminator.
    validity = discriminator([generator(stacked_inputs), condition_in])

    stacked = models.Model(stacked_inputs, validity, name='cgan')
    stacked.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))
    return stacked


# Create left-to-right and right-to-left generators
generator_left_to_right = build_generator()
generator_right_to_left = build_generator()

# Build the discriminator (one instance, shared by both translation directions)
discriminator = build_discriminator()

# Create left-to-right and right-to-left cGAN models
# NOTE(review): build_cgan sets discriminator.trainable = False. The direct
# discriminator.train_on_batch calls below rely on Keras still updating the
# discriminator through its own earlier compile() -- confirm for the Keras
# version in use.
cgan_left_to_right = build_cgan(generator_left_to_right, discriminator)
cgan_right_to_left = build_cgan(generator_right_to_left, discriminator)

# Example training loop
epochs = 10000
batch_size = 16

# The discriminator targets never change, so build them once.
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

# Assumes x_train_left[i] and y_train_right[i] describe the same face
# (TODO confirm). min() only guards against arrays of different length.
num_pairs = min(x_train_left.shape[0], y_train_right.shape[0])

for epoch in range(epochs):
    # Draw ONE set of indices and use it for both arrays. Sampling left and
    # right independently would pair landmarks from unrelated faces, so the
    # "real" (target, condition) examples would carry no correspondence for
    # the conditional discriminator to learn.
    batch_idx = np.random.randint(0, num_pairs, batch_size)
    left_batch = x_train_left[batch_idx]
    right_batch = y_train_right[batch_idx]

    # Generate fake samples in both directions from the same latent noise.
    # verbose=0 stops predict() printing a progress bar on every iteration.
    latent_variables = np.random.normal(size=(batch_size, latent_dim))
    generated_left_to_right = generator_left_to_right.predict([latent_variables, left_batch], verbose=0)
    generated_right_to_left = generator_right_to_left.predict([latent_variables, right_batch], verbose=0)

    # Train the discriminator on left-to-right pairs (condition = left side)
    d_loss_left_to_right_real = discriminator.train_on_batch([right_batch, left_batch], real_labels)
    d_loss_left_to_right_fake = discriminator.train_on_batch([generated_left_to_right, left_batch], fake_labels)
    d_loss_left_to_right = 0.5 * np.add(d_loss_left_to_right_real, d_loss_left_to_right_fake)

    # Train the discriminator on right-to-left pairs (condition = right side)
    # NOTE(review): feeding left_batch as the candidate and right_batch as the
    # condition only type-checks if landmark_shape == (landmark_dim,) -- confirm.
    d_loss_right_to_left_real = discriminator.train_on_batch([left_batch, right_batch], real_labels)
    d_loss_right_to_left_fake = discriminator.train_on_batch([generated_right_to_left, right_batch], fake_labels)
    d_loss_right_to_left = 0.5 * np.add(d_loss_right_to_left_real, d_loss_right_to_left_fake)

    # Train the generators through the stacked models: they are rewarded when
    # the discriminator labels their output as real.
    g_loss_left_to_right = cgan_left_to_right.train_on_batch([latent_variables, left_batch], real_labels)
    g_loss_right_to_left = cgan_right_to_left.train_on_batch([latent_variables, right_batch], real_labels)

    # Print progress and save generated images
    if epoch % 100 == 0:
        print(
            f"{epoch} [D loss left-to-right: {d_loss_left_to_right[0]} | D accuracy left-to-right: {100 * d_loss_left_to_right[1]}] "
            f"[D loss right-to-left: {d_loss_right_to_left[0]} | D accuracy right-to-left: {100 * d_loss_right_to_left[1]}] "
            f"[G loss left-to-right: {g_loss_left_to_right} | G loss right-to-left: {g_loss_right_to_left}]")

        # Optionally, visualize generated images
        if epoch % 500 == 0:
            visualize_generated_images(generator_left_to_right, latent_dim, x_val_left[:3], landmark_shape,
                                       num_samples=3)

0

There are 0 answers