Face recognition with Siamese network and triplet loss does not learn useful patterns


I am new to this field and I am trying to build a face recognition algorithm using a Siamese network with triplet loss. The problem is that the loss value never decreases below the margin of the triplet loss. I have tried 4 networks that might solve the problem, and I also tried a ResNet50 network and had the same issue. I tried changing the learning rate to lots of values and the issue is the same. I tried regularization like dropout and it did not change anything: the loss value does not decrease, and when I compute the L2 distance between any 2 images, the distance is almost 0. I used this dataset for the images: https://www.kaggle.com/datasets/stoicstatic/face-recognition-dataset. Below is the first piece of code I wrote to read the images; after that I ran a second piece of code to transform the dataset into anchors, positives and negatives. This first code reads the images from the hard disk:

import os
import cv2

# dataset_path is the full path to the dataset on my local hard drive
# iterations is the number of different people in the dataset
iterations = len(os.listdir(dataset_path))

imgs = {}      # maps each person's folder index to the list of their images
lst_imgs = []  # temporary buffer for the current person's images

for main_path in range(iterations):
    # store the current person's folder path to use when reading individual images
    current_path = os.path.join(dataset_path, str(main_path))

    # each person in the main path has 72 different/positive images
    for sub_path in range(len(os.listdir(current_path))):

        full_img_path = os.path.join(current_path, str(sub_path))

        # after building the full path of the image, read it using cv2
        img = cv2.imread(full_img_path + ".png")

        # some images in my previous dataset had wrong dimensions, so this
        # check makes sure all dataset images end up with the same dims
        if img.shape == (112, 112, 3):
            rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            resized_im = cv2.resize(rgb_image, (round(128 / 2), round(128 / 2)))
            lst_imgs.append(resized_im)

    imgs[main_path] = lst_imgs.copy()
    lst_imgs.clear()

After running this, the images are stored in a dictionary called imgs, where each key is a person's folder index and the value is a list of 72 images of that same person.
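For reference, a quick sanity check of that structure could look like this (an illustrative line, not part of my original code):

print(len(imgs), len(imgs[0]), imgs[0][0].shape)  # number of people, 72, (64, 64, 3)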

I then used this code to transform the dataset into anchors, positives and negatives:

import random
import itertools

anchors = []
positives = []
negatives = []

for main, sub in imgs.items():
    # for each key/person in the imgs dict, take their positive images and shuffle them
    choices = sub.copy()
    random.shuffle(choices)
    for choice in choices:

        # append the person's first image as the anchor for the other 72 positive images
        anchors.append(sub[0])

        # for each of the 72 shuffled choices, append it as the positive image
        positives.append(choice)

        # to get the negative image, take a copy of the dataset...
        negative_images = imgs.copy()
        # ...and exclude the current person, because their images would be
        # positives, not negatives, for this anchor
        del negative_images[main]
        # chain all remaining images into one list of length (72 * len(imgs) - 72),
        # then pick one of them at random as the negative
        neg_choices = list(itertools.chain(*(value for value in negative_images.values())))
        rand_negative_choice = random.choice(neg_choices)
        negatives.append(rand_negative_choice)
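A quick consistency check after building the lists (an illustrative assertion, not in my original code):

assert len(anchors) == len(positives) == len(negatives)
print(len(anchors), "triplets built")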

Then I converted the lists to NumPy arrays:

import numpy as np

anchors = np.array(anchors)
positives = np.array(positives)
negatives = np.array(negatives)
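One thing worth double-checking at this point (a sketch, not something my original code does): cv2.imread returns uint8 values in [0, 255], and unscaled inputs can make training with very small learning rates extremely slow, so a minimal normalization step would be:

anchors = anchors.astype("float32") / 255.0
positives = positives.astype("float32") / 255.0
negatives = negatives.astype("float32") / 255.0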

This is the triplet loss that I used in all of the tests. I also tried changing it and taking someone else's implementation, and I still face the same issue:

import tensorflow as tf

def triplet_loss(y_true, y_pred, alpha=0.2):
    # split y_pred into anchor, positive and negative embeddings
    # (this assumes the three embeddings are stacked along the first axis of
    # y_pred; y_true is ignored, the labels passed to fit() are only dummies)
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    # squared L2 distances between anchor-positive and anchor-negative
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=-1)

    # hinge on the margin alpha, then average over the batch
    loss = tf.maximum(pos_dist - neg_dist + alpha, 0.0)
    return tf.reduce_mean(loss)
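One way to see why the loss can sit exactly at the margin: if the network maps every image to (nearly) the same embedding, both distances are (nearly) zero and the loss equals alpha. A minimal toy check of the function above (illustrative values only):

a = p = n = tf.zeros((1, 128))               # fully collapsed embeddings
stacked = tf.stack([a, p, n])                # shape (3, 1, 128)
print(triplet_loss(None, stacked).numpy())   # prints 0.2, i.e. exactly the margin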

This is the model that I used:


from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model

input_shape = (anchors.shape[1], anchors.shape[2], 3)
input_anchor = Input(shape=input_shape)
input_positive = Input(shape=input_shape)
input_negative = Input(shape=input_shape)

def create_siamese_network(input_shape):
    input_image = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), activation='relu')(input_image)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Flatten()(x)
    x = Dense(4069, activation='relu')(x)
    x = Dense(2048, activation='relu')(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(265, activation='relu')(x)
    output = Dense(128)(x)  # final 128-dimensional embedding, no activation
    return Model(inputs=input_image, outputs=output)

siamese_network = create_siamese_network(input_shape)
embedding_anchor = siamese_network(input_anchor)
embedding_positive = siamese_network(input_positive)
embedding_negative = siamese_network(input_negative)
output = tf.keras.layers.concatenate([embedding_anchor, embedding_positive, embedding_negative], axis=1)
siamese_model_2 = Model(inputs=[input_anchor, input_positive, input_negative], outputs=output)
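A quick shape check of the combined output (an illustrative line, not part of my original code):

print(siamese_model_2.output_shape)  # (None, 384): the three 128-d embeddings concatenated side by side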

I have tried changing the model and changing the number of neurons in the output layer (the embedding size), and the issue did not get solved:

from tensorflow.keras.optimizers import Adam

siamese_model_2.compile(optimizer=Adam(learning_rate=0.00001), loss=triplet_loss)
# dummy labels: triplet_loss ignores y_true, but fit() requires a target array
labels = np.zeros((6103,))
siamese_model_2.fit([anchors, positives, negatives], y=labels, epochs=250, batch_size=40)
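For completeness, this is roughly how I check the L2 distance between the embeddings of two images after training (a sketch with illustrative variable names):

# embed one anchor and one negative (an image of a different person)
emb_a = siamese_network.predict(anchors[:1])
emb_n = siamese_network.predict(negatives[:1])
print(np.linalg.norm(emb_a - emb_n))  # comes out almost 0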

Any solution to this issue, please?

I have tried a more complex model like this in other attempts:

from tensorflow.keras.layers import Dropout

def create_siamese_network(input_shape):
  inputs = tf.keras.Input(shape=input_shape)
  x = tf.keras.layers.Conv2D(64, (10, 10), padding='same', activation='relu')(inputs)
  x = tf.keras.layers.MaxPooling2D((2, 2))(x)
  x = Dropout(0.3)(x)

  x = tf.keras.layers.Conv2D(128, (7, 7), padding='same', activation='relu')(x)
  x = tf.keras.layers.MaxPooling2D((2, 2))(x)
  x = Dropout(0.3)(x)

  x = tf.keras.layers.Conv2D(128, (4, 4), padding='same', activation='relu')(x)
  x = tf.keras.layers.MaxPooling2D((2, 2))(x)
  x = Dropout(0.3)(x)

  x = tf.keras.layers.Conv2D(256, (4, 4), padding='same', activation='relu')(x)
  x = tf.keras.layers.Flatten()(x)
  x = tf.keras.layers.Dense(4096, activation='relu')(x)
  # embedding = tf.keras.layers.Dense(1024, activation='sigmoid')(x)
  embedding = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(x)  # L2-normalize embeddings
  model = tf.keras.Model(inputs=inputs, outputs=embedding)
  return model


siamese_network = create_siamese_network(input_shape)
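A side note on the Lambda normalization layer above (my own observation, not something my code relies on): squared L2 distances between unit vectors are bounded, since ||a - b||^2 = 2 - 2*cos(a, b) <= 4, so the 0.2 margin is still attainable after normalization. For example:

a = tf.math.l2_normalize(tf.random.normal((1, 4096)), axis=1)
b = tf.math.l2_normalize(tf.random.normal((1, 4096)), axis=1)
print(tf.reduce_sum(tf.square(a - b)).numpy())  # always in [0, 4]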

I have also tried this other complex network, and the issue is still the same:

# pretrained ResNet50 backbone without its classification head
model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False)

# make the ResNet layers from index 120 onward trainable
for layer in model.layers[120:]:
    layer.trainable = True

new_model_1 = tf.keras.Sequential([
    model,
    Dense(4069, activation='relu'),
    Dense(2048, activation='relu'),
    Dense(1024, activation='relu'),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='linear')])
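
# Note (an illustrative check, not in my original code): with include_top=False
# the backbone outputs a 4-D feature map rather than a flat vector, and the
# Dense layers above are applied position-wise to it.
print(model.output_shape)  # (None, None, None, 2048)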

input_shape = (anchors.shape[1], anchors.shape[2], 3)
input_anchor = Input(shape=input_shape)
input_positive = Input(shape=input_shape)
input_negative = Input(shape=input_shape)

embedding_anchor = new_model_1(input_anchor)
embedding_positive = new_model_1(input_positive)
embedding_negative = new_model_1(input_negative)

output = tf.keras.layers.concatenate([embedding_anchor, embedding_positive, embedding_negative], axis=1)

new_model_1 = Model(inputs=[input_anchor, input_positive, input_negative], outputs=output)
new_model_1.compile(optimizer=Adam(learning_rate=0.000002), loss=triplet_loss)
# dummy labels again: triplet_loss ignores y_true
labels = np.zeros((8200,))
new_model_1.fit([anchors, positives, negatives], y=labels, epochs=5, batch_size=30)