Validation loss doesn't decrease (tensorflow)

519 views Asked by At

first of all, sorry for my english,

I am facing a problem with my project using tensorflow, i have to code a dictionnary (english-->german), i know it is not easy to figure out where is the problem :/ it is killing me, i am here for any question. THANKS

The inputs:

  1. En_input:

has shape of (16, 14, 128) :

-batch -number of words (1<words<13) and using a function to map over the datasets that pads each English sequence of embeddings with some distinct padding value before the sequence, so that each sequence is length 13

    def pad_eng_embeddings(data):
        def pad_eng_embeddings_(x,y):  
            return (tf.pad(x, [tf.math.maximum([13-tf.shape(x)[0] ,0],   tf.constant([0,0])),tf.constant([0,0])], "CONSTANT", constant_values=0), y)
        return data.map(pad_eng_embeddings_)

128 after the

embedding_layer = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/nnlm-en-dim128-with-normalization/1",output_shape=[128], input_shape=[], dtype=tf.string) 
  1. Input_german

shape (16,14) with "< start>" and "< end>" token to the beginning and end

Problem with the Validation loss :

Epoch : 1 --> Loss train :  1.4463926553726196  --> Loss Validation :  1.1669453382492065
Epoch : 2 --> Loss train :  0.9627977609634399  --> Loss Validation :  0.9346983432769775
Epoch : 3 --> Loss train :  0.6962901949882507  --> Loss Validation :  0.8172098994255066
Epoch : 4 --> Loss train :  0.4979133903980255  --> Loss Validation :  0.7540919184684753
Epoch : 5 --> Loss train :  0.34379565715789795  --> Loss Validation :  0.7045937776565552
.
.
.
.
.
Epoch : 111 --> Loss train :  0.0012935919221490622  --> Loss Validation :  0.43797847628593445
Epoch : 112 --> Loss train :  0.0010554787004366517  --> Loss Validation :  0.4402512311935425
Epoch : 113 --> Loss train :  0.001183984917588532  --> Loss Validation :  0.4351470470428467
Epoch : 114 --> Loss train :  0.0008711100090295076  --> Loss Validation :  0.43835172057151794
Epoch : 115 --> Loss train :  0.0008662969921715558  --> Loss Validation :  0.4418365955352783
Epoch : 116 --> Loss train :  0.0015571219846606255  --> Loss Validation :  0.4526227116584778
Epoch : 117 --> Loss train :  0.002025176538154483  --> Loss Validation :  0.442545086145401
Epoch : 118 --> Loss train :  0.0014257029397413135  --> Loss Validation :  0.43709230422973633
Epoch : 119 --> Loss train :  0.0010628846939653158  --> Loss Validation :  0.43659183382987976
Epoch : 120 --> Loss train :  0.0008744939113967121  --> Loss Validation :  0.44265955686569214

Here mon code:

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')
@tf.function
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    #print(loss_[0])
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    #print(loss_[0])
    return tf.reduce_mean(loss_)

@tf.function
def Inputs_model(Input_german):
    return (tf.cast(Input_german[:,0:-1],tf.float32),tf.cast(Input_german[:,1:],tf.float32))
    

@tf.function
def grad(En_input,GR_input,GR_output):
    with tf.GradientTape() as tape:
        state_h_En,state_c_En = model(En_input)
        de_tensor = tf.squeeze(tf.convert_to_tensor(GR_input))
        lstm_decoder, state_h_decoder, state_c_decoder=decoder(de_tensor,state_h_En, state_c_En)
        loss_value = loss_function(GR_output, lstm_decoder)
        variables = model.trainable_variables + decoder.trainable_variables
        gradients=tape.gradient(loss_value,variables)
    return loss_value,gradients


@tf.function
def train_step(En_input, Input_german):
  GR_input,GR_output=Inputs_model(Input_german)

  loss_value,gradients=grad(En_input,GR_input,GR_output)
  variables = model.trainable_variables + decoder.trainable_variables
  optimizer.apply_gradients(zip(gradients,variables))
  return loss_value

@tf.function
def validation_step(En_input, Input_german):
  GR_input,GR_output=Inputs_model(Input_german)
  state_h_En,state_c_En = model(En_input)
  de_tensor = tf.squeeze(tf.convert_to_tensor(GR_input))
  lstm_decoder, state_h_decoder, state_c_decoder=decoder(de_tensor,state_h_En, state_c_En)
  loss_value = loss_function(GR_output, lstm_decoder)
  return loss_value
  
 def train( num_epochs, dataset_train,dataset_valid):
    train_loss_results=[]
    validation_loss_results=[]
    
    for epoch in range(num_epochs):
      mean_loss_t=tf.keras.metrics.Mean()
      mean_loss_v=tf.keras.metrics.Mean()

      for  En_input, Input_german in dataset_train:
        loss_value=train_step(En_input, Input_german)
        mean_loss_t(loss_value)
      train_loss_results.append(mean_loss_t.result())
      
      
      for  En_input, Input_german in dataset_valid:
        loss_value=validation_step(En_input, Input_german)
        mean_loss_v(loss_value)
      validation_loss_results.append(mean_loss_v.result()) 

      dataset_train.shuffle(2)
      dataset_valid.shuffle(2)
      #checkpoint.save(file_prefix = checkpoint_prefix)
      print("Epoch :",epoch+1,"--> Loss train : ",float(train_loss_results[epoch])," --> Loss Validation : ",float(validation_loss_results[epoch]))
      if float(validation_loss_results[epoch])< 0.02:
        return train_loss_results, validation_loss_results
    return train_loss_results, validation_loss_results
0

There are 0 answers