First of all, sorry for my English.
I am facing a problem with my TensorFlow project: I have to build a translator (English --> German). I know it is not easy to figure out where the problem is, and it is killing me. I am happy to answer any questions. Thanks!
The inputs:
- En_input:
  shape (16, 14, 128): batch size, number of words per sentence (1 < words < 13), embedding dimension. A function is mapped over the dataset that pads each English sequence of embeddings with a distinct padding value before the sequence, so that each sequence has length 13:
def pad_eng_embeddings(data):
    def pad_eng_embeddings_(x, y):
        # prepend max(13 - len, 0) rows of zeros along the time axis
        return (tf.pad(x,
                       [tf.math.maximum([13 - tf.shape(x)[0], 0], tf.constant([0, 0])),
                        tf.constant([0, 0])],
                       "CONSTANT", constant_values=0),
                y)
    return data.map(pad_eng_embeddings_)
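A quick sanity check of the padding on a toy element (a sketch with made-up shapes; the string just stands in for the German side of the pair):

import tensorflow as tf

# one fake English sentence: 5 embedded words, 128-dim each
ds = tf.data.Dataset.from_tensors((tf.ones([5, 128]), tf.constant("dummy german side")))
for x, y in pad_eng_embeddings(ds):
    print(x.shape)  # (13, 128): 8 zero rows were prepended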
The 128 comes from the embedding layer:

embedding_layer = hub.KerasLayer(
    "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128-with-normalization/1",
    output_shape=[128], input_shape=[], dtype=tf.string)
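If it helps to verify that layer in isolation, it maps a batch of strings to 128-dim vectors (a minimal sketch, assuming tensorflow_hub is installed):

import tensorflow as tf
import tensorflow_hub as hub

embedding_layer = hub.KerasLayer(
    "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128-with-normalization/1",
    output_shape=[128], input_shape=[], dtype=tf.string)
print(embedding_layer(tf.constant(["hello", "world"])).shape)  # (2, 128)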
- Input_german:
  shape (16, 14), with "<start>" and "<end>" tokens added at the beginning and end of each sequence.
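Conceptually, each German sentence is wrapped before tokenization; a sketch (the actual tokenizer and vocabulary are not shown in this post):

words = ["ich", "bin", "müde"]  # hypothetical sentence
wrapped = ["<start>"] + words + ["<end>"]
# after tokenization and padding, each wrapped sentence becomes one row of Input_german (length 14)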
The problem is the validation loss: the training loss keeps dropping toward zero, but the validation loss stops improving around 0.44:
Epoch : 1 --> Loss train : 1.4463926553726196 --> Loss Validation : 1.1669453382492065
Epoch : 2 --> Loss train : 0.9627977609634399 --> Loss Validation : 0.9346983432769775
Epoch : 3 --> Loss train : 0.6962901949882507 --> Loss Validation : 0.8172098994255066
Epoch : 4 --> Loss train : 0.4979133903980255 --> Loss Validation : 0.7540919184684753
Epoch : 5 --> Loss train : 0.34379565715789795 --> Loss Validation : 0.7045937776565552
...
Epoch : 111 --> Loss train : 0.0012935919221490622 --> Loss Validation : 0.43797847628593445
Epoch : 112 --> Loss train : 0.0010554787004366517 --> Loss Validation : 0.4402512311935425
Epoch : 113 --> Loss train : 0.001183984917588532 --> Loss Validation : 0.4351470470428467
Epoch : 114 --> Loss train : 0.0008711100090295076 --> Loss Validation : 0.43835172057151794
Epoch : 115 --> Loss train : 0.0008662969921715558 --> Loss Validation : 0.4418365955352783
Epoch : 116 --> Loss train : 0.0015571219846606255 --> Loss Validation : 0.4526227116584778
Epoch : 117 --> Loss train : 0.002025176538154483 --> Loss Validation : 0.442545086145401
Epoch : 118 --> Loss train : 0.0014257029397413135 --> Loss Validation : 0.43709230422973633
Epoch : 119 --> Loss train : 0.0010628846939653158 --> Loss Validation : 0.43659183382987976
Epoch : 120 --> Loss train : 0.0008744939113967121 --> Loss Validation : 0.44265955686569214
Here is my code:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

@tf.function
def loss_function(real, pred):
    # mask out the padded positions (token ID 0)
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
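One thing to note: tf.reduce_mean(loss_) averages over all time steps, including the padded positions that were just zeroed out, so batches with short sequences get a smaller loss than they should. A common variant (a sketch reusing the same loss_object as above) normalizes by the number of real tokens instead:

@tf.function
def loss_function_masked_mean(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    # divide by the count of non-padded tokens, not by the full sequence length
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)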
@tf.function
def Inputs_model(Input_german):
    # teacher forcing: decoder input = tokens[:, :-1], target = tokens[:, 1:]
    return (tf.cast(Input_german[:, 0:-1], tf.float32),
            tf.cast(Input_german[:, 1:], tf.float32))
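This is the usual teacher-forcing split: the decoder sees the sequence without the last token and is trained to predict the sequence shifted left by one. A toy example (the token IDs are made up):

tokens = tf.constant([[1, 5, 7, 2]])  # e.g. <start>=1, <end>=2 (hypothetical IDs)
dec_in, dec_target = Inputs_model(tokens)
# dec_in     -> [[1., 5., 7.]]   (float32 because of the cast above)
# dec_target -> [[5., 7., 2.]]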
@tf.function
def grad(En_input, GR_input, GR_output):
    with tf.GradientTape() as tape:
        # encode the English batch; the encoder returns its final LSTM states
        state_h_En, state_c_En = model(En_input)
        de_tensor = tf.squeeze(tf.convert_to_tensor(GR_input))
        # decode conditioned on the encoder states
        lstm_decoder, state_h_decoder, state_c_decoder = decoder(de_tensor, state_h_En, state_c_En)
        loss_value = loss_function(GR_output, lstm_decoder)
    variables = model.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss_value, variables)
    return loss_value, gradients
@tf.function
def train_step(En_input, Input_german):
    GR_input, GR_output = Inputs_model(Input_german)
    loss_value, gradients = grad(En_input, GR_input, GR_output)
    variables = model.trainable_variables + decoder.trainable_variables
    optimizer.apply_gradients(zip(gradients, variables))
    return loss_value
@tf.function
def validation_step(En_input, Input_german):
    GR_input, GR_output = Inputs_model(Input_german)
    state_h_En, state_c_En = model(En_input)
    de_tensor = tf.squeeze(tf.convert_to_tensor(GR_input))
    lstm_decoder, state_h_decoder, state_c_decoder = decoder(de_tensor, state_h_En, state_c_En)
    loss_value = loss_function(GR_output, lstm_decoder)
    return loss_value
def train(num_epochs, dataset_train, dataset_valid):
    train_loss_results = []
    validation_loss_results = []
    for epoch in range(num_epochs):
        mean_loss_t = tf.keras.metrics.Mean()
        mean_loss_v = tf.keras.metrics.Mean()
        for En_input, Input_german in dataset_train:
            loss_value = train_step(En_input, Input_german)
            mean_loss_t(loss_value)
        train_loss_results.append(mean_loss_t.result())
        for En_input, Input_german in dataset_valid:
            loss_value = validation_step(En_input, Input_german)
            mean_loss_v(loss_value)
        validation_loss_results.append(mean_loss_v.result())
        # Note: Dataset.shuffle() returns a new dataset, so calling
        # dataset_train.shuffle(2) here without reassigning does nothing;
        # shuffle once when building the pipeline instead, e.g.
        # dataset_train = dataset_train.shuffle(buffer_size, reshuffle_each_iteration=True)
        # checkpoint.save(file_prefix=checkpoint_prefix)
        print("Epoch :", epoch + 1, "--> Loss train :", float(train_loss_results[epoch]),
              "--> Loss Validation :", float(validation_loss_results[epoch]))
        if float(validation_loss_results[epoch]) < 0.02:
            return train_loss_results, validation_loss_results
    return train_loss_results, validation_loss_results
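For completeness, this is how the loop is driven (assuming dataset_train and dataset_valid are batched tf.data.Dataset objects yielding (En_input, Input_german) pairs):

train_loss_results, validation_loss_results = train(120, dataset_train, dataset_valid)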