When I load my Jupyter notebook into Google Colab I get an error that does not occur on my laptop. The error is:
ValueError: layer Exception encountered when calling 'tf_distil_bert_model' (type TFDistilBertModel).
Data of type <class 'keras.src.engine.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for input_ids.
Call arguments received by layer 'tf_distil_bert_model' (type TFDistilBertModel): input_ids=["<KerasTensor: shape=(None, 256) dtype=int32 (created by layer 'input_ids')>", "<KerasTensor: shape=(None, 256) dtype=int32 (created by layer 'input_attention')>"] attention_mask=None head_mask=None inputs_embeds=None output_attentions=None output_hidden_states=None return_dict=None training=False
Does anyone know why I get this error in Colab? I was building a pre-trained DistilBERT model with some additional TensorFlow layers, as shown below:
import tensorflow as tf
from transformers import TFDistilBertModel

MAX_LENGTH = 256
LAYER_DROPOUT = 0.5
LEARNING_RATE = 1e-4
RANDOM_STATE = 42
def build_model(transformer, max_length=MAX_LENGTH):
    # Define the weight initializer with a fixed seed for reproducibility
    weight_initializer = tf.keras.initializers.GlorotNormal(seed=RANDOM_STATE)

    # Define the input layers
    input_ids_layer = tf.keras.layers.Input(shape=(max_length,),
                                            name='input_ids',
                                            dtype='int32')
    input_attention_layer = tf.keras.layers.Input(shape=(max_length,),
                                                  name='input_attention',
                                                  dtype='int32')

    # DistilBERT outputs a tuple whose first element (index 0) is the
    # hidden state at the output of the model's last layer:
    # a tf.Tensor of shape (batch_size, sequence_length, hidden_size=768).
    last_hidden_state = transformer([input_ids_layer, input_attention_layer])[0]

    # We only care about DistilBERT's output for the [CLS] token,
    # which sits at index 0 of every encoded sequence.
    # Slicing out the [CLS] tokens gives us 2D data.
    cls_token = last_hidden_state[:, 0, :]

    dense_layer1 = tf.keras.layers.Dense(256,
                                         activation='relu',
                                         kernel_initializer=weight_initializer,
                                         kernel_constraint=None,
                                         bias_initializer='zeros')(cls_token)
    dropout_layer1 = tf.keras.layers.Dropout(LAYER_DROPOUT)(dense_layer1)

    dense_layer2 = tf.keras.layers.Dense(256,
                                         activation='relu',
                                         kernel_initializer=weight_initializer,
                                         kernel_constraint=None,
                                         bias_initializer='zeros')(dropout_layer1)
    dropout_layer2 = tf.keras.layers.Dropout(LAYER_DROPOUT)(dense_layer2)

    dense_layer3 = tf.keras.layers.Dense(32,
                                         activation='relu',
                                         kernel_initializer=weight_initializer,
                                         kernel_constraint=None,
                                         bias_initializer='zeros')(dropout_layer2)
    dropout_layer3 = tf.keras.layers.Dropout(LAYER_DROPOUT)(dense_layer3)

    # A single-node output layer for binary classification
    output = tf.keras.layers.Dense(1,
                                   activation='sigmoid',
                                   kernel_initializer=weight_initializer,
                                   kernel_constraint=None,
                                   bias_initializer='zeros')(dropout_layer3)

    # Define and compile the model
    model = tf.keras.Model([input_ids_layer, input_attention_layer], output)
    model.compile(tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  #loss='binary_crossentropy',
                  loss=tf.keras.losses.BinaryFocalCrossentropy(),
                  metrics=['accuracy'])
    return model
# Load the pre-trained encoder ('distilbert-base-uncased' shown here as an example checkpoint)
distilBERT = TFDistilBertModel.from_pretrained('distilbert-base-uncased')
model = build_model(distilBERT)
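For reference, here is a minimal sketch of how the two inputs would be fed to the finished model (the checkpoint name and the sample sentence are placeholders, not part of my original code):

from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
# Tokenize to fixed-length input_ids and attention_mask tensors
enc = tokenizer(['an example sentence'], padding='max_length',
                truncation=True, max_length=MAX_LENGTH, return_tensors='tf')
# The Keras model expects [input_ids, attention_mask], in that order
preds = model.predict([enc['input_ids'], enc['attention_mask']])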
It turns out that the error was caused by a version mismatch: the transformers module on Colab was a different version from the one on my laptop (presumably because the KerasTensor class moved between Keras releases, so the transformers build on Colab no longer recognized the functional-API inputs). Installing the same transformers version on Colab as on my laptop fixed the problem.
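For anyone hitting the same error: print the version on the machine where the code works, then pin that exact version in Colab before importing anything (the version number below is only a placeholder, use whatever your working machine reports):

# On the working machine
import transformers
print(transformers.__version__)   # e.g. '4.30.2' (placeholder)

# In a Colab cell, before any imports
!pip install transformers==4.30.2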