I am trying to build a Bi-LSTM CRF model for NER on the CoNLL-2003 dataset.
I have encoded the words using character embeddings and GloVe embeddings, so each token has an embedding of size 341.
This is my model:
def get_model(embed_size, max_seq_len, num_labels):
    """Build, summarize and compile the Bi-LSTM + CRF sequence tagger.

    Args:
        embed_size: Size of the per-token embedding vector (last input axis).
        max_seq_len: Number of tokens in each input sequence.
        num_labels: Number of tag classes given to the CRF layer and to the
            F1 metric.

    Returns:
        The compiled Keras ``Model`` mapping the embedded sequence to the
        output of the CRF layer.
    """
    # `inputs` rather than `input`, so the builtin is not shadowed.
    inputs = Input(shape=(max_seq_len, embed_size), name="Input_Layer")

    # Bidirectional LSTM that keeps one output vector per token.
    hidden = Bidirectional(
        LSTM(units=75, return_sequences=True), name="Bi-LSTM"
    )(inputs)
    # Per-token dense projection, as suggested by neuralNer.
    hidden = TimeDistributed(
        Dense(75, activation="relu"), name="Bi-LSTM-out"
    )(hidden)

    crf = CRF(num_labels, name='CRF-layer')
    out = crf(hidden)

    model = Model(inputs, out)
    model.summary(line_length=150)

    f1 = tfa.metrics.F1Score(num_classes=num_labels)
    # NOTE(review): the CRF layer yields several outputs (the model summary
    # lists four shapes for it), so a plain 'categorical_crossentropy' loss is
    # matched against an output whose shape differs from the one-hot labels
    # and fit() fails with "Shapes (None, 16, 9) and (None, 16) are
    # incompatible". Training presumably needs a CRF-aware loss (e.g. a
    # negative log-likelihood built on tfa.text.crf_log_likelihood)
    # -- TODO confirm and replace.
    model.compile(
        optimizer="adam",
        loss='categorical_crossentropy',
        metrics=['accuracy', f1],
    )
    return model
# Build the tagger for 16-token sequences of 341-dimensional embeddings and
# 9 labels, then train it on the training arrays.
model = get_model(embed_size=341, max_seq_len=16, num_labels=9)
model.fit(x=train_x, y=train_y)
Model Summary:
______________________________________________________________________________________________________________________________________________________
Layer (type) Output Shape Param #
======================================================================================================================================================
Input_Layer (InputLayer) [(None, 16, 341)] 0
______________________________________________________________________________________________________________________________________________________
Bi-LSTM (Bidirectional) (None, 16, 150) 250200
______________________________________________________________________________________________________________________________________________________
Bi-LSTM-out (TimeDistributed) (None, 16, 75) 11325
______________________________________________________________________________________________________________________________________________________
CRF-layer (CRF) [(None, 16), (None, 16, 9), (None,), (9, 9)] 783
======================================================================================================================================================
Total params: 262,308
Trainable params: 262,308
Non-trainable params: 0
______________________________________________________________________________________________________________________________________________________
Input shape:
x is (3250, 16, 341)
and y is (3250, 16, 9)
I am training on 3250 data points. Each sequence has length 16, each token is embedded in 341 dimensions, and there are 9 possible labels.
Now the error I am getting is:
ValueError: Shapes (None, 16, 9) and (None, 16) are incompatible
which I believe is because the CRF output is [(None, 16), (None, 16, 9), (None,), (9, 9)]
Is there a way to just get the second element of the output?
OR any other way this can be fixed?
I am using TF 2.0+ and the CRF layer imported with `from tensorflow_addons.layers import CRF`.
I have already implemented this in tf 1.15 using CRF from keras-contrib [Don't want that]
Adding Trace-Back based on @MyStackRunnethOver comments:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-15-f0e6bf499704> in <module>()
18 model = get_model(embed_size=341, max_seq_len=16, num_labels=9)
19 model.fit(
---> 20 valid_x, valid_y
21 )
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
869 # This is the first call of __call__, so we have to initialize.
870 initializers = []
--> 871 self._initialize(args, kwds, add_initializers_to=initializers)
872 finally:
873 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
724 self._concrete_stateful_fn = (
725 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 726 *args, **kwds))
727
728 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2967 args, kwargs = None, None
2968 with self._lock:
-> 2969 graph_function, _ = self._maybe_define_function(args, kwargs)
2970 return graph_function
2971
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3359
3360 self._function_cache.missed.add(call_context_key)
-> 3361 graph_function = self._create_graph_function(args, kwargs)
3362 self._function_cache.primary[cache_key] = graph_function
3363
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3204 arg_names=arg_names,
3205 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3206 capture_by_value=self._capture_by_value),
3207 self._function_attributes,
3208 function_spec=self.function_spec,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988 _, original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args, **func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635 return out
636
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 16, 9) and (None, 16) are incompatible
Finally I will be checking and implementing this question and all the possible/proposed solutions between 8AM to 8PM [IST] until it's solved, so please help!