I have the following Keras data generator, that receives pairs as input of the form:
pairs = [((0, 1, 2), 0),
((3, 4, 5, 6, 7, 8, 9, 10, 11), 0),
((12,), 1),
((13, 14, 15, 16), 1),
((17, 18, 19, 20), 2)]
Now, I want to generate positive and negative examples of the data. If pairs were not nested tuples, the code would've worked fine. Here's the code snippet:
def data_generation(self, pairs):
"""Generate batches of samples for training"""
batch = np.zeros(shape=(self.batch_size, 3), dtype=object)
# Adjust label based on task
if self.classification:
neg_label = 0
else:
neg_label = -1
# This creates a generator
while True:
for idx, (file_id, test_id) in enumerate(random.sample(pairs, self.n_positive)):
batch[idx, :] = (np.asarray(file_id), test_id, 1)
# Increment idx by 1
idx += 1
# Add negative examples until reach batch size
while idx < self.batch_size:
# random selection
random_test = random.randrange(self.nr_tests)
# Check to make sure this is not a positive example
if (file_id, random_test) not in self.pairs_set:
# Add to batch and increment index
batch[idx, :] = (np.asarray(file_id), random_test, neg_label)
idx += 1
np.random.shuffle(batch)
yield {'file': batch[:, 0], 'test': batch[:, 1]}, batch[:, 2]
The shapes of the batch vectors are:
print(batch[:, 0].shape)
print(batch[:, 1].shape)
print(batch[:, 2].shape)
Output:
(2000,)
(2000,)
(2000,)
The problem is that for each batch[:, 0] there is a nested nd.array with variable length.
Now the output of data_generation()
is:
{'file': array([array([809, 386, 813, 75, 248, 614, 34, 332, 389]),
array([ 52, 53, 486, 489]), array([ 52, 53, 486, 489]), ...,
array([ 52, 53, 486, 489]), array([ 52, 53, 486, 489]),
array([ 52, 53, 486, 489])], dtype=object), 'test': array([1247, 1566, 814, ..., 142, 2336, 674], dtype=object)} [1 0 0 ... 0 0 0]
Then I build the following Keras Model that trains one Embedding for set of files and one for tests:
def build_model(self, embedding_size=50, optimizer='Adam', classification=True):
"""
Build model architecture/framework
:return: model
"""
from keras.layers import Input, Embedding, Dot, Reshape, Dense
from keras.models import Model
# Both inputs are 1-dimensional
revision = Input(name='revision', shape=[1])
test = Input(name='test', shape=[1])
# Embedding the book (shape will be (None, 1, 50))
file_embedding = Embedding(name='file_embedding',
input_dim=len(self.Data.file_index),
output_dim=embedding_size, input_length=self.max_len - 1)(revision)
# Embedding the link (shape will be (None, 1, 50))
test_embedding = Embedding(name='test_embedding',
input_dim=len(self.Data.test_index),
output_dim=embedding_size)(test)
# Merge the layers with a dot product along the second axis (shape will be (None, 1, 1))
merged = Dot(name='dot_product', normalize=True, axes=2)([file_embedding, test_embedding])
# Reshape to be a single number (shape will be (None, 1))
merged = Reshape(target_shape=[1])(merged)
# If classification, add extra layer and loss function is binary cross entropy
if classification:
merged = Dense(1, activation='sigmoid')(merged)
model = Model(inputs=[revision, test], outputs=merged)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
After calling model.fit(), I get the following Traceback:
File "/Users/joaolousada/Documents/5ÂșAno/Master-Thesis/main/Prioritizer/Prioritizer.py", line 170, in crossValidation
verbose=2)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1063, in fit
steps_per_execution=self._steps_per_execution)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1117, in __init__
model=model)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 916, in __init__
**kwargs)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 788, in __init__
peek = _process_tensorlike(peek)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1021, in _process_tensorlike
inputs = nest.map_structure(_convert_numpy_and_scipy, inputs)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 635, in map_structure
structure[0], [func(*x) for x in entries],
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 635, in <listcomp>
structure[0], [func(*x) for x in entries],
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1016, in _convert_numpy_and_scipy
return ops.convert_to_tensor(x, dtype=dtype)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1499, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 52, in _default_conversion_function
return constant_op.constant(value, dtype, name=name)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 264, in constant
allow_broadcast=True)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 275, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 300, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).
Any kind of help would be very much appreciated !