ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray). To use for embeddings

906 views Asked by At

I have the following Keras data generator, that receives pairs as input of the form:

    pairs = [((0, 1, 2), 0), 
             ((3, 4, 5, 6, 7, 8, 9, 10, 11), 0),
             ((12,), 1), 
             ((13, 14, 15, 16), 1),
             ((17, 18, 19, 20), 2)]

Now, I want to generate positive and negative examples of the data. If pairs were not nested tuples, the code would've worked fine. Here's the code snippet:

    def data_generation(self, pairs):
    """Generate batches of samples for training"""
    batch = np.zeros(shape=(self.batch_size, 3), dtype=object)

    # Adjust label based on task
    if self.classification:
        neg_label = 0
    else:
        neg_label = -1

    # This creates a generator
    while True:
        for idx, (file_id, test_id) in enumerate(random.sample(pairs, self.n_positive)):
            batch[idx, :] = (np.asarray(file_id), test_id, 1)

        # Increment idx by 1
        idx += 1

        # Add negative examples until reach batch size
        while idx < self.batch_size:

            # random selection
            random_test = random.randrange(self.nr_tests)

            # Check to make sure this is not a positive example
            if (file_id, random_test) not in self.pairs_set:
                # Add to batch and increment index
                batch[idx, :] = (np.asarray(file_id), random_test, neg_label)
                idx += 1

        np.random.shuffle(batch)
        yield {'file': batch[:, 0], 'test': batch[:, 1]}, batch[:, 2]

The shapes of the batch vectors are:

print(batch[:, 0].shape)
print(batch[:, 1].shape)
print(batch[:, 2].shape)

Output:

(2000,)
(2000,)
(2000,)

The problem is that for each batch[:, 0] there is a nested nd.array with variable length.

Now the output of data_generation() is:

{'file': array([array([809, 386, 813,  75, 248, 614,  34, 332, 389]),
   array([ 52,  53, 486, 489]), array([ 52,  53, 486, 489]), ...,
   array([ 52,  53, 486, 489]), array([ 52,  53, 486, 489]),
   array([ 52,  53, 486, 489])], dtype=object), 'test': array([1247, 1566, 814, ..., 142, 2336, 674], dtype=object)} [1 0 0 ... 0 0 0]

Then I build the following Keras Model that trains one Embedding for set of files and one for tests:

 def build_model(self, embedding_size=50, optimizer='Adam', classification=True):
    """
    Build model architecture/framework
    :return: model
    """
    from keras.layers import Input, Embedding, Dot, Reshape, Dense
    from keras.models import Model

    # Both inputs are 1-dimensional
    revision = Input(name='revision', shape=[1])
    test = Input(name='test', shape=[1])

    # Embedding the book (shape will be (None, 1, 50))
    file_embedding = Embedding(name='file_embedding',
                               input_dim=len(self.Data.file_index),
                               output_dim=embedding_size, input_length=self.max_len - 1)(revision)

    # Embedding the link (shape will be (None, 1, 50))
    test_embedding = Embedding(name='test_embedding',
                               input_dim=len(self.Data.test_index),
                               output_dim=embedding_size)(test)

    # Merge the layers with a dot product along the second axis (shape will be (None, 1, 1))
    merged = Dot(name='dot_product', normalize=True, axes=2)([file_embedding, test_embedding])

    # Reshape to be a single number (shape will be (None, 1))
    merged = Reshape(target_shape=[1])(merged)

    # If classification, add extra layer and loss function is binary cross entropy
    if classification:
        merged = Dense(1, activation='sigmoid')(merged)
        model = Model(inputs=[revision, test], outputs=merged)
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

After calling model.fit(), I get the following Traceback:

 File "/Users/joaolousada/Documents/5ÂșAno/Master-Thesis/main/Prioritizer/Prioritizer.py", line 170, in crossValidation
    verbose=2)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1063, in fit
    steps_per_execution=self._steps_per_execution)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1117, in __init__
    model=model)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 916, in __init__
    **kwargs)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 788, in __init__
    peek = _process_tensorlike(peek)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1021, in _process_tensorlike
    inputs = nest.map_structure(_convert_numpy_and_scipy, inputs)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 635, in map_structure
    structure[0], [func(*x) for x in entries],
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/util/nest.py", line 635, in <listcomp>
    structure[0], [func(*x) for x in entries],
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/data_adapter.py", line 1016, in _convert_numpy_and_scipy
    return ops.convert_to_tensor(x, dtype=dtype)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1499, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_conversion_registry.py", line 52, in _default_conversion_function
    return constant_op.constant(value, dtype, name=name)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 264, in constant
    allow_broadcast=True)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 275, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 300, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/Users/joaolousada/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

Any kind of help would be very much appreciated !

0

There are 0 answers