I am trying to set up a Bayesian Neural Network Using Iris Data. However, the code is constantly having issues with the loss function.
The first error comes up in (7). Does anyone know how to solve this issue or has an idea for another way to code? As this is just an exemplary dataset, it would be necessary to apply the code to some other data later.
Every piece of advise is much appreciated!!!
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
#(1) Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
#(2) Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)
#(3) Normalize the features
mean, std = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
from tensorflow.python.framework.ops import kwarg_only
kwargs = {'name': 'dense_layer'}
#(4) tfp.layers.DenseReparameterization(
units=10,
activation=None,
activity_regularizer=None,
trainable=True,
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
kernel_posterior_tensor_fn=(lambda d: d.sample()),
kernel_prior_fn=tfp.layers.default_multivariate_normal_fn,
kernel_divergence_fn=(lambda q, p, ignore: tfp.distributions.kl_divergence(q, p)),
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn(is_singular=True),
bias_posterior_tensor_fn=(lambda d: d.sample()),
bias_prior_fn=None,
bias_divergence_fn=(lambda q, p, ignore: tfp.distributions.kl_divergence(q, p)),
**kwargs
)
#(5) Create the Bayesian model
input_shape = (X_train.shape[1])
# (6) Define the Bayesian Neural Network model using tf.keras
def build_bayesian_model(input_shape):
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, input_shape=input_shape, activation="relu"),
tf.keras.layers.Dense(64, activation="relu"),
tfp.layers.DenseFlipout(3, activation="softmax")
])
return model
#(7) Define a custom loss function
logits = build_bayesian_model(X_train.shape[1:])
y_pred = logits(X_test)
neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(labels=None, logits=logits)
kl = sum(model.losses)
loss = neg_log_likelihood + kl
train_op = tf.train.AdamOptimizer().minimize(loss)
#(8) Compile the model
bayesian_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=neg_log_likelihood, metrics=['accuracy'])
#(9) Define the labels
num_classes = 3
labels = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
#(10) Train the Bayesian Neural Network
bayesian_model.fit(X_train, y_train, epochs=1000, verbose=1)
#(11) Calculate accuracy
accuracy = tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(y_test, tf.argmax(y_pred_mean, axis=1))
print(f'Accuracy: {accuracy.numpy()}')
First I tried the standard log-loss function. However, this did not work. So I decided to define it myself. This does not work either.