I've been trying to put together a model with a Bayesian layer for a while now and can't seem to fix a shape mismatch; I'm not sure what's going on and have been debugging for a couple of hours. Code and error below:
!pip install tensorflow tensorflow-probability
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow_probability.python.layers import DistributionLambda
import numpy as np
tfd = tfp.distributions
def BayesianDenseLayer(units, kl_weight):
    def prior(kernel_size, bias_size, dtype=None):
        n = kernel_size + bias_size
        prior_model = Sequential([
            tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=tf.zeros(n), scale=tf.ones(n)))
        ])
        return prior_model

    def posterior(kernel_size, bias_size, dtype=None):
        n = kernel_size + bias_size
        posterior_model = Sequential([
            tfp.layers.VariableLayer(2 * n, dtype=dtype),
            tfp.layers.DistributionLambda(lambda t: tfd.Normal(
                loc=t[..., :n],
                scale=tf.nn.softplus(t[..., n:]) + 1e-5))
        ])
        return posterior_model

    return tfp.layers.DenseVariational(units, make_posterior_fn=posterior, make_prior_fn=prior, kl_weight=kl_weight)
def build_bayesian_model(input_shape, learning_rate, dense_units, bayesian_units, kl_weight, num_components=94):
    model = Sequential([
        InputLayer(input_shape=input_shape),
        LSTM(dense_units, return_sequences=True),
        LeakyReLU(alpha=0.01),
        LSTM(dense_units, return_sequences=True),
        LeakyReLU(alpha=0.01),
        LSTM(dense_units, return_sequences=False),
        LeakyReLU(alpha=0.01),
        BayesianDenseLayer(bayesian_units, kl_weight),
        Dense(num_components * 3),  # 3 for each component (logits, mean and std)
        DistributionLambda(lambda t: tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=t[..., :num_components]),
            components_distribution=tfd.Normal(
                loc=t[..., num_components:2*num_components],
                scale=tf.math.softplus(t[..., 2*num_components:]) + 1e-5)
        ))
    ])

    def elpd_loss(y_true, y_pred):
        log_likelihoods = y_pred.log_prob(y_true)
        return -tf.reduce_mean(log_likelihoods)

    model.compile(loss=elpd_loss, optimizer=Adam(learning_rate=learning_rate))
    return model
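To spell out my assumption about how the final Dense(num_components * 3) output gets split inside that DistributionLambda, here is a toy check with plain numpy (num_components=4 and the variable names are only for illustration):

# Toy check of the slicing I'm relying on above (illustrative values only).
import numpy as np

nc = 4
t = np.arange(3 * nc, dtype=np.float32)  # stand-in for one row of the Dense(3 * nc) output

logits = t[..., :nc]           # first block  -> mixture logits, shape (4,)
means = t[..., nc:2 * nc]      # second block -> component means, shape (4,)
raw_stds = t[..., 2 * nc:]     # third block  -> pre-softplus stddevs, shape (4,)
print(logits.shape, means.shape, raw_stds.shape)  # (4,) (4,) (4,)

The rest of the script that reproduces the error: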
np.random.seed(0)
sample_input_shape = (30, 5) # 30 time steps, 5 features
sample_data = np.random.random((100, *sample_input_shape)) # 100 sequences
sample_targets = np.random.random((100, 1)) # 100 target values
model = build_bayesian_model(sample_input_shape, learning_rate=0.001, dense_units=50, bayesian_units=10, kl_weight=0.001)
model.fit(sample_data, sample_targets, epochs=5, verbose=1)
y_pred_params = model.predict(sample_data)
num_components = 3
y_pred_distributions = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(logits=y_pred_params[..., :num_components]),
    components_distribution=tfd.Normal(
        loc=y_pred_params[..., num_components:2*num_components],
        scale=tf.math.softplus(y_pred_params[..., 2*num_components:]) + 1e-5)
)
print(y_pred_distributions)
print(type(y_pred_distributions))
Error:
ValueError                                Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/distributions/normal.py in _parameter_control_dependencies(self, is_init)
    237     try:
--> 238       self._batch_shape()
    239     except ValueError:

... 8 frames ...

ValueError: Incompatible shapes for broadcasting. Two shapes are compatible if for each dimension pair they are either equal or one of them is 1. Received: (3,) and (94,).

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
in __init__(self, loc, scale, validate_args, allow_nan_stats, name)

/usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/distributions/normal.py in _parameter_control_dependencies(self, is_init)
    238       self._batch_shape()
    239     except ValueError:
--> 240       raise ValueError(
    241           'Arguments loc and scale must have compatible shapes; '
    242           'loc.shape={}, scale.shape={}.'.format(

ValueError: Arguments loc and scale must have compatible shapes; loc.shape=(3,), scale.shape=(94,).
I've tried a couple of different approaches to the slicing, but none of them have worked; I got to this point and then got stuck. I was hoping to get this to output tf distribution objects for each point in y_pred, but that doesn't seem to be working.
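For reference, this is roughly the per-point distribution access I'm hoping to end up with (just a sketch of the intent, assuming the shapes eventually cooperate; the exact calls are illustrative):

# Sketch of the intended end state: one predictive distribution per input sequence,
# so I can query summaries and log-probabilities point by point.
dist = model(sample_data)          # calling the model (rather than predict) should give the distribution object
print(dist.mean().shape)           # hoping for one mean per sequence, e.g. (100,)
print(dist.stddev().shape)         # and a matching per-point uncertainty
print(dist.log_prob(sample_targets[:, 0]).shape)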