Bayesian Dense Layer Scaling Issues

16 views Asked by At

have been trying to put together a model with a Bayesian layer for a while now and can't seem to fix a shape mismatch; unsure what's going on and have been trying to debug for a couple hours now. Code and error below:

!pip install tensorflow tensorflow-probability
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer, LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow_probability.python.layers import DistributionLambda
import numpy as np

tfd = tfp.distributions

def BayesianDenseLayer(units, kl_weight):
    def prior(kernel_size, bias_size, dtype=None):
        n = kernel_size + bias_size
        prior_model = Sequential([
            tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=tf.zeros(n), scale=tf.ones(n)))
        ])
        return prior_model

    def posterior(kernel_size, bias_size, dtype=None):
        n = kernel_size + bias_size
        posterior_model = Sequential([
            tfp.layers.VariableLayer(2 * n, dtype=dtype),
            tfp.layers.DistributionLambda(lambda t: tfd.Normal(
                loc=t[..., :n],
                scale=tf.nn.softplus(t[..., n:]) + 1e-5))
        ])
        return posterior_model

    return tfp.layers.DenseVariational(units, make_posterior_fn=posterior, make_prior_fn=prior, kl_weight=kl_weight)

def build_bayesian_model(input_shape, learning_rate, dense_units, bayesian_units, kl_weight, num_components=94):
    model = Sequential([
        InputLayer(input_shape=input_shape),
        LSTM(dense_units, return_sequences=True),
        LeakyReLU(alpha=0.01),
        LSTM(dense_units, return_sequences=True),
        LeakyReLU(alpha=0.01),
        LSTM(dense_units, return_sequences=False),
        LeakyReLU(alpha=0.01),
        BayesianDenseLayer(bayesian_units, kl_weight),
        Dense(num_components * 3),  # 3 for each component (logits, mean and std)
        DistributionLambda(lambda t: tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(logits=t[..., :num_components]),
            components_distribution=tfd.Normal(
                loc=t[..., num_components:2*num_components],
                scale=tf.math.softplus(t[..., 2*num_components:]) + 1e-5)
        ))
    ])

    def elpd_loss(y_true, y_pred):
        log_likelihoods = y_pred.log_prob(y_true)
        return -tf.reduce_mean(log_likelihoods)

    model.compile(loss=elpd_loss, optimizer=Adam(learning_rate=learning_rate))
    return model
np.random.seed(0)
sample_input_shape = (30, 5)  # 30 time steps, 5 features
sample_data = np.random.random((100, *sample_input_shape))  # 100 sequences
sample_targets = np.random.random((100, 1))  # 100 target values

model = build_bayesian_model(sample_input_shape, learning_rate=0.001, dense_units=50, bayesian_units=10, kl_weight=0.001)

model.fit(sample_data, sample_targets, epochs=5, verbose=1)

y_pred_params = model.predict(sample_data)

num_components = 3

y_pred_distributions = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(logits=y_pred_params[..., :num_components]),
    components_distribution=tfd.Normal(
        loc=y_pred_params[..., num_components:2*num_components],
        scale=tf.math.softplus(y_pred_params[..., 2*num_components:]) + 1e-5)
)

print(y_pred_distributions)
print(type(y_pred_distributions))

Error:

ValueError Traceback (most recent call last) /usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/distributions/normal.py in _parameter_control_dependencies(self, is_init) 237 try: --> 238 self._batch_shape() 239 except ValueError:

8 frames ValueError: Incompatible shapes for broadcasting. Two shapes are compatible if for each dimension pair they are either equal or one of them is 1. Received: (3,) and (94,).

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last) in init(self, loc, scale, validate_args, allow_nan_stats, name)

/usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/distributions/normal.py in _parameter_control_dependencies(self, is_init) 238 self._batch_shape() 239 except ValueError: --> 240 raise ValueError( 241 'Arguments loc and scale must have compatible shapes; ' 242 'loc.shape={}, scale.shape={}.'.format(

ValueError: Arguments loc and scale must have compatible shapes; loc.shape=(3,), scale.shape=(94,).

Have tried a couple different approaches to slicing but none of them have worked; got to this point and then got stuck. Was hoping to get this to output distribution tf objects for each point in y pred, but that doesn't seem to be working.

0

There are 0 answers