How can I add user defined activation function in CNN model instead of builtin function in keras?


Instead of sigmoid, which gives sigmoid(x) = 1 / (1 + exp(-x)), I want to use a custom activation function mish, defined as mish(x) = x * tanh(softplus(x)). I want to use it like this: conv_layer1 = Conv3D(filters=8, kernel_size=(3, 3, 5), activation='mish')(input_layer)


There are 3 answers

Innat

You probably need something like this:

import tensorflow as tf

# custom activation function: mish(x) = x * tanh(softplus(x))
def mish(x):
    # tf.math.softplus(x) is the numerically stable form of log(1 + exp(x))
    return tf.keras.layers.Lambda(lambda t: t * tf.tanh(tf.math.softplus(t)))(x)

Dense(hidden_units, activation=mish)
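
The activation argument also accepts the callable directly, so for the Conv3D layer from the question a minimal sketch might look like this (the input shape below is just a placeholder):

import tensorflow as tf

def mish(x):
    return x * tf.tanh(tf.math.softplus(x))

input_layer = tf.keras.Input(shape=(32, 32, 16, 1))  # placeholder shape
conv_layer1 = tf.keras.layers.Conv3D(filters=8, kernel_size=(3, 3, 5),
                                     activation=mish)(input_layer)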
Nicolas Gervais

You can just create a function and pass it as a callable to your model via the activation argument. Here's the function:

import tensorflow as tf

def mish(inputs):
    # mish(x) = x * tanh(softplus(x))
    x = tf.nn.softplus(inputs)
    x = tf.nn.tanh(x)
    x = tf.multiply(x, inputs)
    return x

You can put it as activation in one of your layers:

from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout

model = tf.keras.Sequential([
            Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), 
                   input_shape=(28, 28, 1), activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), 
                   activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Flatten(),
            Dense(64, activation=mish), # here
            Dropout(5e-1),
            Dense(10, activation='softmax')
])

Here's the training:

import tensorflow as tf
from tensorflow import keras
import numpy as np

(xtrain, ytrain), (xtest, ytest) = keras.datasets.mnist.load_data()

xtrain = np.float32(xtrain/255)
xtest = np.float32(xtest/255)

ytrain = np.int32(ytrain)
ytest = np.int32(ytest)

def pre_process(inputs, targets):
    # add a channel dimension and one-hot encode the labels;
    # the images were already scaled to [0, 1] above, so don't divide by 255 again
    inputs = tf.expand_dims(inputs, -1)
    targets = tf.one_hot(targets, depth=10)
    return inputs, targets

train_data = tf.data.Dataset.from_tensor_slices((xtrain, ytrain)).\
    take(10_000).shuffle(10_000).batch(8).map(pre_process)
test_data = tf.data.Dataset.from_tensor_slices((xtest, ytest)).\
    take(1_000).shuffle(1_000).batch(8).map(pre_process)

def mish(inputs):
    x = tf.nn.softplus(inputs)
    x = tf.nn.tanh(x)
    x = tf.multiply(x, inputs)
    return x

model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), 
                   input_shape=(28, 28, 1), activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), 
                   activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation=mish), 
            tf.keras.layers.Dropout(5e-1),
            tf.keras.layers.Dense(10, activation='softmax')])

model.compile(loss='categorical_crossentropy', optimizer='adam')

history = model.fit(train_data, validation_data=test_data, epochs=10)
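
If you specifically want to refer to the activation by its string name, as in the question's activation='mish', one approach is to register the function with Keras' custom objects first. A minimal sketch, assuming TF 2.x:

import tensorflow as tf

def mish(x):
    return x * tf.tanh(tf.math.softplus(x))

# register the function under the name 'mish' so the string lookup resolves
tf.keras.utils.get_custom_objects().update({'mish': mish})

conv_layer1 = tf.keras.layers.Conv3D(filters=8, kernel_size=(3, 3, 5),
                                     activation='mish')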
Diganta Misra

Mish is already available in TensorFlow Addons, so you don't need to define it as a custom function. Visit this page for more details: https://www.tensorflow.org/addons/api_docs/python/tfa/activations/mish
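
A minimal sketch of using it, assuming the tensorflow-addons package is installed:

import tensorflow as tf
import tensorflow_addons as tfa

# pass the Addons implementation directly as the activation callable
conv_layer1 = tf.keras.layers.Conv3D(filters=8, kernel_size=(3, 3, 5),
                                     activation=tfa.activations.mish)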