Tensorflow OOM when running Hyperopt for neural network

26 views Asked by At

I am running neural network training for sequential data. Hyperopt will run approximately 20 iterations before TensorFlow runs out of memory (OOM). The training is done using a sliding window in an HPC environment.

Hence, changing the hardware/instance configuration is not possible. Is there anything that can be done in my code so that I will not have TensorFlow OOM issues?

Below is my code.

def data_generator(data, labels, batch_size):
    """Yield ``(data_batch, labels_batch)`` pairs forever, cycling the dataset.

    Batches are consecutive slices of at most ``batch_size`` items; the final
    batch of a pass may be shorter, and iteration restarts from the beginning
    once the dataset is exhausted (Keras expects an endless generator).
    """
    total = len(data)
    while True:
        start = 0
        while start < total:
            end = start + batch_size
            yield data[start:end], labels[start:end]
            start = end


# Stop training when the *training* loss stops improving for `Patience` epochs
# (`Patience` is defined outside this chunk).
# NOTE(review): this single EarlyStopping instance is shared across every
# Hyperopt trial; current Keras resets its counters in on_train_begin, so the
# reuse should be safe -- confirm for the installed TF version.
Callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=Patience)

# build model
def hybrid_model(params):
    """Build, train, and evaluate one CNN→LSTM→attention model for a Hyperopt trial.

    Parameters
    ----------
    params : dict
        Sampled hyperparameters: 'CNN1_neurons', 'CNN_dense', 'LSTM1_neurons',
        'LSTM1_dense', 'dropout_rate', 'batch_size'.

    Returns
    -------
    dict
        Hyperopt result dict: test-set loss plus mse/mae/r2/mape metrics and
        'status': STATUS_OK. Hyperopt minimizes the 'loss' key.
    """
    import gc  # local import: the file's import block is outside this chunk

    # Drop any graph/layer state left over from the previous trial before
    # building a new model -- the main lever against cross-trial memory growth.
    keras.backend.clear_session()

    # Input shape inferred from the training array: (timesteps, features).
    model_input = Input(shape=(len(X_train_array[0]), len(X_train_array[0][0])))
    conv_1 = Conv1D(params['CNN1_neurons'], kernel_size=3, activation='relu')(model_input)
    pooling_1 = MaxPooling1D(3, strides=2)(conv_1)
    drop_cnn1 = tf.keras.layers.Dropout(params['dropout_rate'])(pooling_1)

    flatten_1 = Flatten()(drop_cnn1)
    dense_cnnflat = tf.keras.layers.Dense(params['CNN_dense'], activation='relu')(flatten_1)
    drop_cnnflat = tf.keras.layers.Dropout(params['dropout_rate'])(dense_cnnflat)

    # Re-introduce a (single-step) time axis so the LSTM can consume the
    # flattened CNN features.
    reshape_1 = tf.keras.layers.Reshape((-1, params['CNN_dense']))(drop_cnnflat)
    lstm_1 = LSTM(params['LSTM1_neurons'], activation=mish, return_sequences=True)(reshape_1)

    # Self-attention over the LSTM outputs (query = value = lstm_1).
    attn = tf.keras.layers.Attention()([lstm_1, lstm_1])

    dense_lstm1 = tf.keras.layers.Dense(params['LSTM1_dense'], activation='relu')(attn)
    drop_lstm1 = tf.keras.layers.Dropout(params['dropout_rate'])(dense_lstm1)

    model_output = tf.keras.layers.Dense(len(Y_train_array[0]), activation='relu')(drop_lstm1)
    model = tf.keras.models.Model(inputs=model_input, outputs=model_output)

    model.compile(optimizer='adam', loss=rmse, metrics=['mse', 'mae', r2_scores, 'mape'])

    train_generator = data_generator(X_train_array, Y_train_array, batch_size=params['batch_size'])

    train_start = time.time()
    # `fit_generator` was deprecated and removed in TF 2.x; `fit` accepts
    # Python generators directly. Guard steps_per_epoch against 0 when the
    # sampled batch_size exceeds the dataset size.
    model.fit(train_generator,
              steps_per_epoch=max(1, len(X_train_array) // params['batch_size']),
              epochs=EPOCHS,
              callbacks=[Callback], verbose=2)
    train_time = str(round(time.time() - train_start, 3))
    test_result = model.evaluate(X_test_array, Y_test_array, verbose=2)

    # Release everything this trial allocated *before* returning, then force a
    # GC pass so native graph memory can actually be reclaimed between trials.
    # Convert metrics to plain floats first so the result dict keeps no
    # references into the model/session.
    result = {'loss': float(test_result[0]), 'mse': float(test_result[1]),
              'mae': float(test_result[2]), 'r2': float(test_result[3]),
              'mape': float(test_result[4]), 'status': STATUS_OK}
    del model, train_generator
    keras.backend.clear_session()
    gc.collect()
    return result

# Hyperopt search space. The one-argument form `hp.randint(label, upper)`
# samples from [0, upper), so a lower bound of 1 is required: 0 units would
# make Conv1D/Dense/LSTM raise at build time. The 3-arg (low, high) form is
# already used for 'batch_size', so it is available in this hyperopt version.
# NOTE(review): upper bounds of 2000 units per layer are a major driver of the
# OOM -- consider shrinking them or sampling on a log scale (hp.qloguniform).
space = {
    'batch_size': hp.randint('batch_size', 50, 300),
    'CNN1_neurons': hp.randint('CNN1_neurons', 1, 2000),
    'CNN_dense': hp.randint('CNN_dense', 1, 2000),
    'LSTM1_neurons': hp.randint('LSTM1_neurons', 1, 2000),
    'LSTM1_dense': hp.randint('LSTM1_dense', 1, 2000),
    'dropout_rate': hp.uniform('dropout_rate', 0.2, 0.8)
}

print('hyperparameter optimizing!')
# Wall-clock start of the whole search (presumably reported after fmin
# returns, outside this chunk -- confirm).
t2 = time.time()

# `Trials` records every trial's result dict; `fmin` runs up to 300
# evaluations of hybrid_model, sampling from `space` with the TPE algorithm
# and minimizing the returned 'loss'.
trials = Trials()
best = fmin(fn=hybrid_model,
            space=space,
            algo=tpe.suggest,
            max_evals=300,
            trials=trials)

I have tried clearing some memory before the start of every trial. Clearing the Keras session: keras.backend.clear_session()

Clearing the train generator: train_generator = None

Is there any other method to trace the memory leaks? Or what needs to be done to prevent TensorFlow OOM?

0

There are 0 answers