I'm using hyperopt to optimize the hyperparameters of LightGBM. The code I use is shown below. I'm trying to log the hyperparameters using log_params() in the objective function.
from sklearn.metrics import f1_score
import lightgbm as lgbm
import numpy as np
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope
import mlflow

lgbm_space = {
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    'n_estimators': hp.choice('n_estimators', np.arange(400, 1000, 50, dtype=int)),
    'learning_rate': hp.quniform('learning_rate', 0.02, 0.5, 0.02),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
    'num_leaves': hp.choice('num_leaves', np.arange(10, 80, 5, dtype=int)),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
    'subsample': hp.uniform('subsample', 0.7, 1.0),
    'min_child_samples': hp.choice('min_child_samples', np.arange(10, 50, 5, dtype=int))
}
search_space = lgbm_space
run_name = "run_optimization"
max_eval = 100

# define the objective function
def objective(search_space):
    model = lgbm.LGBMClassifier(**search_space, class_weight='balanced',
                                n_jobs=-1, random_state=123)
    model.fit(X_train, y_train,
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=10,
              verbose=False)
    y_pred = model.predict_proba(X_val)[:, 1]
    f1 = f1_score(y_val, (y_pred > 0.5).astype(int))
    mlflow.log_metric('f1 score', f1)
    mlflow.log_params(search_space)
    score = 1 - f1  # hyperopt minimizes the loss, so use 1 - f1
    return {'loss': score, 'status': STATUS_OK, 'model': model, 'params': search_space}
spark_trials = Trials()

with mlflow.start_run(run_name=run_name):
    best_params = hyperopt.fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_eval,
        trials=spark_trials)
I get error messages like the one below:
INVALID_PARAMETER_VALUE: Parameter with key colsample_bytree was already logged with a value of 0.9523828639856076. The attempted new value was 0.7640043300157543
I'm not sure what I did wrong.
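
As far as I can tell from the error message, MLflow treats params as immutable within a run, so logging the same key a second time with a different value fails. A minimal sketch that reproduces it outside hyperopt (values copied from the error message above), assuming a default local tracking setup:

import mlflow

with mlflow.start_run():
    mlflow.log_param('colsample_bytree', 0.9523828639856076)  # first evaluation logs the key
    mlflow.log_param('colsample_bytree', 0.7640043300157543)  # raises MlflowException: INVALID_PARAMETER_VALUE

Since fmin() calls the objective many times inside the single run opened by mlflow.start_run(run_name=run_name), every evaluation after the first tries to overwrite the params of that one run.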
Update: I added

with mlflow.start_run(nested=True):

within the objective function. There was also an issue raised for this here. Now the code creates a separate run (folder) for each evaluation, containing its params and metric.
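
For reference, the objective now looks roughly like this (only the MLflow calls changed; each hyperopt evaluation logs into its own child run, so param keys never collide):

def objective(search_space):
    # open a nested child run per evaluation so each param set lands in its own run
    with mlflow.start_run(nested=True):
        model = lgbm.LGBMClassifier(**search_space, class_weight='balanced',
                                    n_jobs=-1, random_state=123)
        model.fit(X_train, y_train,
                  eval_set=[(X_val, y_val)],
                  early_stopping_rounds=10,
                  verbose=False)
        y_pred = model.predict_proba(X_val)[:, 1]
        f1 = f1_score(y_val, (y_pred > 0.5).astype(int))
        mlflow.log_metric('f1 score', f1)
        mlflow.log_params(search_space)
    return {'loss': 1 - f1, 'status': STATUS_OK, 'model': model, 'params': search_space}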