Cannot log lightGBM parameter using log_params in mlflow/hyperopt

Question

Cannot log lightGBM parameter using log_params in mlflow/hyperopt

162 views Asked by zesla At 08 November 2023 at 01:52

I'm using hyperopt to optimize the hyperparameters of a LightGBM model. The code I use is shown below. I'm trying to log the hyperparameters with mlflow.log_params() inside the objective function.

from sklearn.metrics import f1_score
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope 
import mlflow


# Hyperopt search space: one entry per LGBMClassifier keyword argument
# that is tuned. The dict key is the estimator argument, the first
# argument of each hp.* call is the label hyperopt uses for that dimension.
# NOTE: the np.arange calls need `import numpy as np`, which the import
# lines of this snippet do not show.
lgbm_space = {
    # Categorical pick among three boosting modes.
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    # Tree count picked from 400, 450, ..., 950 (the stop value is excluded).
    'n_estimators': hp.choice(
        'n_estimators',
        np.arange(400, 1000, 50, dtype=int),
    ),
    # Uniform draw on [0.02, 0.5], quantised with a step of 0.02.
    'learning_rate': hp.quniform('learning_rate', 0.02, 0.5, 0.02),
    # Quantised draw with step 1 on [2, 16], cast to int by scope.int.
    'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
    # Leaf count picked from 10, 15, ..., 75.
    'num_leaves': hp.choice(
        "num_leaves",
        np.arange(10, 80, 5, dtype=int),
    ),
    # Continuous uniform draws between 0.7 and 1.0.
    'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
    'subsample': hp.uniform('subsample', 0.7, 1.0),
    # Picked from 10, 15, ..., 45.
    'min_child_samples': hp.choice(
        'min_child_samples',
        np.arange(10, 50, 5, dtype=int),
    ),
}

# Settings consumed by the optimisation run.
search_space = lgbm_space
run_name = "run_optimization"
max_eval = 100  # number of hyperopt evaluations

#define objective function
def objective(search_space):
    """Fit one LightGBM candidate and return its loss in hyperopt's format.

    Args:
        search_space: dict of keyword arguments forwarded unchanged to
            ``lgbm.LGBMClassifier`` (hyperopt calls this function with one
            sampled point of the search space).

    Returns:
        dict with ``loss`` (``1 - f1`` on the validation data), ``status``,
        the fitted ``model`` and the ``params`` that were used.

    NOTE: the metric and the parameters are logged to whichever MLflow run
    is active when this function is called. If every trial runs under the
    same run, the second trial tries to re-log the same parameter keys with
    different values, which MLflow rejects with INVALID_PARAMETER_VALUE.
    """
    clf = lgbm.LGBMClassifier(
        **search_space,
        class_weight='balanced',
        n_jobs=-1,
        random_state=123,
    )
    clf.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=10,
        verbose=False,
    )

    # Column 1 of predict_proba is thresholded at 0.5 to get hard 0/1 labels.
    proba_col1 = clf.predict_proba(X_val)[:, 1]
    hard_labels = (proba_col1 > 0.5).astype(int)
    f1 = f1_score(y_val, hard_labels)

    mlflow.log_metric('f1 score', f1)
    mlflow.log_params(search_space)

    # hyperopt minimises, so the F1 score is turned into a loss.
    return {
        'loss': 1 - f1,
        'status': STATUS_OK,
        'model': clf,
        'params': search_space,
    }

# Trial store for the search. Despite the variable name this is a plain
# hyperopt Trials object, not SparkTrials.
spark_trials = Trials()

# A single MLflow run wraps the whole search, so every call to objective()
# is made while this same run is active.
with mlflow.start_run(run_name=run_name):
    best_params = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_eval,
        trials=spark_trials,
    )

I get error messages like the one below:

INVALID_PARAMETER_VALUE: Parameter with key colsample_bytree was already logged with a value of 0.9523828639856076. The attempted new value was 0.7640043300157543

I'm not sure what I did wrong.

Original Q&A

There is 1 answer

**Kinjal** · Accepted Answer · 2023-11-16T11:12:18+00:00

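I added a `with mlflow.start_run(nested=True):` block inside the objective function, so that each evaluation logs its params and metric to its own child run instead of re-logging them to the single parent run. There was also an issue raised for this here. Now the code creates a separate folder for each evaluation, containing its params and metric.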

import numpy as np
from sklearn.metrics import f1_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope 
import mlflow

# Example data: the iris dataset, split into a training and a validation
# part, stratified on the target.
iris = load_iris()
X_train, X_val, y_train, y_val = train_test_split(
    iris.data,
    iris.target,
    stratify=iris.target,
)

# Hyperopt search space: one entry per LGBMClassifier keyword argument
# that is tuned. The dict key is the estimator argument, the first
# argument of each hp.* call is the label hyperopt uses for that dimension.
lgbm_space = {
    # Categorical pick among three boosting modes.
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    # Tree count picked from 400, 450, ..., 950 (the stop value is excluded).
    'n_estimators': hp.choice(
        'n_estimators',
        np.arange(400, 1000, 50, dtype=int),
    ),
    # Uniform draw on [0.02, 0.5], quantised with a step of 0.02.
    'learning_rate': hp.quniform('learning_rate', 0.02, 0.5, 0.02),
    # Quantised draw with step 1 on [2, 16], cast to int by scope.int.
    'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
    # Leaf count picked from 10, 15, ..., 75.
    'num_leaves': hp.choice(
        "num_leaves",
        np.arange(10, 80, 5, dtype=int),
    ),
    # Continuous uniform draws between 0.7 and 1.0.
    'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
    'subsample': hp.uniform('subsample', 0.7, 1.0),
    # Picked from 10, 15, ..., 45.
    'min_child_samples': hp.choice(
        'min_child_samples',
        np.arange(10, 50, 5, dtype=int),
    ),
}

# Settings consumed by the optimisation run.
search_space = lgbm_space
run_name = "run_optimization"
max_eval = 2  # only two evaluations: enough to demonstrate the logging

#define objective function
def objective(search_space):
    """Fit one LightGBM candidate inside its own nested MLflow run.

    Args:
        search_space: dict of keyword arguments forwarded unchanged to
            ``lgbm.LGBMClassifier`` (hyperopt calls this function with one
            sampled point of the search space).

    Returns:
        dict with ``loss`` (``1 - weighted F1`` on the validation data),
        ``status`` and the fitted ``model``.
    """
    model = lgbm.LGBMClassifier(
        **search_space, class_weight='balanced', n_jobs=-1, random_state=123
    )
    callbacks = [
        # `verbose` is a boolean flag; False keeps early stopping quiet
        # (a non-zero number such as -10 is truthy and would enable it).
        lgbm.early_stopping(2, verbose=False),
        # period=0 turns off the per-iteration evaluation log.
        lgbm.log_evaluation(period=0),
    ]

    # One child run per trial: each evaluation gets its own set of params
    # and metrics, so no parameter key is ever logged twice with different
    # values in the same run.
    with mlflow.start_run(nested=True):
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            callbacks=callbacks,
        )
        # predict() returns the most probable class label, which is valid
        # for any number of classes. Thresholding a single predict_proba
        # column only makes sense for a binary target and would never
        # predict the third iris class.
        y_pred = model.predict(X_val)
        f1 = f1_score(y_val, y_pred, average='weighted')
        mlflow.log_metric('f1 score', f1)
        mlflow.log_params(search_space)

    # hyperopt minimises, so the F1 score is turned into a loss.
    score = 1 - f1
    return {'loss': score, 'status': STATUS_OK, 'model': model}

# Trial store for the search. Despite the variable name this is a plain
# hyperopt Trials object, not SparkTrials.
spark_trials = Trials()

# Parent run for the whole search; objective() opens one nested child run
# per evaluation underneath it.
with mlflow.start_run(run_name=run_name, nested=True):
    best_params = hyperopt.fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_eval,
        trials=spark_trials,
    )
print("Best value found: ", best_params)
# For hp.choice dimensions fmin reports the index of the selected option,
# not the option itself; space_eval maps the result back onto the search
# space to obtain the actual hyperparameter values.
print("Best hyperparameters: ", space_eval(search_space, best_params))

TechQA.

Cannot log lightGBM parameter using log_params in mlflow/hyperopt

There is 1 answer

Related Questions in PYTHON

Related Questions in MLFLOW

Related Questions in LIGHTGBM

Related Questions in HYPEROPT

Popular Questions

Popular Tags

Trending Questions