Orange Data Mining Python Script for LightGBM with Bayesian optimization: not showing parameters


I am trying to replicate the behavior of Orange's built-in model widgets (such as XGBoost) from a Python Script widget. I managed to get Test & Score to show results for LightGBM with Bayesian optimization: I wrote a wrapper learner/model and, for now, print the best parameters to the console, but I want them viewable in a Data Table. Also, although a Data Table connected to Test & Score displays the probability and classification columns correctly, a Data Table connected directly to the Python Script widget hangs. If anyone has experience with this, please review the code for best practices so I can use it as a template for scripting other models. Thank you very much!

import Orange
import numpy as np
import lightgbm as lgb
from Orange.data import Table, Domain, ContinuousVariable, StringVariable
from Orange.classification import Learner, Model
from skopt import BayesSearchCV
from skopt.space import Real, Integer

class LightGBMLearner(Learner):
    """
    A wrapper learner for LightGBM classifier with Bayesian Optimization.
    """
    def __init__(self, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.name = 'LightGBM BayesOpt'

    def fit_storage(self, data):
        # Receives an Orange Table; must return an Orange Model
        return LightGBMModel(data)

class LightGBMModel(Model):
    """
    A wrapper model for LightGBM classifier with Bayesian Optimization.
    """
    def __init__(self, data):
        super().__init__(data.domain)
        # Run the Bayesian search once, when the model is built
        self.lgbm, self.best_params = self.bayesian_optimization(data)

    def bayesian_optimization(self, data):
        # Define the hyperparameter search space
        search_space = {
            'learning_rate': Real(0.01, 0.5),
            'n_estimators': Integer(100, 1000),
            'num_leaves': Integer(20, 150),
            'max_depth': Integer(3, 10),
            'min_child_weight': Integer(1, 10),
            'colsample_bytree': Real(0.1, 1.0)
        }

        # Setup LightGBM classifier within BayesSearchCV
        lgbm = lgb.LGBMClassifier()
        optimizer = BayesSearchCV(lgbm, search_space, n_iter=32, random_state=0, cv=3)

        # Fit the model
        optimizer.fit(data.X, data.Y.flatten())

        # Return the best estimator and best parameters
        return optimizer.best_estimator_, optimizer.best_params_

    def predict(self, X):
        # Return class indices; Orange maps them back to class values
        return self.lgbm.predict(X)

    def predict_storage(self, data):
        # Returning (values, probabilities) lets Test & Score use both
        X = data.X
        predictions = self.predict(X)
        probabilities = self.lgbm.predict_proba(X)
        return predictions, probabilities

# Create the learner and train the classifier
if in_data:
    out_learner = LightGBMLearner()
    out_classifier = out_learner(in_data)

    # Convert the optimal parameters to an Orange table.
    # A StringVariable may only appear among the metas, so the numeric
    # value is the lone attribute and the parameter name is a meta.
    domain = Domain([ContinuousVariable("Value")],
                    metas=[StringVariable("Parameter")])
    rows = [[value, name] for name, value in out_classifier.best_params.items()]
    out_params = Table.from_list(domain, rows)
    # NOTE: only out_data, out_learner, out_classifier and out_object are
    # exported from the Python Script widget; a name like out_params never
    # leaves the widget, which is why the parameters are not viewable.
    out_object = out_params

    # Get predictions and probabilities for the input data
    predictions, probabilities = out_classifier.predict_storage(in_data)

    # Map the numeric class indices back to class names so they can be
    # stored under a StringVariable meta
    class_var = in_data.domain.class_var
    pred_labels = np.array([class_var.values[int(p)] for p in predictions],
                           dtype=object).reshape(-1, 1)

    # Create new meta variables for the prediction and per-class probabilities
    prediction_var = StringVariable("Prediction")
    prob_vars = [ContinuousVariable(f"P({v})") for v in class_var.values]

    # Extend the domain with the new meta columns
    new_domain = Domain(in_data.domain.attributes, in_data.domain.class_vars,
                        in_data.domain.metas + tuple([prediction_var] + prob_vars))

    # Build the output table; Table.from_numpy is the supported constructor
    new_metas = np.hstack((in_data.metas, pred_labels, probabilities))
    out_data = Table.from_numpy(new_domain, in_data.X, in_data.Y, new_metas)
else:
    out_learner = None
    out_classifier = None
    out_data = None
    out_params = None
    out_object = None
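
For reference, the parameter table can be sanity-checked outside the canvas. This is a minimal standalone sketch with placeholder values, not part of the widget script:

from Orange.data import Table, Domain, ContinuousVariable, StringVariable

domain = Domain([ContinuousVariable("Value")],
                metas=[StringVariable("Parameter")])
check = Table.from_list(domain, [[0.05, "learning_rate"],
                                 [300, "n_estimators"]])
print(check)  # expect two rows: a Value column plus a Parameter meta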

I tried the code above, and I expect to see the optimized parameters displayed the way the Curve Fit widget shows its fitted coefficients.
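
For completeness, the learner itself can be exercised outside the canvas before wiring it to Test & Score. This is a minimal sketch, assuming a recent Orange 3 where CrossValidation is instantiated and then called, with iris standing in for any classification dataset:

from Orange.data import Table
from Orange.evaluation import CrossValidation, CA

data = Table("iris")
cv = CrossValidation(k=5, random_state=0)
results = cv(data, [LightGBMLearner()])
print("Classification accuracy:", CA(results))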
