I am trying to replicate the behavior of Orange's built-in model widgets for learners like XGBoost. In this case I managed to get Test & Score to show results for LightGBM with Bayesian optimization: I wrote a wrapper learner, and for now I print the best parameters to the console, but I would like them to be viewable in a Data Table. Finally, although the Data Table connected to Test & Score displays the probability and classification columns correctly, when I connect a Data Table directly to the Python Script widget, it hangs. If anyone has experience with this, please review the code so it follows best practices and I can use it as a template for wrapping other models. Thank you very much!
import numpy as np
import lightgbm as lgb
from Orange.data import Table, Domain, ContinuousVariable, StringVariable
from Orange.classification import Learner, Model
from skopt import BayesSearchCV
from skopt.space import Real, Integer

class LightGBMLearner(Learner):
    """A wrapper learner for a LightGBM classifier tuned with Bayesian optimization."""

    def __init__(self, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.name = 'LightGBM BayesOpt'

    def fit_storage(self, data):
        # Orange passes a (preprocessed) Table; the model constructor
        # runs the hyperparameter search and fits the final estimator
        return LightGBMModel(data)

class LightGBMModel(Model):
    """A wrapper model for a LightGBM classifier fitted with Bayesian optimization."""

    def __init__(self, data):
        super().__init__(data.domain)  # Model.__init__ stores the domain
        self.lgbm, self.best_params = self.bayesian_optimization(data)

    def bayesian_optimization(self, data):
        # Hyperparameter search space for LightGBM
        search_space = {
            'learning_rate': Real(0.01, 0.5),
            'n_estimators': Integer(100, 1000),
            'num_leaves': Integer(20, 150),
            'max_depth': Integer(3, 10),
            'min_child_weight': Integer(1, 10),
            'colsample_bytree': Real(0.1, 1.0),
        }
        # Wrap the LightGBM classifier in scikit-optimize's BayesSearchCV
        lgbm = lgb.LGBMClassifier()
        optimizer = BayesSearchCV(lgbm, search_space, n_iter=32,
                                  random_state=0, cv=3)
        # Fit on the raw numpy arrays stored in the Orange Table
        optimizer.fit(data.X, data.Y.flatten())
        # Return the refitted best estimator and its parameters
        return optimizer.best_estimator_, optimizer.best_params_

    def predict(self, X):
        # Called when Orange passes a bare numpy array
        return self.lgbm.predict(X)

    def predict_storage(self, data):
        # Called when Orange passes a Table; returning the
        # (values, probabilities) pair lets Orange use both
        predictions = self.lgbm.predict(data.X)
        probabilities = self.lgbm.predict_proba(data.X)
        return predictions, probabilities
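
# Aside: if I read Orange.base.Model correctly, calling model(data) on a Table
# dispatches to predict_storage above, and the ret argument selects what is
# returned, e.g.
#   probs = model(data, ret=Model.Probs)             # probabilities only
#   vals, probs = model(data, ret=Model.ValueProbs)  # both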

# Create the learner and train the classifier
if in_data is not None:
    out_learner = LightGBMLearner()
    out_classifier = out_learner(in_data)

    # Convert the optimal parameters to an Orange table. A StringVariable
    # cannot be a domain attribute, so "Parameter" goes into the metas and
    # the numeric "Value" is the single attribute column.
    domain = Domain([ContinuousVariable("Value")],
                    metas=[StringVariable("Parameter")])
    rows = [[float(value), str(name)]
            for name, value in out_classifier.best_params.items()]
    out_params = Table.from_list(domain, rows)
    # The Python Script widget only sends its predefined outputs downstream
    # (out_data, out_learner, out_classifier, out_object), so the parameter
    # table is also assigned to out_object
    out_object = out_params
    # Get predictions and probabilities for the input data
    predictions, probabilities = out_classifier.predict_storage(in_data)

    # New meta variables for the predicted class and the class probabilities
    class_var = in_data.domain.class_var
    prediction_var = StringVariable("Prediction")
    prob_vars = [ContinuousVariable(f"P({class_var.values[i]})")
                 for i in range(probabilities.shape[1])]

    # Extend the domain: attributes and class stay, the new columns are metas
    new_domain = Domain(in_data.domain.attributes,
                        in_data.domain.class_vars,
                        in_data.domain.metas + tuple([prediction_var] + prob_vars))

    # Map numeric predictions back to class names so the string meta column
    # displays properly, then stack the new meta columns onto the old ones
    pred_labels = np.array([class_var.values[int(p)] for p in predictions],
                           dtype=object).reshape(-1, 1)
    new_metas = np.hstack((in_data.metas, pred_labels, probabilities))
    out_data = Table.from_numpy(new_domain, in_data.X, in_data.Y, new_metas)
else:
    out_learner = None
    out_classifier = None
    out_data = None
    out_params = None
    out_object = None
I ran the code above; what I would like is to see the optimized hyperparameters displayed in a table, the way the Curve Fit widget shows its fitted coefficients.
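In case it is useful, here is a minimal sketch of how I exercise the wrapper outside the Python Script widget, in a plain Python session with the definitions above already loaded (it assumes Orange's bundled iris dataset, and the learner call runs the full Bayesian search, so it is slow):

data = Table("iris")                      # any classification dataset works
learner = LightGBMLearner()
model = learner(data)                     # Learner.__call__ -> fit_storage
print(model.best_params)                  # best hyperparameters found
values, probs = model.predict_storage(data)
print(values[:5])
print(probs[:5])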