I'm working on an imbalanced dataset (99.84 : 0.16 class ratio) and tuning a LightGBM model with hyperopt.
The code is as follows:
def objective(parameter):
    """Hyperopt objective: train LightGBM with one sampled configuration.

    Args:
        parameter: LightGBM parameter dict, passed straight to ``lgb.train``.

    Returns:
        A hyperopt result dict. ``loss`` is the negated ROC AUC of the
        model's predictions, so minimising the loss maximises the AUC.
    """
    # X_train, y_train, X_val and y_valid are read from the enclosing scope.
    # NOTE(review): predictions are made on ``X_val`` but scored against
    # ``y_valid`` -- confirm both names refer to the same hold-out split.
    train_ = lgb.Dataset(data=X_train, label=y_train)
    model = lgb.train(parameter, train_set=train_)
    y_pred = model.predict(X_val)
    return {'loss': -roc_auc_score(y_valid, y_pred), 'status': STATUS_OK}
# Hyperopt search space for the LightGBM parameters handed to `objective`.
parameter = {
    # Fixed settings.
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    'verbosity': '-1',
    # Tuned settings.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    # Either no regularisation (0) or a log-uniformly drawn strength.
    'lambda_l1': hp.choice('lambda_l1', [0, hp.loguniform('lambda_l1_positive', -16, 2)]),
    'lambda_l2': hp.choice('lambda_l2', [0, hp.loguniform('lambda_l2_positive', -16, 2)]),
    'feature_fraction': hp.uniform('feature_fraction', 0.25, 0.75),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.25, 0.75),
    'bagging_freq': hp.randint('bagging_freq', 1, 4),
    # hp.choice: fmin reports the *index* of the chosen option for this key.
    'max_depth': hp.choice('max_depth', range(1, 6)),
    # LightGBM documents `min_child_samples` as an alias of `min_data_in_leaf`
    # and `min_child_weight` as an alias of `min_sum_hessian_in_leaf`. When
    # both spellings are supplied the alias is ignored, so only the primary
    # names are tuned to avoid searching over dimensions that have no effect.
    'min_data_in_leaf': scope.int(hp.qloguniform('min_data_in_leaf', 0, 6, 1)),
    'min_sum_hessian_in_leaf': hp.uniform('min_sum_hessian_in_leaf', 0.001, 0.01),
    'min_split_gain': hp.uniform('min_split_gain', 0.5, 1),
    # Defined exactly once: a dict literal keeps only the last value for a
    # repeated key, so a second 'num_leaves' entry would silently win.
    # NOTE(review): with max_depth <= 5 a tree probably cannot grow 50+
    # leaves, so this range may never be the binding limit -- confirm.
    'num_leaves': hp.randint('num_leaves', 50, 250),
    'path_smooth': hp.randint('path_smooth', 1, 15),
    # Weight applied to the positive class to counter the class imbalance.
    'scale_pos_weight': hp.randint('scale_pos_weight', 20, 225),
}
from hyperopt import space_eval

# Run TPE for 42 evaluations of `objective` over the `parameter` space.
best_params = fmin(fn=objective, space=parameter, algo=tpe.suggest, max_evals=42)
# fmin reports hp.choice dimensions (max_depth, lambda_l1, lambda_l2) as the
# index of the selected option rather than its value, and it omits the fixed
# settings. space_eval maps the result back onto the search space, giving the
# actual parameter values; use this dict when training the final model.
best_params_resolved = space_eval(parameter, best_params)
After obtaining best_params, I plotted the distribution: the data points are ordered by predicted probability and grouped into bins of 3000 data points each, like this.
I would like to know how I can fix this so that the plot is smoother and no point is higher than the previous point.
