I have written the following custom evaluation function to use with xgboost, in order to optimize F1. Umfortuantely it returns an exception when run with xgboost.
The evaluation function is the following:
def F1_eval(preds, labels):
t = np.arange(0, 1, 0.005)
f = np.repeat(0, 200)
Results = np.vstack([t, f]).T
P = sum(labels == 1)
for i in range(200):
m = (preds >= Results[i, 0])
TP = sum(labels[m] == 1)
FP = sum(labels[m] == 0)
if (FP + TP) > 0:
Precision = TP/(FP + TP)
Recall = TP/P
if (Precision + Recall >0) :
F1 = 2 * Precision * Recall / (Precision + Recall)
else:
F1 = 0
Results[i, 1] = F1
return(max(Results[:, 1]))
Below I provide a reproducible example along with the error message:
from sklearn import datasets
Wine = datasets.load_wine()
X_wine = Wine.data
y_wine = Wine.target
y_wine[y_wine == 2] = 1
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(X_wine, y_wine, test_size = 0.2)
clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1,silent=False, objective='binary:logistic', \
booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, \
subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)
clf_wine.fit(X_wine_train, y_wine_train,\
eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-453-452852658dd8> in <module>()
12 clf_wine = xgb.XGBClassifier(max_depth=6, learning_rate=0.1,silent=False, objective='binary:logistic', booster='gbtree', n_jobs=8, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, subsample=0.8, colsample_bytree=0.8, colsample_bylevel=1, reg_alpha=0, reg_lambda=1)
13
---> 14 clf_wine.fit(X_wine_train, y_wine_train,eval_set=[(X_wine_train, y_wine_train), (X_wine_test, y_wine_test)], eval_metric=F1_eval, early_stopping_rounds=10, verbose=True)
15
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set)
519 early_stopping_rounds=early_stopping_rounds,
520 evals_result=evals_result, obj=obj, feval=feval,
--> 521 verbose_eval=verbose, xgb_model=None)
522
523 self.objective = xgb_options["objective"]
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
202 evals=evals,
203 obj=obj, feval=feval,
--> 204 xgb_model=xgb_model, callbacks=callbacks)
205
206
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
82 # check evaluation result.
83 if len(evals) != 0:
---> 84 bst_eval_set = bst.eval_set(evals, i, feval)
85 if isinstance(bst_eval_set, STRING_TYPES):
86 msg = bst_eval_set
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\core.py in eval_set(self, evals, iteration, feval)
957 if feval is not None:
958 for dmat, evname in evals:
--> 959 feval_ret = feval(self.predict(dmat), dmat)
960 if isinstance(feval_ret, list):
961 for name, val in feval_ret:
<ipython-input-383-dfb8d5181b18> in F1_eval(preds, labels)
11
12
---> 13 P = sum(labels == 1)
14
15
TypeError: 'bool' object is not iterable
I do not understand why the function is not working. I have followed the examples here: https://github.com/dmlc/xgboost/blob/master/demo/guide-python/custom_objective.py
I would like to understand where I err.
When doing
sum(labels == 1)
, Python evaluates labels == 1 as aBoolean
object, thus you getTypeError: 'bool' object is not iterable
The function
sum
expecting an iterable object, like a list. Here's an example of your error:If you want to use f1_score of scikit-learn you can implement the following wrapup:
params of the wrap up are
list
(of predictions) andDMatrix
, and it returns a string, floatHere you can see an example of how to implement custom objective function and custom evaluation metric
Example containing the following code:
which specify that an evaluation function gets as arguments (predictions, dtrain) dtrain is of type
DMatrix
and returns a string, float which is the name of the metric and the error.Adding working python code example
And if you want to implement _F1_eval to work specifically for xgboost evaluation methods add this: