While using a custom loss function for multi-class classification, I am getting an error that my custom objective function has no calc_ders_range attribute. However, as per my discussion in the CatBoost Telegram channel, calc_ders_range is only for single-value classification/regression, and multi-class objectives implement calc_ders_multi instead. I am puzzled to get the error below even though I am passing my custom objective to CatBoostClassifier.
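For reference, the two interfaces differ only in which method the objective exposes; a simplified skeleton of both (class names here are placeholders of mine, following the docs page linked below):

class SingleValueObjectiveSketch(object):
    def calc_ders_range(self, approxes, targets, weights):
        # binary classification / regression: return one
        # (first_derivative, second_derivative) pair per object
        pass

class MultiClassObjectiveSketch(object):
    def calc_ders_multi(self, approx, target, weight):
        # multi-class: approx holds the raw per-class scores of a single
        # object; return (gradient, hessian_matrix) for that object
        pass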
My Code:
The output label is of int64 type with values from 0 to 25, representing 26 classes. The custom objective and accuracy metric are taken from the usage examples at https://catboost.ai/docs/concepts/python-usages-examples.html#user-defined-loss-function
import numpy as np
from catboost import CatBoostClassifier
# assumption: ColumnSelector comes from mlxtend and word_tokenize from nltk
from mlxtend.feature_selection import ColumnSelector
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import OneHotEncoder

class MyObjective(object):
    def calc_ders_multi(self, approx, target, weight):
        approx = np.array(approx) - max(approx)
        exp_approx = np.exp(approx)
        exp_sum = exp_approx.sum()
        grad = []
        hess = []
        for j in range(len(approx)):
            der1 = -exp_approx[j] / exp_sum
            if j == target:
                der1 += 1
            hess_row = []
            for j2 in range(len(approx)):
                der2 = exp_approx[j] * exp_approx[j2] / (exp_sum**2)
                if j2 == j:
                    der2 -= exp_approx[j] / exp_sum
                hess_row.append(der2 * weight)
            grad.append(der1 * weight)
            hess.append(hess_row)
        return (grad, hess)
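(The objective itself behaves as expected when called standalone; a quick check with toy values, assuming the imports above:)

obj = MyObjective()
grad, hess = obj.calc_ders_multi([0.1, 0.5, -0.2], target=1, weight=1.0)
# grad has one entry per class; hess is a len(approx) x len(approx) matrix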
class AccuracyMetric(object):
    def get_final_error(self, error, weight):
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        best_class = np.argmax(approxes, axis=0)
        accuracy_sum = 0
        weight_sum = 0
        for i in range(len(target)):
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            accuracy_sum += w * (best_class[i] == target[i])
        return accuracy_sum, weight_sum
def get_pipeline(args):
    """Create a pipeline."""
    pipeline_feat1 = Pipeline([
        ('selector', ColumnSelector(cols='feat1', drop_axis=True)),
        ('vec', TfidfVectorizer(tokenizer=word_tokenize)),
    ])
    pipeline_feat2 = Pipeline([
        ('selector', ColumnSelector(cols='feat2', drop_axis=False)),
        ('imputer', SimpleImputer(missing_values=np.nan, strategy="constant", fill_value=0, copy=False)),
        ('ohe', OneHotEncoder(handle_unknown='ignore')),
    ])
    pipeline_feat3 = Pipeline([
        ('selector', ColumnSelector(cols='feat3', drop_axis=False)),
        ('imputer', SimpleImputer(missing_values=np.nan, strategy="constant", fill_value=0, copy=False)),
        ('ohe', OneHotEncoder(handle_unknown='ignore')),
    ])
    features = FeatureUnion([
        ('f1', pipeline_feat1),
        ('f2', pipeline_feat2),
        ('f3', pipeline_feat3),
    ])
    steps = [
        ('features', features),
        ('clf', CatBoostClassifier(task_type='CPU', iterations=5000, random_seed=0,
                                   loss_function=MyObjective(), eval_metric=AccuracyMetric(), verbose=100))
    ]
    train_pipeline = Pipeline(steps)
    params = {
        "features__f1__vec__max_features": args.f1_max_features,
        "features__f1__vec__ngram_range": (1, args.f1_max_ngram)
    }
    params = {k: v for k, v in params.items() if v is not None}
    train_pipeline.set_params(**params)
    return train_pipeline
# Train model.
pipeline = get_pipeline(args)  # args: parsed command-line options (not shown here)
# Split train and test data.
X_train, X_val, y_train, y_val = train_test_split(df_train[['feat1', 'feat2', 'feat3']], df_train['label'], train_size=0.8, random_state=21)
model = pipeline.fit(X_train, y_train)
Error Message:
AttributeError Traceback (most recent call last)
_catboost.pyx in _catboost._ObjectiveCalcDersRange()
AttributeError: 'MyObjective' object has no attribute 'calc_ders_range'
During handling of the above exception, another exception occurred:
CatBoostError Traceback (most recent call last)
<ipython-input-12-ea20f154d788> in <module>
10 train_size=0.8,
11 random_state=21)
---> 12 model = pipeline.fit(X_train, y_train)
13
/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
333 if self._final_estimator != 'passthrough':
334 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 335 self._final_estimator.fit(Xt, y, **fit_params_last_step)
336
337 return self
/opt/conda/lib/python3.7/site-packages/catboost/core.py in fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
4296 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
4297 eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period,
-> 4298 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
4299 return self
4300
/opt/conda/lib/python3.7/site-packages/catboost/core.py in _fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
1807 params,
1808 allow_clear_pool,
-> 1809 train_params["init_model"]
1810 )
1811
/opt/conda/lib/python3.7/site-packages/catboost/core.py in _train(self, train_pool, test_pool, params, allow_clear_pool, init_model)
1256
1257 def _train(self, train_pool, test_pool, params, allow_clear_pool, init_model):
-> 1258 self._object._train(train_pool, test_pool, params, allow_clear_pool, init_model._object if init_model else None)
1259 self._set_trained_model_attributes()
1260
_catboost.pyx in _catboost._CatBoost._train()
_catboost.pyx in _catboost._CatBoost._train()
CatBoostError: catboost/python-package/catboost/helpers.cpp:42: Traceback (most recent call last):
File "_catboost.pyx", line 1345, in _catboost._ObjectiveCalcDersRange
AttributeError: 'MyObjective' object has no attribute 'calc_ders_range'
I managed to solve this problem by explicitly setting the classes_count parameter, e.g.:
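(A sketch of the change that fixed it for me; everything else in the CatBoostClassifier constructor stays as in get_pipeline above.)

clf = CatBoostClassifier(task_type='CPU', iterations=5000, random_seed=0,
                         classes_count=26,  # labels are 0..25, i.e. 26 classes
                         loss_function=MyObjective(), eval_metric=AccuracyMetric(), verbose=100)

With classes_count set, CatBoost treats the custom objective as multi-class and calls calc_ders_multi instead of looking for calc_ders_range.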