I am trying to do hyperparameter tuning with Hyperopt, using the latest versions of both scikit-learn and hyperopt. While doing so, I get an error when the max_features parameter is chosen from the list ['auto','sqrt','log2']. It says the value should be an int.
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials
# Hyperopt search space for the RandomForestClassifier hyper-parameters.
#
# 'auto' is intentionally NOT offered for max_features: current scikit-learn
# rejects it with InvalidParameterError and only accepts 'sqrt', 'log2', None,
# an int >= 1 or a float in (0.0, 1.0].
space = {
    'criterion': hp.choice('criterion', ['entropy', 'gini']),
    # quniform returns floats rounded to multiples of 10; cast to int before use.
    'max_depth': hp.quniform('max_depth', 10, 1200, 10),
    'max_features': hp.choice('max_features', ['sqrt', 'log2', None]),
    # Sampled as floats; scikit-learn documents float values for these two
    # parameters as fractions of the number of samples.
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0, 0.5),
    'min_samples_split': hp.uniform('min_samples_split', 0, 1),
    'n_estimators': hp.choice(
        'n_estimators', [10, 50, 300, 750, 1200, 1300, 1800, 2000]
    ),
}
def objective(space):
    """Score one sampled hyper-parameter configuration for hyperopt.

    Builds a RandomForestClassifier from the sampled values, evaluates it with
    5-fold cross-validation on ``X_train`` / ``Y_train`` (module-level names),
    and returns the negated mean score so that ``fmin`` -- which minimises --
    effectively maximises the cross-validation score.

    Args:
        space: dict of sampled values with the keys 'criterion', 'max_depth',
            'max_features', 'min_samples_leaf', 'min_samples_split' and
            'n_estimators'.

    Returns:
        dict with 'loss' (negative mean CV score) and 'status' (STATUS_OK).
    """
    max_features = space['max_features']
    if max_features == 'auto':
        # 'auto' was removed from scikit-learn (InvalidParameterError on
        # current releases); for classifiers it used to mean 'sqrt'.
        max_features = 'sqrt'

    model = RandomForestClassifier(
        criterion=space['criterion'],
        # The sampled depth arrives as a float; the estimator needs an int.
        max_depth=int(space['max_depth']),
        max_features=max_features,
        min_samples_leaf=space['min_samples_leaf'],
        min_samples_split=space['min_samples_split'],
        n_estimators=space['n_estimators'],
    )
    accuracy = cross_val_score(model, X_train, Y_train, cv=5).mean()
    return {'loss': -accuracy, 'status': STATUS_OK}
# Trials object that collects the result of every evaluation.
trails = Trials()

# Minimise `objective` over `space` using the TPE algorithm, 80 evaluations.
best = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=80,
    trials=trails,
)

# Notebook-style echo of the best configuration found.
# NOTE(review): for hp.choice parameters hyperopt is documented to report the
# index of the chosen option rather than the option itself -- confirm, and
# decode with hyperopt.space_eval(space, best) if the actual values are needed.
best
Error I am getting:
InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'sqrt', 'log2'} or None. Got 'auto' instead.
Also, if I comment out max_features, the code runs perfectly.
---------------------------------------------------------------------------
InvalidParameterError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_35460\1441699718.py in <module>
23
24 # Step 5: Run Bayesian Optimization
---> 25 best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)
26
27 # Step 6: Retrieve best hyperparameters
~\Anaconda3\lib\site-packages\hyperopt\fmin.py in fmin(fn, space, algo, max_evals, timeout, loss_threshold, trials, rstate, allow_trials_fmin, pass_expr_memo_ctrl, catch_eval_exceptions, verbose, return_argmin, points_to_evaluate, max_queue_len, show_progressbar, early_stop_fn, trials_save_file)
584
585 # next line is where the fmin is actually executed
--> 586 rval.exhaust()
587
588 if return_argmin:
~\Anaconda3\lib\site-packages\hyperopt\fmin.py in exhaust(self)
362 def exhaust(self):
363 n_done = len(self.trials)
--> 364 self.run(self.max_evals - n_done, block_until_done=self.asynchronous)
365 self.trials.refresh()
366 return self
~\Anaconda3\lib\site-packages\hyperopt\fmin.py in run(self, N, block_until_done)
298 else:
299 # -- loop over trials and do the jobs directly
--> 300 self.serial_evaluate()
301
302 self.trials.refresh()
~\Anaconda3\lib\site-packages\hyperopt\fmin.py in serial_evaluate(self, N)
176 ctrl = base.Ctrl(self.trials, current_trial=trial)
177 try:
--> 178 result = self.domain.evaluate(spec, ctrl)
179 except Exception as e:
180 logger.error("job exception: %s" % str(e))
~\Anaconda3\lib\site-packages\hyperopt\base.py in evaluate(self, config, ctrl, attach_attachments)
890 print_node_on_error=self.rec_eval_print_node_on_error,
891 )
--> 892 rval = self.fn(pyll_rval)
893
894 if isinstance(rval, (float, int, np.number)):
~\AppData\Local\Temp\ipykernel_35460\1441699718.py in objective(params)
7 def objective(params):
8 clf = RandomForestClassifier(**params)
----> 9 clf.fit(X_train, Y_train)
10 y_pred = clf.predict(X_test)
11 accuracy = accuracy_score(Y_test, y_pred)
~\Anaconda3\lib\site-packages\sklearn\base.py in wrapper(estimator, *args, **kwargs)
1142
1143 if not global_skip_validation and not partial_fit_and_fitted:
-> 1144 estimator._validate_params()
1145
1146 with config_context(
~\Anaconda3\lib\site-packages\sklearn\base.py in _validate_params(self)
635 accepted constraints.
636 """
--> 637 validate_parameter_constraints(
638 self._parameter_constraints,
639 self.get_params(deep=False),
~\Anaconda3\lib\site-packages\sklearn\utils\_param_validation.py in validate_parameter_constraints(parameter_constraints, params, caller_name)
93 )
94
---> 95 raise InvalidParameterError(
96 f"The {param_name!r} parameter of {caller_name} must be"
97 f" {constraints_str}. Got {param_val!r} instead."
InvalidParameterError: The 'max_features' parameter of RandomForestClassifier must be an int in the range [1, inf), a float in the range (0.0, 1.0], a str among {'sqrt', 'log2'} or None. Got 'auto' instead.
RandomForestClassifier doesn't accept "auto" for max_features=, so you need to remove "auto" from the list of parameters. The only acceptable values there are "sqrt", "log2", None, and an integer or float. https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html