I'm working on the Titanic dataset and have applied one-hot encoding to all columns, so every value is 0 or 1.
However, I get the following error during the train operation. I have tried many things but could not find a solution. I have tried to explain the error as clearly as I can, and I hope the problem is easy to understand. Thank you for your time.
Traceback (most recent call last):
File "titanic_classifier.py", line 277, in <module>
fire.Fire()
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 138, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 468, in _Fire
target=component.__name__)
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 672, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "titanic_classifier.py", line 220, in train
n_sample = nsample if nsample is not None else m['n_sample'])
File "C:\Users\PC\Desktop\x\source\run_train.py", line 248, in run_train
dfXy, dfXytest = train(model_dict, dfXy, cols, post_process_fun)
File "C:\Users\PC\Desktop\x\source\run_train.py", line 125, in train
data_pars['train'] = {'Xtrain' : dfX[colsX].iloc[:itrain, :],
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2908, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1254, in _get_listlike_indexer
self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1304, in _validate_read_indexer
raise KeyError(f"{not_found} not in index")
KeyError: "['space_svd_0', 'interaction_svd_1', 'transit_svd_0', 'summary_svd_1', 'last_scraped_month', 'neighborhood_overview_svd_0', 'host_name_svd_0', 'house_rules_svd_1', 'house_rules_svd_0', 'first_review_month', 'transit_svd_1', 'last_review_year', 'interaction_svd_0', 'first_review_day', 'last_scraped_day', 'summary_svd_0', 'access_svd_1', 'first_review_year', 'notes_svd_0', 'name_svd_1', 'space_svd_1', 'neighborhood_overview_svd_1', 'host_since_year', 'amenities_svd_1', 'amenities_svd_0', 'host_about_svd_0', 'host_name_svd_1', 'notes_svd_1', 'name_svd_0', 'last_review_day', 'last_scraped_year', 'description_svd_0', 'host_since_day', 'last_review_month', 'access_svd_0', 'host_about_svd_1', 'host_since_month', 'description_svd_1'] not in index"
The part of the code in run_train.py where the error is raised:
data_pars['train'] = {'Xtrain' : dfX[colsX].iloc[:itrain, :],
'ytrain' : dfX[coly].iloc[:itrain],
'Xtest' : dfX[colsX].iloc[itrain:ival, :],
'ytest' : dfX[coly].iloc[itrain:ival],
'Xval' : dfX[colsX].iloc[ival:, :],
'yval' : dfX[coly].iloc[ival:]
}