I tried making a function to get best feature for every model passed thorugh feature selection library. But getting the error.
ValueError: Found array with 1 feature(s) (shape=(1176, 1)) while a minimum of 2 is required.
for sfs.fit(xtrain,ytrain) in the following code
method = 'forward'
variance = 0
test_size = 0.2
random_state = 31
y_var = 'monthlyrate'
x = df.drop(y_var, axis = 1)
y = df[[y_var]]
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=test_size,random_state=random_state)
regress_models = [LinearRegression(), KNeighborsRegressor(), DecisionTreeRegressor(),
RandomForestRegressor(), AdaBoostRegressor(), SVR()]
model_dict = {}
if method == 'forward':
for model in regress_models:
sfs = SequentialFeatureSelector(model.fit(xtrain,ytrain),direction=method)
sfs.fit(xtrain,ytrain)
array = sfs.get_support()
feature_select = pd.DataFrame(array,index=x.columns)
true_feature = feature_select[feature_select.iloc[:,0]==True]
pc_feature = [list(true_feature.index)[:i+1] for i in range(len(list(true_feature.index)))]
r2_list = []
for i in pc_feature:
x = df[i]
y = df[[y_var]]
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=test_size,random_state=random_state)
model.fit(xtrain, ytrain)
ypredtrain = model.predict(xtrain)
train_r2 = r2_score(ytrain,ypredtrain)
train_r2_adjusted = 1- ((1-train_r2)*(len(xtrain)-1)/(len(xtrain)-len(x.columns)-1))
ypredtest = model.predict(xtest)
test_r2 = r2_score(ytest,ypredtest)
test_r2_adjusted = 1- ((1-test_r2)*(len(xtest)-1)/(len(xtest)-len(x.columns)-1))
r2_list.append([i,train_r2,test_r2,train_r2_adjusted,test_r2_adjusted])
r2_df = pd.DataFrame(r2_list, columns = ['features','train_r2','test_r2','train_r2_adjusted','test_r2_adjusted'])
r2_df['variance'] = round((r2_df['train_r2_adjusted'] - r2_df['test_r2_adjusted']),4)
if variance == 0:
model_dict[str(model).replace('()',"")] = r2_df.sort_values(by = 'variance')
else:
if len(r2_df[r2_df.variance <= variance]) == 0:
print('Try with different value')
else:
model_dict[str(model).replace('()',"")] = r2_df[r2_df.variance <= variance].sort_values(by = 'variance')
model_dict