I'm new to python coding, and working on a project but stuck at coding part. I have one target variable and 23 relevant variables. My dataset is 11(simples)*23(descriptors) and one target dataset 11(simples)*1(target variable). How do I find the most relevant variables from these 23 descriptors with PCA dimensional reduction?
pca = PCA()
pca.fit(df2)
transformed = pca.transform(df2)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
steps = [('pca', PCA()), ('m', LogisticRegression())]
model = Pipeline(steps=steps)`
from sklearn.preprocessing import MinMaxScaler
steps = [('norm', MinMaxScaler()), ('pca', PCA()), ('m', LogisticRegression())]
model = Pipeline(steps=steps)
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=11, n_features=23, n_informative=5, n_redundant=18, random_state=7)
print(X.shape, y.shape)
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
X, y = make_classification(n_samples=11, n_features=23, n_informative=5, n_redundant=18, random_state=7)
steps = [('pca', PCA(n_components=4)), ('m', LogisticRegression())]
model = Pipeline(steps=steps)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
# report performance
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
enter image description heredata
enter image description here I'm supposed to get this image but I don't know how to do.