I'm trying to load some MRI grey matter map images as numpy arrays so they can be processed in sklearn. My code imports the files and loads them with NiftiMasker, which applies a mask to each image and extracts a time series; I believe that time-series axis is the 4th dimension of the resulting numpy array and is what triggers the error. When I run the code I get a ValueError because the data is loaded as a 4D array when all I need is a 3D array. Is there a way to reduce the dimensions of the array, or some other way to solve this error? Here is my code:
import nibabel
import numpy as np
import matplotlib.pyplot as plt
from nilearn.input_data import NiftiMasker
import pandas as pd
import pdb
import os
def getgm(path):
    """Walks the dataset folder, collects the scan filenames,
    and returns a list of full paths to the grey-matter maps."""
    gmnames = []
    paths = []
    for root, dirs, files in os.walk(path):
        for filename in files:
            gmnames.append(filename)
    for gmname in gmnames:
        gmpath = os.path.join(r'Z:\Projects\MTS_Project\spm\dataset', gmname)
        paths.append(gmpath)
    return paths
print(getgm(r'Z:\Projects\MTS_Project\spm\dataset'))
#loads MTS_data as a list of paths
MTS_data = getgm(r'Z:\Projects\MTS_Project\spm\dataset')
print(MTS_data[0])
#loads side data from mtsdata.csv
fields = ['ID', 'SIDE']
csv = pd.read_csv('mtsdata.csv', skipinitialspace=True, usecols=fields)
age = csv.SIDE  # target vector (note: this holds the SIDE column)
nifti_masker = NiftiMasker(standardize=False, smoothing_fwhm=2, verbose=1)
gm_maps_masked = []
for data in MTS_data:
    gm_maps_masked.append(nifti_masker.fit_transform(data))
#nifti_masker.fit(MTS_data)
#gm_maps_masked = nifti_masker.transform(MTS_data)
#print(gm_maps_masked)
#n_features = gm_maps_masked.shape
#print(n_features)
print("ANOVA + SVR")
# Define the prediction function to be used.
# Here we use Support Vector Regression (SVR) with a linear kernel.
from sklearn.svm import SVR
svr = SVR(kernel='linear')
# Dimension reduction
from sklearn.feature_selection import VarianceThreshold, SelectKBest, \
    f_regression
# Remove features with too low between-subject variance
variance_threshold = VarianceThreshold(threshold=1)
# Here we use a classical univariate feature selection based on an F-test,
# namely ANOVA.
feature_selection = SelectKBest(f_regression, k=2000)
# We have our predictor (SVR), our feature selection (SelectKBest), and now,
# we can plug them together in a *pipeline* that performs the two operations
# successively:
from sklearn.pipeline import Pipeline
anova_svr = Pipeline([
    ('variance_threshold', variance_threshold),
    ('anova', feature_selection),
    ('svr', svr)])
### Fit and predict
for gmmap in gm_maps_masked:
    anova_svr.fit(gm_maps_masked,age)
    pdb.set_trace()
    age_pred = anova_svr.predict(gm_maps_masked)
Here is the error and traceback:
Traceback (most recent call last):
  File "learn.py", line 81, in <module>
    anova_svr.fit(gm_maps_masked,age)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 248, in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 213, in _fit
    **fit_params_steps[name])
  File "C:\Program Files\Python36\lib\site-packages\sklearn\externals\joblib\memory.py", line 362, in __call__
    return self.func(*args, **kwargs)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 581, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\base.py", line 520, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\feature_selection\variance_threshold.py", line 64, in fit
    X = check_array(X, ('csr', 'csc'), dtype=np.float64)
  File "C:\Program Files\Python36\lib\site-packages\sklearn\utils\validation.py", line 433, in check_array
    array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: could not broadcast input array from shape (4518515) into shape (1)
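If it helps, here is a minimal sketch of how I understand the shapes involved (the file name is a placeholder, and I'm not sure the stacking at the end is the right fix):

import numpy as np
import nibabel
from nilearn.input_data import NiftiMasker

# hypothetical file name, standing in for any one of the maps in MTS_data
img_path = r'Z:\Projects\MTS_Project\spm\dataset\subj01_gm.nii'
print(nibabel.load(img_path).shape)  # a single grey-matter map is 3D: (x, y, z)

masker = NiftiMasker(standardize=False, smoothing_fwhm=2)
masked = masker.fit_transform(img_path)
print(masked.shape)  # 2D: (n_volumes, n_voxels); the error mentions 4518515 voxels

# my guess: stacking one such row per subject would give the 2D
# (n_samples, n_features) matrix sklearn expects, instead of the
# list of arrays I'm passing to the pipeline now
X = np.vstack([masked])  # with the real data: np.vstack(gm_maps_masked)
print(X.shape)           # (n_subjects, n_voxels)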