I'm trying to perform a ridge regression on a large dataset with 11 predictor variables. I've pulled all the relevant variables from my CSV dataset and set them as matrices so that the glmnet library can access them. I've also split the dataset in half, with one half for training and one half for testing. While trying to run the first test fit model, I keep getting this error code Error in rep(1, N) : invalid 'times' argument which prevents the code from coming up with optimal values for lambda and it stops cold.

Based off of other posts, I've checked and made sure that there aren't any NA columns or rows that I am reading in. Other questions that I've found dealing with this error don't seem to use the glmnet library. I'm also using the "mgaussian" family because I'm testing multiple variables, but I tried it the standard "gaussian" family and it still comes up with the same error code.

library(glmnet) #use the glmnet library to perfrom ridge regression
SWVars = read.csv(file.choose('SWData'), header = TRUE) #read the data into Rstudio
n = 11940 #length of dataset
x = as.matrix(SWVars[0:12]) #read the desired variables in as a matrix
y = as.matrix(SWVars[16:16]) #read the desired response variable in as a matrix 
train_rows = sample(1:n, 0.5*n) #randomly designate half of the data as training rows
x.train = x[train_rows] #designate half of indepdent variable data for training
x.test = x[-train_rows] #designate the other half of independent variable data for testing
y.train = y[train_rows] #designate half of the dependent variable data for training
y.test = y[-train_rows] #designate the other half of dependent variable data for testing
#Fit a training curve for the data using cv.glmnet, minimising MSE, and using the mgaussian famility for multiple regression
alpha0.fit = cv.glmnet(x.train,y.train, type.measure="mse", alpha=0, family="mgaussian")
structure(list(
Latitude = c(33.37648429, 33.58147205, 43.76802869, 
33.55658479, 44.36456222, 40.16155115, 45.77329011, 36.81228138, 
39.37683345, 34.4202345), 
ABSLt = c(33.37648429, 33.58147205, 
43.76802869, 33.55658479, 44.36456222, 40.16155115, 45.77329011, 
36.81228138, 39.37683345, 34.4202345), 
Longitude = c(-111.9196013, 
-111.2821257, -103.5206581, -111.5104323, -101.0545158, -79.05296653, 
-99.64100853, -96.04132091, -89.02743535, -111.0896969), 
ABSLn = c(111.9196013, 
111.2821257, 103.5206581, 111.5104323, 101.0545158, 79.05296653, 
99.64100853, 96.04132091, 89.02743535, 111.0896969), 
Eleveation = c(360.29, 
583, 1581.8, 459, 597.43, 551.86, 562.5, 230.56, 195.91, 2220
), 
Deuiterium = c(-32.16640732, -60.6107658, -64.8100282, -61.11196959, 
-22.34856023, -58.2616656, -69.80240134, -12.77002745, -37.88557439, 
-55.65939053), 
ABSd2H = c(32.16640732, 60.6107658, 64.8100282, 
61.11196959, 22.34856023, 58.2616656, 69.80240134, 12.77002745, 
37.88557439, 55.65939053), 
Oxygen.18 = c(-0.679664825, -7.65316576, 
-6.660581453, -7.378091132, 0.207154673, -8.921727565, -7.789111383, 
-2.43286863, -5.066014096, -7.447887386), 
ABSd18O = c(0.679664825, 
7.65316576, 6.660581453, 7.378091132, 0.207154673, 8.921727565, 
7.789111383, 2.43286863, 5.066014096, 7.447887386), dex = c(-27L, 
1L, -12L, -2L, -24L, 13L, -7L, 7L, 3L, 4L), 
ABSdex = c(27L, 1L, 
12L, 2L, 24L, 13L, 7L, 7L, 3L, 4L), 
DOY = c(15L, 15L, 15L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L), 
sine_DOY = c(0.128748177, 0.128748177, 
0.128748177, 0.128748177, 0.128748177, 0.128748177, 0.128748177, 
0.128748177, 0.128748177, 0.128748177), 
PDSI = c(-1.133137345, 
-1.133137345, -0.944772124, -1.163842678, 3.165101767, -2.081107855, 
-3.871144056, -1.90775156, -2.455032349, -2.285209417)), 
.Names = c("Latitude", 
"ABSLt", "Longitude", "ABSLn", "Eleveation", "Deuiterium", "ABSd2H", 
"Oxygen.18", "ABSd18O", "dex", "ABSdex", "DOY", "sine_DOY", "PDSI"
), row.names = c(NA, 10L), class = "data.frame")

I'm not sure what is causing this problem and I expected this part of the code to run smoothly. I would love to figure out what the issue is so that I can move forward with my regression. Thanks for any help that you can give.

0 Answers