How to get manual prediction from caret's glmnet train object?

29 views Asked by At

I have a caret model trained using glmnet with cv and hyper parameter tuning. I need to get the prediction probability for each case manually. I am trying to multiply the variables by the model's coefficients, but I am getting different results than caret::predict.train. Not sure if it is the right way to do or I'm missing something. Here's my attempt to replicate the problem.

edited a typo in the code model$FinalModel$xNames instead of model$FinalModel$xNamespred

library(caret)
library(tidyverse)
set.seed(1)
df1 <- data.frame(dep_var = sample(c("No","Yes"), size =1000, replace = TRUE),
                  var1= runif(1000, min = 0, max= 100),
                  var2 = runif(1000, min = 50, max= 100),
                  var_cat= sample(c("Male", "Female"), size = 1000, replace = TRUE))
set.seed(1)
train <- sample(1:nrow(df1), 0.75*nrow(df1))                  
dftrain <- df1[train,]
dftest <- df1[-train,]
fmla <- as.formula(paste("dep_var", "~", paste(c('var1', 'var2', 'var_cat'), collapse = "+")))

train_obj <- trainControl(method = "repeatedcv", 
                          number= 100, 
                          repeats=3,
                          classProbs = TRUE,
                          preProcOptions = c("BoxCox", "scale", "zv"))
pr_grid <- expand.grid(alpha = seq(0,1, length=10),
                       lambda = seq(0.0001,10, length= 20))

# Model
set.seed(2)
model <- train(fmla, 
               data = dftrain, 
               method = "glmnet",
               family = "binomial",
               metric= "ROC",
               tuneGrid= pr_grid, 
               trControl=train_obj, 
               na.action = "na.omit")

dfx <- dftest[1,]
dfx$dep_var <- NULL
pred <- caret::predict.train(model, newdata = dfx, type='prob')

#changing the name and value of a categorical variable

dfx2 <- dfx
colnames(dfx2) <- model$finalModel$xNames
dfx2$var_catMale <- 1
dfx2$`(Intercept)` <- 1

dfx2<- select(dfx2, "(Intercept)", "var1", "var2", "var_catMale")

coef <- coef(model$finalModel, model$bestTune$lambda)

pred_man <- sum(as.matrix(dfx2) %*% as.matrix(coef))

isTRUE(pred$Yes == pred_man)

\> pred$Yes

[1] 0.5142378

\> pred_man

[1] 0.05696666

\> model$preProcess

NULL

0

There are 0 answers