Randomness in `ranger()` parameter tuning via different packages

I'm trying to understand why different functions for hyperparameter tuning of ranger() random forests lead to different results. I have tried to control the randomness in cross-validation fold splitting by creating the folds manually, and in model fitting via seed arguments. I have also turned off parallel computing. Yet, differences remain.

Q: Are there differences in the default settings of these functions that I have overlooked? Are there sources of randomness I forgot to consider?

For simplicity, the example below focuses on a grid search with only one parameter combination. I use five-times-repeated 10-fold CV, i.e. 50 resamples in total.
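
As a baseline check, ranger() itself is deterministic once its seed argument is fixed, so any remaining differences should come from the tuning wrappers. A minimal standalone sketch (single-threaded to rule out any threading effects):

library(ranger)
m1 <- ranger(Species ~ ., data = iris, seed = 1, num.threads = 1)
m2 <- ranger(Species ~ ., data = iris, seed = 1, num.threads = 1)
identical(m1$predictions, m2$predictions) # TRUE: same seed, same forest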

data preparation

library(tidyverse)
library(ranger)
library(caret)
library(tidymodels)

df <- iris
df$Species <- factor(df$Species)


# split data into 5 x 10 = 50 folds manually once
set.seed(1)
folds <- map(1:5, function(x){
  
  # assign each row to one of the 10 folds; the inner sample() spreads any
  # remainder rows (none here: iris has 150 rows, split evenly into 10 folds of 15)
  foldid <- sample(c(rep(1:10, each = floor(nrow(df) / 10)), 
                     sample(1:10, size = nrow(df) - floor(nrow(df) / 10) * 10, replace = TRUE)), replace = FALSE)
  
  fold_in  <- map(1:10, function(i){which(foldid != i)})
  fold_out <- map(1:10, function(i){which(foldid == i)})
  
  list(fold_in = fold_in,
       fold_out = fold_out)
})
folds_in  <- flatten(flatten(folds)[names(flatten(folds)) == "fold_in"])  # 5 x 10 training sets
folds_out <- flatten(flatten(folds)[names(flatten(folds)) == "fold_out"]) # 5 x 10 hold-out sets
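
A quick sanity check that, within each repeat, the ten hold-out folds partition all rows exactly once:

# within each repeat, the 10 hold-out folds must cover every row exactly once
for (r in 1:5) {
  idx <- sort(unlist(folds_out[(r - 1) * 10 + 1:10]))
  stopifnot(identical(idx, 1:nrow(df)))
}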

# define parameters
mtry <- 2
min.node.size <- 10

test set performances from a manual for loop

score <- numeric(50)
for(i in 1:50){

  dat_out <- df[folds_out[[i]], ]
  dat_in <- df[folds_in[[i]], ]
  
  # fit on the training fold only
  m <- ranger(
    Species ~ .,
    data = dat_in,
    mtry = mtry,
    min.node.size = min.node.size,
    splitrule = "gini",
    num.trees = 1e3,
    seed = 1,
    replace = TRUE,
    sample.fraction = 1
  )
  
  # predict.ranger's argument is `seed`, not `set.seed`
  prd <- predict(m, data = dat_out, seed = 1)
  
  score[i] <- mean(prd$predictions == dat_out$Species)
}

test set performances from {caret}

train_ctrl <- trainControl(method = "repeatedcv",
                           returnResamp = "all",
                           savePredictions = "all",
                           selectionFunction = "best",
                           classProbs = TRUE,
                           sampling = NULL,
                           search = "grid",
                           preProcOptions = NULL,
                           index = folds_in,
                           indexOut = folds_out,
                           allowParallel = FALSE,
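                           # 50 resample seeds + 1 seed for the final fit;
                           # with a single tuning combination, one integer
                           # per element is enough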
                           seeds = as.list(rep(1, 50 + 1)))

caret_tune <- caret::train(
  Species ~ .,
  data = df,
  method = "ranger",
  num.trees = 1e3,
  replace = TRUE,
  sample.fraction = 1,
  trControl = train_ctrl,
  metric = "Accuracy",
  tuneGrid = expand.grid(
    splitrule = "gini",
    mtry = mtry,
    min.node.size = min.node.size
  )
)
score_ct <- caret_tune$resample$Accuracy
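
Because caret names and orders user-supplied resamples itself, it is worth confirming that score_ct lines up element-wise with the manual loop before correlating. A quick check, assuming caret's default "Resample01", ..., "Resample50" naming for an unnamed index list:

all(caret_tune$resample$Resample == paste0("Resample", sprintf("%02d", 1:50)))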

test set performances from {tidymodels}

train_folds <- map_dfr(1:length(folds_in), function(i){
  tibble(splits = list(make_splits(list("analysis" = folds_in[[i]], "assessment" = folds_out[[i]]),
                                   data = df)))
})

train_folds <- manual_rset(train_folds$splits,
                           ids = tibble(
                             id = paste0("Repeat", rep(1:5, each = 10)),
                             id2 = paste0("Fold", sprintf("%002d", rep(1:10, 5)))
                           ))
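
To rule out indexing slips, one can verify that each split of the manual_rset holds exactly the intended assessment rows; a quick check via rsample::complement():

# assessment indices of split i should equal folds_out[[i]]
all(map_lgl(1:50, function(i){
  setequal(rsample::complement(train_folds$splits[[i]]), folds_out[[i]])
}))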

ranger_recipe <- recipe(formula = Species ~ ., data = df) 
ranger_spec <- rand_forest(mtry = tune(), min_n = tune(), trees = 1e3) %>%
  set_mode("classification") %>%
  set_engine("ranger",
             seed = 1,
             replace = TRUE,
             sample.fraction = 1,
             splitrule = "gini")
ranger_workflow <- workflow() %>% add_recipe(ranger_recipe) %>% add_model(ranger_spec)

ranger_tune <- tune_grid(
  ranger_workflow,
  resamples = train_folds,
  grid = tibble(mtry = mtry,
                min_n = min.node.size)
  )

# the default classification metrics are accuracy (estimator "multiclass")
# and roc_auc (estimator "hand_till"); this keeps the accuracy rows
score_tm <- unlist(lapply(ranger_tune$.metrics, FUN = function(x) x$.estimate[x$.estimator == "multiclass"]))
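
An equivalent but more explicit extraction filters on the metric name using tune's per-resample output, collect_metrics() with summarize = FALSE (score_tm2 is just an illustrative name):

score_tm2 <- collect_metrics(ranger_tune, summarize = FALSE) %>%
  filter(.metric == "accuracy") %>%
  pull(.estimate)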

comparison of accuracy estimates

# small jitter so overlapping points remain distinguishable
plot(score + rnorm(50, 0, 0.002), score_ct + rnorm(50, 0, 0.002),
     main = paste0("manual loop vs. caret; r = ", round(cor(score, score_ct), 3)))

plot(score_tm + rnorm(50, 0, 0.002), score_ct + rnorm(50, 0, 0.002),
     main = paste0("tidymodels vs. caret; r = ", round(cor(score_ct, score_tm), 3)))

So across the 50 folds, the accuracy estimates from the manual loop, caret, and tidymodels are correlated with one another, but they are not identical.
