I am trying to follow this tutorial: https://hfshr.netlify.app/posts/2020-06-07-variable-inportance-with-fastshap/
This tutorial is about using a machine learning algorithm called "SHAP" that attempts to provide the user with a method to interpret the results of complicated "blackbox" style algorithms.
Following the tutorial, I was able to get everything to work - except the "force plots" at the end. I have provided the code I am using at the bottom. Could someone please help me in figuring out why these force plots are not working?
library(modeldata)
library(tidymodels)
library(tidyverse)
library(doParallel)
library(probably)
library(gt)
# Load the credit_data dataset (once) and drop every row that contains a
# missing value.
data("credit_data")
credit_data <- credit_data %>%
  drop_na()
# Fix the RNG seed so the partition below is reproducible.
set.seed(12)

# Partition the data 75/25, stratifying on the Status column.
split <- initial_split(
  credit_data,
  prop = 0.75,
  strata = "Status"
)

# Materialise the two partitions.
train <- training(split)
test <- testing(split)
# Preprocessing recipe: Status is the outcome, every other column a predictor.
# NOTE(review): step_bagimpute() appears to have been renamed to
# step_impute_bag() in newer recipes releases -- confirm against the installed
# version before switching.
rec <- recipe(Status ~ ., data = train) %>%
  step_bagimpute(Home, Marital, Job, Income, Assets, Debt) %>%
  # Use TRUE rather than T: `T` is an ordinary variable that can be reassigned.
  step_dummy(Home, Marital, Records, Job, one_hot = TRUE)
# Boosted-tree model specification. The hyperparameters are hand-picked,
# reasonable-looking values; nothing here has been tuned.
mod <- boost_tree(trees = 500, mtry = 6, min_n = 10, tree_depth = 5)
# Fit through the xgboost engine, as a classifier.
mod <- set_engine(mod, "xgboost")
mod <- set_mode(mod, "classification")
# Bundle the recipe and the model specification into a single workflow ...
xgboost_wflow <- workflow() %>%
  add_recipe(rec) %>%
  add_model(mod)
# ... then fit that workflow on the training set.
xgboost_wflow <- fit(xgboost_wflow, train)
# Run last_fit() on the workflow with the train/test split, requesting
# ROC AUC, PR AUC and accuracy.
xg_res <- last_fit(
  xgboost_wflow,
  split,
  metrics = metric_set(roc_auc, pr_auc, accuracy)
)

# Keep the predictions for later inspection.
preds <- collect_predictions(xg_res)

# Print the collected metrics.
collect_metrics(xg_res)
library(vip)
# Pull the fitted model object out of the fitted workflow.
# NOTE(review): pull_workflow_fit() looks deprecated in newer workflows
# releases (extract_fit_parsnip() seems to be the replacement) -- confirm
# before upgrading.
xg_mod <- xgboost_wflow %>%
  pull_workflow_fit()

# Variable-importance plot for the underlying engine fit.
vip(object = xg_mod$fit)
library(fastshap)
# Apply the preprocessing steps with prep and juice to the training data.
# This is done once; both the data frame and the predictor matrix below are
# derived from the same processed result instead of prepping the recipe twice.
feat <- prep(rec, train) %>%
  juice()

# Predictor matrix: the processed training data without the outcome column.
X <- feat %>%
  select(-Status) %>%
  as.matrix()

# Compute shapley values
shap <- explain(xg_mod$fit, X = X, exact = TRUE)

# Dependence plot for the Amount feature, coloured by Status. `feat` (which
# still contains Status) supplies the feature values.
autoplot(
  shap,
  type = "dependence",
  feature = "Amount",
  X = feat,
  smooth = TRUE,
  color_by = "Status"
)
# Tabulate the training rows whose predicted "bad" probability is the highest
# or the lowest, keeping the row index so the rows can be looked up afterwards.
predict(xgboost_wflow, train, type = "prob") %>%
  rownames_to_column(var = "rowid") %>%
  filter(
    .pred_bad == max(.pred_bad) | .pred_bad == min(.pred_bad)
  ) %>%
  gt() %>%
  # Columns 2 and 3 are shown with three decimals.
  fmt_number(columns = 2:3, decimals = 3)
library(patchwork)
# Contribution plots for two individual rows of the SHAP values
# (row numbers are hard-coded).
p1 <- autoplot(shap, type = "contribution", row_num = 1541) +
  labs(title = "Likely bad")
p2 <- autoplot(shap, type = "contribution", row_num = 1806) +
  labs(title = "Likely good")

# Combine the two plots with `+`.
p1 + p2
# This is the call that fails with the TypeError shown below. Before running
# it, the Python `shap` package was installed with "pip install shap" in conda.
# NOTE(review): the error message says save_html() was given a `plot_html`
# keyword it does not accept, which suggests a mismatch between the installed
# Python shap and what fastshap passes to it -- confirm the versions.
force_plot(
  object = shap[1541, ],
  feature_values = X[1541, ],
  display = "html",
  link = "logit"
)
Error in py_call_impl(callable, dots$args, dots$keywords) :
TypeError: save_html() got an unexpected keyword argument 'plot_html'
Thank you
force_plot() is rather experimental, and just happened to work. If you receive an error, make sure that you have the corresponding shap package (and its dependencies) installed. In any case, you should report this issue on the fastshap GitHub repo: https://github.com/bgreenwell/fastshap/issues. --BG