diff --git a/slides/figures/overfitting-test-1.svg b/slides/figures/overfitting-test-1.svg new file mode 100644 index 00000000..0fb40f59 --- /dev/null +++ b/slides/figures/overfitting-test-1.svg @@ -0,0 +1,928 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +underfit + + + + + + + + + + +about right + + + + + + + + + + +overfit + + + + + + +-6 +-3 +0 +3 + + + + +-6 +-3 +0 +3 + + + + +-6 +-3 +0 +3 +-6 +-3 +0 +3 + + + + +Predictor A +Predictor B +Test Set + + diff --git a/slides/figures/overfitting-train-1.svg b/slides/figures/overfitting-train-1.svg new file mode 100644 index 00000000..5b3d6fc3 --- /dev/null +++ b/slides/figures/overfitting-train-1.svg @@ -0,0 +1,928 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +underfit + + + + + + + + + + +about right + + + + + + + + + + +overfit + + + + + + +-6 +-3 +0 +3 + + + + +-6 +-3 +0 +3 + + + + +-6 +-3 +0 +3 +-6 +-3 +0 +3 + + + + +Predictor A +Predictor B +Training Set + + diff --git a/slides/intro-04-evaluating-models.qmd b/slides/intro-04-evaluating-models.qmd index 79ffeb4f..7f8f09d3 100644 --- a/slides/intro-04-evaluating-models.qmd +++ b/slides/intro-04-evaluating-models.qmd @@ -332,11 +332,109 @@ Calibration plot: We bin observations according to predicted probability. In the ## Dangers of overfitting {.annotation} -![](https://raw.githubusercontent.com/topepo/2022-nyr-workshop/main/images/tuning-overfitting-train-1.svg) +```{r overfitting-train} +#| echo: false +data(parabolic) + +set.seed(15) +split <- initial_split(parabolic, strata = "class", prop = 1/2) + +training_set <- training(split) +testing_set <- testing(split) + +data_grid <- + crossing(X1 = seq(-6, 5, length = 200), + X2 = seq(-6, 5, length = 200)) + + +two_class_rec <- + recipe(class ~ ., data = parabolic) %>% + step_normalize(all_numeric_predictors()) + +svm_mod <- + svm_rbf(cost = tune(), rbf_sigma = 1) %>% + set_engine("kernlab") %>% + set_mode("classification") + +svm_wflow <- + workflow() %>% + add_recipe(two_class_rec) %>% + add_model(svm_mod) + +vals <- c("underfit", "about right", "overfit") +svm_res <- + tibble( + cost = c(0.005, 0.5, 5000), + label = factor(vals, levels = vals), + train = NA_real_, + test = NA_real_, + model = vector(mode = "list", length = 3) + ) + +for (i in 1:nrow(svm_res)) { + set.seed(27) + tmp_mod <- + svm_wflow %>% finalize_workflow(svm_res %>% slice(i) %>% select(cost)) %>% + fit(training_set) + svm_res$train[i] <- + roc_auc_vec(training_set$class, + predict(tmp_mod, training_set, type = "prob")$.pred_Class1) + svm_res$test[i] <- + roc_auc_vec(testing_set$class, + predict(tmp_mod, testing_set, type = "prob")$.pred_Class1) + svm_res$model[[i]] <- tmp_mod +} + + +te_plot <- + svm_res %>% + mutate(probs = map(model, ~ bind_cols( + data_grid, predict(.x, data_grid, type = "prob") + ))) %>% + dplyr::select(label, probs) %>% + unnest(cols = c(probs)) %>% + ggplot(aes(x = X1, y = X2)) + + geom_point( + data = testing_set, + aes(col = class), + alpha = .75, + cex = 1, + show.legend = FALSE + ) + + geom_contour(aes(z = .pred_Class1), breaks = 0.5, col = "black") + + facet_wrap( ~ label, nrow = 1) + + ggtitle("Test Set") + + labs(x = "Predictor A", y = "Predictor B") + +tr_plot <- + svm_res %>% + mutate(probs = map(model, ~ bind_cols( + data_grid, predict(.x, data_grid, type = "prob") + ))) %>% + dplyr::select(label, probs) %>% + unnest(cols = c(probs)) %>% + ggplot(aes(x = X1, y = X2)) + + geom_point( + data = training_set, + aes(col = class), + alpha = .75, + cex = 1, + show.legend = FALSE + ) + + geom_contour(aes(z = .pred_Class1), breaks = 0.5, col = "black") + + facet_wrap( ~ label, nrow = 1) + + ggtitle("Training Set") + + labs(x = "Predictor A", y = "Predictor B") + +tr_plot + lims(x = c(-6, 5), y = c(-6, 5)) +``` ## Dangers of overfitting ⚠️ -![](https://raw.githubusercontent.com/topepo/2022-nyr-workshop/main/images/tuning-overfitting-test-1.svg) +```{r overfitting-test} +#| echo: false +te_plot + lims(x = c(-6, 5), y = c(-6, 5)) +``` ## Dangers of overfitting ⚠️ `r hexes("yardstick")` diff --git a/slides/setup.R b/slides/setup.R index a9c280f4..43f3bc35 100644 --- a/slides/setup.R +++ b/slides/setup.R @@ -1,5 +1,6 @@ # packages needed to make the slides, but not needed for participants -# dev_pkgs <- c("countdown", "forcats", "hadley/emo", "sessioninfo", "svglite") +# dev_pkgs <- +# c("countdown", "forcats", "hadley/emo", "sessioninfo", "svglite", "kernlab") # pak::pak(dev_pkgs) # ----------------------------------------------------------------------