From 38fe8a01f5eae1274e08e2921c1082dd09d02158 Mon Sep 17 00:00:00 2001 From: topepo Date: Tue, 12 Nov 2024 17:58:07 -0500 Subject: [PATCH 1/7] refactored compute_grid_info() using dplyr, purrr, and tidyr --- R/grid_helpers.R | 77 ++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/R/grid_helpers.R b/R/grid_helpers.R index 6fcad549..fe643ace 100644 --- a/R/grid_helpers.R +++ b/R/grid_helpers.R @@ -318,50 +318,69 @@ compute_grid_info <- function(workflow, grid) { res <- min_grid(extract_spec_parsnip(workflow), grid) + syms_pre <- rlang::syms(parameters_preprocessor$id) + syms_mod <- rlang::syms(parameters_model$id) + + # ---------------------------------------------------------------------------- + # Create an order of execution to train the preprocessor (if any). This will + # define a loop over any preprocessing tuning parameter combinations. if (any_parameters_preprocessor) { - res$.iter_preprocessor <- seq_len(nrow(res)) + pp_df <- + dplyr::distinct(res, !!!syms_pre) %>% + dplyr::arrange(!!!syms_pre) %>% + dplyr::mutate( + .iter_preprocessor = dplyr::row_number(), + .lab_pre = recipes::names0(max(dplyr::n()), "Preprocessor") + ) + res <- + dplyr::full_join(res, pp_df, by = parameters_preprocessor$id) %>% + dplyr::arrange(.iter_preprocessor) } else { res$.iter_preprocessor <- 1L + res$.lab_pre <- "Preprocessor1" } + # Make the label shown in the grid and in loggining res$.msg_preprocessor <- new_msgs_preprocessor( - seq_len(max(res$.iter_preprocessor)), + res$.iter_preprocessor, max(res$.iter_preprocessor) ) - if (nrow(res) != nrow(grid) || - (any_parameters_model && !any_parameters_preprocessor)) { - res$.iter_model <- seq_len(dplyr::n_distinct(res[parameters_model$id])) - } else { - res$.iter_model <- 1L - } - - res$.iter_config <- list(list()) - for (row in seq_len(nrow(res))) { - res$.iter_config[row] <- list(iter_config(res[row, ])) - } + # ---------------------------------------------------------------------------- + # Now make a similar iterator across models. Conditioning on each unique + # preprocessing candidate set, make an iterator for the model candidate sets + # (if any) + + res <- + res %>% + dplyr::group_nest(.iter_preprocessor, keep = TRUE) %>% + dplyr::mutate( + .iter_config = purrr::map(data, make_iter_config), + .model = purrr::map(data, ~ tibble::tibble(.iter_model = seq_len(nrow(.x)))) + ) %>% + dplyr::select(-.iter_preprocessor) %>% + tidyr::unnest(cols = c(data, .model, .iter_config)) %>% + dplyr::select(-.lab_pre) %>% + dplyr::relocate(dplyr::starts_with(".iter")) res$.msg_model <- - new_msgs_model(i = res$.iter_model, n = max(res$.iter_model), res$.msg_preprocessor) + new_msgs_model(i = res$.iter_model, + n = max(res$.iter_model), + res$.msg_preprocessor) - res + res %>% + dplyr::relocate(dplyr::starts_with(".msg")) } -iter_config <- function(res_row) { - submodels <- res_row$.submodels[[1]] - if (identical(submodels, list())) { - models <- res_row$.iter_model - } else { - models <- seq_len(length(submodels[[1]]) + 1) - } - - paste0( - "Preprocessor", - res_row$.iter_preprocessor, - "_Model", - format_with_padding(models) - ) +make_iter_config <- function(dat) { + # Compute labels for the models *within* each preprocessing loop. + num_submodels <- purrr::map_int(dat$.submodels, ~ length(unlist(.x))) + num_models <- sum(num_submodels + 1) # +1 for the model being trained + .mod_label <- recipes::names0(num_models, "Model") + .iter_config <- paste(dat$.lab_pre[1], .mod_label, sep = "_") + .iter_config <- vctrs::vec_chop(.iter_config, sizes = num_submodels + 1) + tibble::tibble(.iter_config = .iter_config) } # This generates a "dummy" grid_info object that has the same From 2a78da3477efc77a4fe64e128fcf7318b4c8dfb2 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 10:20:05 -0500 Subject: [PATCH 2/7] remove padding in .config --- R/grid_helpers.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/grid_helpers.R b/R/grid_helpers.R index fe643ace..102d496f 100644 --- a/R/grid_helpers.R +++ b/R/grid_helpers.R @@ -330,7 +330,7 @@ compute_grid_info <- function(workflow, grid) { dplyr::arrange(!!!syms_pre) %>% dplyr::mutate( .iter_preprocessor = dplyr::row_number(), - .lab_pre = recipes::names0(max(dplyr::n()), "Preprocessor") + .lab_pre = paste0("Preprocessor", 1:dplyr::n()) ) res <- dplyr::full_join(res, pp_df, by = parameters_preprocessor$id) %>% @@ -377,7 +377,7 @@ make_iter_config <- function(dat) { # Compute labels for the models *within* each preprocessing loop. num_submodels <- purrr::map_int(dat$.submodels, ~ length(unlist(.x))) num_models <- sum(num_submodels + 1) # +1 for the model being trained - .mod_label <- recipes::names0(num_models, "Model") + .mod_label <- paste0("Model", 1:num_models) .iter_config <- paste(dat$.lab_pre[1], .mod_label, sep = "_") .iter_config <- vctrs::vec_chop(.iter_config, sizes = num_submodels + 1) tibble::tibble(.iter_config = .iter_config) From a5d94aa82a75d20a27be9fb2e0fe47908b9f5475 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 10:20:24 -0500 Subject: [PATCH 3/7] sort values for tests --- tests/testthat/test-grid_helpers.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-grid_helpers.R b/tests/testthat/test-grid_helpers.R index e0acfe1a..a78efa1d 100644 --- a/tests/testthat/test-grid_helpers.R +++ b/tests/testthat/test-grid_helpers.R @@ -15,7 +15,7 @@ test_that("compute_grid_info - recipe only", { expect_equal(res$.iter_preprocessor, 1:5) expect_equal(res$.msg_preprocessor, paste0("preprocessor ", 1:5, "/5")) - expect_equal(res$deg_free, grid$deg_free) + expect_equal(sort(res$deg_free), sort(grid$deg_free)) expect_equal(res$.iter_model, rep(1, 5)) expect_equal(res$.iter_config, as.list(paste0("Preprocessor", 1:5, "_Model1"))) expect_equal(res$.msg_model, paste0("preprocessor ", 1:5, "/5, model 1/1")) @@ -27,6 +27,7 @@ test_that("compute_grid_info - recipe only", { ignore.order = TRUE ) expect_equal(nrow(res), 5) + expect_equal(vctrs::vec_unique_count(res$.iter_config), nrow(grid)) }) test_that("compute_grid_info - model only (no submodels)", { @@ -57,6 +58,7 @@ test_that("compute_grid_info - model only (no submodels)", { ignore.order = TRUE ) expect_equal(nrow(res), 5) + expect_equal(vctrs::vec_unique_count(res$.iter_config), nrow(grid)) }) test_that("compute_grid_info - model only (with submodels)", { @@ -107,8 +109,8 @@ test_that("compute_grid_info - recipe and model (no submodels)", { expect_equal(res$.iter_preprocessor, 1:5) expect_equal(res$.msg_preprocessor, paste0("preprocessor ", 1:5, "/5")) - expect_equal(res$learn_rate, grid$learn_rate) - expect_equal(res$deg_free, grid$deg_free) + expect_equal(sort(res$learn_rate), sort(grid$learn_rate)) + expect_equal(sort(res$deg_free), sort(grid$deg_free)) expect_equal(res$.iter_model, rep(1, 5)) expect_equal(res$.iter_config, as.list(paste0("Preprocessor", 1:5, "_Model1"))) expect_equal(res$.msg_model, paste0("preprocessor ", 1:5, "/5, model 1/1")) @@ -120,6 +122,7 @@ test_that("compute_grid_info - recipe and model (no submodels)", { ignore.order = TRUE ) expect_equal(nrow(res), 5) + expect_equal(vctrs::vec_unique_count(res$.iter_config), nrow(grid)) }) test_that("compute_grid_info - recipe and model (with submodels)", { @@ -169,6 +172,7 @@ test_that("compute_grid_info - recipe and model (with submodels)", { ) expect_equal(nrow(res), 3) }) + test_that("compute_grid_info - recipe and model (with and without submodels)", { library(workflows) library(parsnip) @@ -193,7 +197,7 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { unique(res$.msg_preprocessor), paste0("preprocessor ", 1:5, "/5") ) - expect_equal(res$trees, c(rep(max(grid$trees), 10), 1)) + expect_equal(sort(res$trees), sort(c(rep(max(grid$trees), 10), 1))) expect_equal(unique(res$.iter_model), 1:3) expect_equal( res$.iter_config[1:3], @@ -325,4 +329,5 @@ test_that("compute_grid_info - recipe and model (no submodels but has inner grid ignore.order = TRUE ) expect_equal(nrow(res), 9) + expect_equal(vctrs::vec_unique_count(res$.iter_config), nrow(grid)) }) From d71936066a7e93f7b1a95bd54754d1e6d5129251 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 11:16:35 -0500 Subject: [PATCH 4/7] update test specification for different sorting --- tests/testthat/test-grid_helpers.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-grid_helpers.R b/tests/testthat/test-grid_helpers.R index a78efa1d..428617e8 100644 --- a/tests/testthat/test-grid_helpers.R +++ b/tests/testthat/test-grid_helpers.R @@ -189,7 +189,9 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { # use grid_regular to (partially) trigger submodel trick set.seed(1) param_set <- extract_parameter_set_dials(wflow) - grid <- bind_rows(grid_regular(param_set), grid_space_filling(param_set)) + grid <- + bind_rows(grid_regular(param_set), grid_space_filling(param_set)) %>% + arrange(deg_free, loss_reduction, trees) res <- compute_grid_info(wflow, grid) expect_equal(length(unique(res$.iter_preprocessor)), 5) @@ -200,14 +202,17 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { expect_equal(sort(res$trees), sort(c(rep(max(grid$trees), 10), 1))) expect_equal(unique(res$.iter_model), 1:3) expect_equal( - res$.iter_config[1:3], + res$.iter_config[res$.iter_preprocessor == 1], list( c("Preprocessor1_Model1", "Preprocessor1_Model2", "Preprocessor1_Model3", "Preprocessor1_Model4"), - c("Preprocessor2_Model1", "Preprocessor2_Model2", "Preprocessor2_Model3"), - c("Preprocessor3_Model1", "Preprocessor3_Model2", "Preprocessor3_Model3") + c("Preprocessor1_Model5", "Preprocessor1_Model6", "Preprocessor1_Model7"), + c("Preprocessor1_Model8", "Preprocessor1_Model9", "Preprocessor1_Model10") ) ) - expect_equal(res$.msg_model[1:3], paste0("preprocessor ", 1:3, "/5, model 1/3")) + expect_equal( + res$.msg_model[res$.iter_preprocessor == 1], + paste0("preprocessor 1/5, model ", 1:3, "/3") + ) expect_equal( res$.submodels[1:3], list( @@ -216,6 +221,12 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { list(trees = c(1L, 1000L)) ) ) + expect_equal( + res %>% + mutate(num_models = purrr::map_int(.iter_config, length)) %>% + summarize(n = sum(num_models), .by = c(deg_free)), + grid %>% count(deg_free) + ) expect_named( res, c(".iter_preprocessor", ".msg_preprocessor", "deg_free", "trees", From 7450c12153c00b7c9432f37ea7636b8b3ee8e0f1 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 11:16:53 -0500 Subject: [PATCH 5/7] fix bug in the messages --- R/grid_helpers.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/grid_helpers.R b/R/grid_helpers.R index 102d496f..bca9d80d 100644 --- a/R/grid_helpers.R +++ b/R/grid_helpers.R @@ -357,7 +357,8 @@ compute_grid_info <- function(workflow, grid) { dplyr::group_nest(.iter_preprocessor, keep = TRUE) %>% dplyr::mutate( .iter_config = purrr::map(data, make_iter_config), - .model = purrr::map(data, ~ tibble::tibble(.iter_model = seq_len(nrow(.x)))) + .model = purrr::map(data, ~ tibble::tibble(.iter_model = seq_len(nrow(.x)))), + .num_models = purrr::map_int(.model, nrow) ) %>% dplyr::select(-.iter_preprocessor) %>% tidyr::unnest(cols = c(data, .model, .iter_config)) %>% @@ -366,10 +367,11 @@ compute_grid_info <- function(workflow, grid) { res$.msg_model <- new_msgs_model(i = res$.iter_model, - n = max(res$.iter_model), + n = res$.num_models, res$.msg_preprocessor) res %>% + dplyr::select(-.num_models) %>% dplyr::relocate(dplyr::starts_with(".msg")) } From 39359873e5e5fb40e181dcbe0628e9cff8dd6cba Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 12:12:44 -0500 Subject: [PATCH 6/7] update snapshots with new remotes --- tests/testthat/_snaps/bayes.md | 12 ++++++------ tests/testthat/_snaps/checks.md | 16 ++++++++++++++++ tests/testthat/_snaps/grid.md | 12 ++++++------ tests/testthat/_snaps/resample.md | 10 +++++----- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/tests/testthat/_snaps/bayes.md b/tests/testthat/_snaps/bayes.md index d2e2f99f..4d50a185 100644 --- a/tests/testthat/_snaps/bayes.md +++ b/tests/testthat/_snaps/bayes.md @@ -393,12 +393,12 @@ Message x Fold1: preprocessor 1/1: Error in `step_spline_b()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 0) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. x Fold2: preprocessor 1/1: Error in `step_spline_b()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 0) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. Condition Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. @@ -415,10 +415,10 @@ Message x Fold1: preprocessor 1/1: Error in `get_all_predictors()`: - ! The following predictors were not found in `data`: 'z'. + ! The following predictor was not found in `data`: "z". x Fold2: preprocessor 1/1: Error in `get_all_predictors()`: - ! The following predictors were not found in `data`: 'z'. + ! The following predictor was not found in `data`: "z". Condition Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. diff --git a/tests/testthat/_snaps/checks.md b/tests/testthat/_snaps/checks.md index 94524b2d..d98ea891 100644 --- a/tests/testthat/_snaps/checks.md +++ b/tests/testthat/_snaps/checks.md @@ -92,6 +92,22 @@ Error in `tune:::check_workflow()`: ! A parsnip model is required. +# errors informatively when needed package isn't installed + + Code + check_workflow(stan_wflow) + Condition + Error: + ! Package install is required for rstanarm. + +--- + + Code + fit_resamples(stan_wflow, rsample::bootstraps(mtcars)) + Condition + Error in `fit_resamples()`: + ! Package install is required for rstanarm. + # workflow objects (will not tune, tidymodels/tune#548) Code diff --git a/tests/testthat/_snaps/grid.md b/tests/testthat/_snaps/grid.md index 4f64bce4..a975cfb4 100644 --- a/tests/testthat/_snaps/grid.md +++ b/tests/testthat/_snaps/grid.md @@ -8,12 +8,12 @@ Message x Fold1: preprocessor 1/1: Error in `step_spline_b()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 0) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. x Fold2: preprocessor 1/1: Error in `step_spline_b()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 0) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. Condition Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. @@ -28,10 +28,10 @@ Message x Fold1: preprocessor 1/1: Error in `get_all_predictors()`: - ! The following predictors were not found in `data`: 'z'. + ! The following predictor was not found in `data`: "z". x Fold2: preprocessor 1/1: Error in `get_all_predictors()`: - ! The following predictors were not found in `data`: 'z'. + ! The following predictor was not found in `data`: "z". Condition Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. diff --git a/tests/testthat/_snaps/resample.md b/tests/testthat/_snaps/resample.md index e768f80c..9fc0e154 100644 --- a/tests/testthat/_snaps/resample.md +++ b/tests/testthat/_snaps/resample.md @@ -5,12 +5,12 @@ Message x Fold1: preprocessor 1/1: Error in `step_spline_natural()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 2) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. x Fold2: preprocessor 1/1: Error in `step_spline_natural()`: - Caused by error in `spline_msg()`: - ! Error in if (df < 2) : missing value where TRUE/FALSE needed + Caused by error in `prep()`: + ! `deg_free` must be a whole number, not a numeric `NA`. Condition Warning: All models failed. Run `show_notes(.Last.tune.result)` for more information. @@ -20,7 +20,7 @@ Code note Output - [1] "Error in `step_spline_natural()`:\nCaused by error in `spline_msg()`:\n! Error in if (df < 2) { : missing value where TRUE/FALSE needed" + [1] "Error in `step_spline_natural()`:\nCaused by error in `prep()`:\n! `deg_free` must be a whole number, not a numeric `NA`." # failure in variables tidyselect specification is caught elegantly From cd1a79f5d1ca90ec57d0204f77291ffa7ebdd7c0 Mon Sep 17 00:00:00 2001 From: topepo Date: Wed, 13 Nov 2024 17:09:47 -0500 Subject: [PATCH 7/7] added padding back --- R/0_imports.R | 2 +- R/grid_helpers.R | 4 ++-- tests/testthat/test-grid_helpers.R | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/0_imports.R b/R/0_imports.R index de3353c5..c7e4b448 100644 --- a/R/0_imports.R +++ b/R/0_imports.R @@ -47,7 +47,7 @@ utils::globalVariables( "rowwise", ".best", "location", "msg", "..object", ".eval_time", ".pred_survival", ".pred_time", ".weight_censored", "nice_time", "time_metric", ".lower", ".upper", "i", "results", "term", ".alpha", - ".method", "old_term" + ".method", "old_term", ".lab_pre", ".model", ".num_models" ) ) diff --git a/R/grid_helpers.R b/R/grid_helpers.R index bca9d80d..365386d5 100644 --- a/R/grid_helpers.R +++ b/R/grid_helpers.R @@ -330,7 +330,7 @@ compute_grid_info <- function(workflow, grid) { dplyr::arrange(!!!syms_pre) %>% dplyr::mutate( .iter_preprocessor = dplyr::row_number(), - .lab_pre = paste0("Preprocessor", 1:dplyr::n()) + .lab_pre = recipes::names0(max(dplyr::n()), "Preprocessor") ) res <- dplyr::full_join(res, pp_df, by = parameters_preprocessor$id) %>% @@ -379,7 +379,7 @@ make_iter_config <- function(dat) { # Compute labels for the models *within* each preprocessing loop. num_submodels <- purrr::map_int(dat$.submodels, ~ length(unlist(.x))) num_models <- sum(num_submodels + 1) # +1 for the model being trained - .mod_label <- paste0("Model", 1:num_models) + .mod_label <- recipes::names0(num_models, "Model") .iter_config <- paste(dat$.lab_pre[1], .mod_label, sep = "_") .iter_config <- vctrs::vec_chop(.iter_config, sizes = num_submodels + 1) tibble::tibble(.iter_config = .iter_config) diff --git a/tests/testthat/test-grid_helpers.R b/tests/testthat/test-grid_helpers.R index 428617e8..dff242c9 100644 --- a/tests/testthat/test-grid_helpers.R +++ b/tests/testthat/test-grid_helpers.R @@ -204,9 +204,9 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { expect_equal( res$.iter_config[res$.iter_preprocessor == 1], list( - c("Preprocessor1_Model1", "Preprocessor1_Model2", "Preprocessor1_Model3", "Preprocessor1_Model4"), - c("Preprocessor1_Model5", "Preprocessor1_Model6", "Preprocessor1_Model7"), - c("Preprocessor1_Model8", "Preprocessor1_Model9", "Preprocessor1_Model10") + c("Preprocessor1_Model01", "Preprocessor1_Model02", "Preprocessor1_Model03", "Preprocessor1_Model04"), + c("Preprocessor1_Model05", "Preprocessor1_Model06", "Preprocessor1_Model07"), + c("Preprocessor1_Model08", "Preprocessor1_Model09", "Preprocessor1_Model10") ) ) expect_equal(