From 4b853c46fdbefdcda025070b7d33bf00131a9054 Mon Sep 17 00:00:00 2001 From: topepo Date: Fri, 17 Oct 2025 10:06:11 -0400 Subject: [PATCH 1/4] changes for #914 --- NEWS.md | 2 ++ R/control.R | 17 ++++++++++---- R/tune_grid.R | 2 +- man/control_bayes.Rd | 7 +++++- man/control_grid.Rd | 10 +++++++-- tests/testthat/_snaps/control.md | 11 +++++++++ tests/testthat/test-control.R | 38 ++++++++++++++++++++++++++++++++ 7 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/_snaps/control.md create mode 100644 tests/testthat/test-control.R diff --git a/NEWS.md b/NEWS.md index 215d57e1b..e092f4dbc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * Fixed a bug where `int_pctl()` wouldn't work on `last_fit()` outcomes when future parallelism was enabled. (#1099) +* The warning threshold when check the size of a workflow is now a parameter to the control functions and has a new default of 100MB. (#914) + # tune 2.0.0 ## Changes to `tune_grid()`. diff --git a/R/control.R b/R/control.R index 388ed88ff..d16886b28 100644 --- a/R/control.R +++ b/R/control.R @@ -37,7 +37,8 @@ control_grid <- function( save_workflow = FALSE, event_level = "first", parallel_over = NULL, - backend_options = NULL + backend_options = NULL, + workflow_size = 100.0 ) { # Any added arguments should also be added in superset control functions # in other packages @@ -50,6 +51,7 @@ control_grid <- function( check_string(event_level) check_character(pkgs, allow_null = TRUE) check_function(extract, allow_null = TRUE) + check_number_decimal(workflow_size) val_parallel_over(parallel_over, "control_grid()") @@ -62,7 +64,8 @@ control_grid <- function( save_workflow = save_workflow, event_level = event_level, parallel_over = parallel_over, - backend_options = backend_options + backend_options = backend_options, + workflow_size = workflow_size ) class(res) <- c("control_grid", "control_resamples") @@ -200,6 +203,9 @@ print.control_last_fit <- function(x, ...) { #' backend. Defaults to `NULL` for default backend options. #' @param allow_par A logical to allow parallel processing (if a parallel #' backend is registered). +#' @param workflow_size A non-negative number that is used as a threshold for a +#' warning regarding the size of the workflow. Only used when +#' `save_workflow = TRUE`. #' #' @inheritSection collect_predictions Hyperparameters and extracted objects #' @@ -240,7 +246,8 @@ control_bayes <- event_level = "first", parallel_over = NULL, backend_options = NULL, - allow_par = TRUE + allow_par = TRUE, + workflow_size = 100.0 ) { # Any added arguments should also be added in superset control functions # in other packages @@ -257,6 +264,7 @@ control_bayes <- check_number_whole(no_improve, min = 0, allow_infinite = TRUE) check_number_whole(uncertain, min = 0, allow_infinite = TRUE) check_number_whole(seed) + check_number_decimal(workflow_size) check_time_limit_arg(time_limit) @@ -285,7 +293,8 @@ control_bayes <- save_gp_scoring = save_gp_scoring, event_level = event_level, parallel_over = parallel_over, - backend_options = backend_options + backend_options = backend_options, + workflow_size = workflow_size ) class(res) <- "control_bayes" diff --git a/R/tune_grid.R b/R/tune_grid.R index 5080c5a9f..5d2cff7a0 100644 --- a/R/tune_grid.R +++ b/R/tune_grid.R @@ -439,7 +439,7 @@ set_workflow <- function(workflow, control) { if (!is.null(workflow$pre$actions$recipe)) { w_size <- utils::object.size(workflow$pre$actions$recipe) # make 5MB cutoff - if (w_size / 1024^2 > 5) { + if (w_size / 1024^2 > control$workflow_size) { msg <- paste0( "The workflow being saved contains a recipe, which is ", diff --git a/man/control_bayes.Rd b/man/control_bayes.Rd index 9fdce80d6..1eb011537 100644 --- a/man/control_bayes.Rd +++ b/man/control_bayes.Rd @@ -19,7 +19,8 @@ control_bayes( event_level = "first", parallel_over = NULL, backend_options = NULL, - allow_par = TRUE + allow_par = TRUE, + workflow_size = 100 ) } \arguments{ @@ -115,6 +116,10 @@ backend. Defaults to \code{NULL} for default backend options.} \item{allow_par}{A logical to allow parallel processing (if a parallel backend is registered).} + +\item{workflow_size}{A non-negative number that is used as a threshold for a +warning regarding the size of the workflow. Only used when +\code{save_workflow = TRUE}.} } \description{ Control aspects of the Bayesian search process diff --git a/man/control_grid.Rd b/man/control_grid.Rd index c800898a2..d1cbf7925 100644 --- a/man/control_grid.Rd +++ b/man/control_grid.Rd @@ -15,7 +15,8 @@ control_grid( save_workflow = FALSE, event_level = "first", parallel_over = NULL, - backend_options = NULL + backend_options = NULL, + workflow_size = 100 ) control_resamples( @@ -27,7 +28,8 @@ control_resamples( save_workflow = FALSE, event_level = "first", parallel_over = NULL, - backend_options = NULL + backend_options = NULL, + workflow_size = 100 ) new_backend_options(..., class = character()) @@ -88,6 +90,10 @@ reproducible between runs.} \item{backend_options}{An object of class \code{"tune_backend_options"} as created by \code{tune::new_backend_options()}, used to pass arguments to specific tuning backend. Defaults to \code{NULL} for default backend options.} + +\item{workflow_size}{A non-negative number that is used as a threshold for a +warning regarding the size of the workflow. Only used when +\code{save_workflow = TRUE}.} } \description{ Control aspects of the grid search process diff --git a/tests/testthat/_snaps/control.md b/tests/testthat/_snaps/control.md new file mode 100644 index 000000000..b7fe993e3 --- /dev/null +++ b/tests/testthat/_snaps/control.md @@ -0,0 +1,11 @@ +# workflow size warning + + Code + set.seed(1) + warns <- fit_resamples(lm_wflow, resamples = vfold_cv(MTCARS), control = control_resamples( + save_workflow = TRUE, workflow_size = 2)) + Message + i The workflow being saved contains a recipe, which is 2.7 Mb in i memory. If + this was not intentional, please set the control setting i `save_workflow = + FALSE`. + diff --git a/tests/testthat/test-control.R b/tests/testthat/test-control.R new file mode 100644 index 000000000..b7206cb99 --- /dev/null +++ b/tests/testthat/test-control.R @@ -0,0 +1,38 @@ +test_that("workflow size warning", { + # A larger data set to trip the warning + MTCARS <- mtcars[rep(1:32, each = 1000), ] + + lm_rec <- recipe(mpg ~ ., data = MTCARS) + lm_wflow <- workflow(lm_rec, linear_reg() |> set_engine("lm", x = TRUE)) + # About 2.7MB when fit + + expect_silent({ + set.seed(1) + no_warning <- + lm_wflow |> + fit_resamples( + resamples = vfold_cv(MTCARS), + control = control_resamples(save_workflow = TRUE, workflow_size = Inf) + ) + }) + + expect_snapshot({ + set.seed(1) + warns <- + lm_wflow |> + fit_resamples( + resamples = vfold_cv(MTCARS), + control = control_resamples(save_workflow = TRUE, workflow_size = 2) + ) + }) + + expect_silent({ + set.seed(1) + no_save <- + lm_wflow |> + fit_resamples( + resamples = vfold_cv(MTCARS), + control = control_resamples(save_workflow = FALSE, workflow_size = 2) + ) + }) +}) From d6b22d781e66368b7f3046b5009841244988abd9 Mon Sep 17 00:00:00 2001 From: topepo Date: Fri, 17 Oct 2025 11:19:34 -0400 Subject: [PATCH 2/4] remove older tests --- tests/testthat/test-grid.R | 16 ---------------- tests/testthat/test-resample.R | 9 --------- 2 files changed, 25 deletions(-) diff --git a/tests/testthat/test-grid.R b/tests/testthat/test-grid.R index 6df7fe06a..19f64495b 100644 --- a/tests/testthat/test-grid.R +++ b/tests/testthat/test-grid.R @@ -786,20 +786,4 @@ test_that("retain extra attributes", { ) expect_null(attr(res, "workflow")) expect_true(inherits(attr(res2, "workflow"), "workflow")) - - wflow2 <- workflow() |> - add_recipe(recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ])) |> - add_model(helper_objects$svm_mod) - pset2 <- extract_parameter_set_dials(wflow2) - grid2 <- dials::grid_regular(pset2, levels = 3) - - expect_message( - tune_grid( - wflow2, - resamples = folds, - grid = grid2, - control = control_grid(save_workflow = TRUE) - ), - "being saved contains a recipe, which is" - ) }) diff --git a/tests/testthat/test-resample.R b/tests/testthat/test-resample.R index 231f0fe50..6ebd3e74c 100644 --- a/tests/testthat/test-resample.R +++ b/tests/testthat/test-resample.R @@ -484,15 +484,6 @@ test_that("retain extra attributes", { ) expect_null(attr(res, "workflow")) expect_true(inherits(attr(res2, "workflow"), "workflow")) - - expect_snapshot( - fit_resamples( - lin_mod, - recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ]), - folds, - control = control_resamples(save_workflow = TRUE) - ) - ) }) From 8f318c9b1a7b9bcff8189db6b4deb8af7469f5cc Mon Sep 17 00:00:00 2001 From: topepo Date: Fri, 17 Oct 2025 12:01:37 -0400 Subject: [PATCH 3/4] fix news entry --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index d38a22e76..8d9b5b8ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,11 @@ # tune (development version) +* The warning threshold when check the size of a workflow is now a parameter to the control functions and has a new default of 100MB. (#914) + # tune 2.0.1 * Fixed a bug where `int_pctl()` wouldn't work on `last_fit()` outcomes when future parallelism was enabled. (#1099) -* The warning threshold when check the size of a workflow is now a parameter to the control functions and has a new default of 100MB. (#914) - # tune 2.0.0 ## Changes to `tune_grid()`. From f5578246e41a5e7f9c2b402036bb6307686890a5 Mon Sep 17 00:00:00 2001 From: topepo Date: Sat, 18 Oct 2025 15:25:46 -0400 Subject: [PATCH 4/4] changes based on reviewer comments --- R/control.R | 4 ++-- R/tune_grid.R | 19 +++++++++++-------- man/control_bayes.Rd | 4 ++-- man/control_grid.Rd | 4 ++-- tests/testthat/_snaps/control.md | 9 ++++----- tests/testthat/test-control.R | 2 ++ 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/R/control.R b/R/control.R index d16886b28..264f92d50 100644 --- a/R/control.R +++ b/R/control.R @@ -203,8 +203,8 @@ print.control_last_fit <- function(x, ...) { #' backend. Defaults to `NULL` for default backend options. #' @param allow_par A logical to allow parallel processing (if a parallel #' backend is registered). -#' @param workflow_size A non-negative number that is used as a threshold for a -#' warning regarding the size of the workflow. Only used when +#' @param workflow_size A non-negative number (in MB) that is used as a +#' threshold for a warning regarding the size of the workflow. Only used when #' `save_workflow = TRUE`. #' #' @inheritSection collect_predictions Hyperparameters and extracted objects diff --git a/R/tune_grid.R b/R/tune_grid.R index 5d2cff7a0..4e6f49f94 100644 --- a/R/tune_grid.R +++ b/R/tune_grid.R @@ -436,16 +436,19 @@ pull_rset_attributes <- function(x) { set_workflow <- function(workflow, control) { if (control$save_workflow) { - if (!is.null(workflow$pre$actions$recipe)) { - w_size <- utils::object.size(workflow$pre$actions$recipe) - # make 5MB cutoff + if (!is.null(workflow)) { + w_size <- utils::object.size(workflow) if (w_size / 1024^2 > control$workflow_size) { + rounded <- round(w_size / 1024^2, 1) msg <- - paste0( - "The workflow being saved contains a recipe, which is ", - format(w_size, units = "Mb", digits = 2), - " in memory. If this was not intentional, please set the control ", - "setting `save_workflow = FALSE`." + cli::format_inline( + "The workflow being saved is large ({rounded} MB). If this + was not intentional, please set the control setting + {.arg save_workflow} to be {.code FALSE} or change the threshold + for this warning (currently {control$workflow_size} MB) with the + {.arg workflow_size} argument.", + keep_whitespace = FALSE, + collapse = FALSE ) cols <- get_tune_colors() msg <- strwrap( diff --git a/man/control_bayes.Rd b/man/control_bayes.Rd index 1eb011537..66f006a73 100644 --- a/man/control_bayes.Rd +++ b/man/control_bayes.Rd @@ -117,8 +117,8 @@ backend. Defaults to \code{NULL} for default backend options.} \item{allow_par}{A logical to allow parallel processing (if a parallel backend is registered).} -\item{workflow_size}{A non-negative number that is used as a threshold for a -warning regarding the size of the workflow. Only used when +\item{workflow_size}{A non-negative number (in MB) that is used as a +threshold for a warning regarding the size of the workflow. Only used when \code{save_workflow = TRUE}.} } \description{ diff --git a/man/control_grid.Rd b/man/control_grid.Rd index d1cbf7925..b32538a03 100644 --- a/man/control_grid.Rd +++ b/man/control_grid.Rd @@ -91,8 +91,8 @@ reproducible between runs.} by \code{tune::new_backend_options()}, used to pass arguments to specific tuning backend. Defaults to \code{NULL} for default backend options.} -\item{workflow_size}{A non-negative number that is used as a threshold for a -warning regarding the size of the workflow. Only used when +\item{workflow_size}{A non-negative number (in MB) that is used as a +threshold for a warning regarding the size of the workflow. Only used when \code{save_workflow = TRUE}.} } \description{ diff --git a/tests/testthat/_snaps/control.md b/tests/testthat/_snaps/control.md index b7fe993e3..e747970c7 100644 --- a/tests/testthat/_snaps/control.md +++ b/tests/testthat/_snaps/control.md @@ -2,10 +2,9 @@ Code set.seed(1) - warns <- fit_resamples(lm_wflow, resamples = vfold_cv(MTCARS), control = control_resamples( - save_workflow = TRUE, workflow_size = 2)) + warns <- fit_resamples(lm_wflow, resamples = vfold_cv(MTCARS), control = control_resamples(save_workflow = TRUE, workflow_size = 2)) Message - i The workflow being saved contains a recipe, which is 2.7 Mb in i memory. If - this was not intentional, please set the control setting i `save_workflow = - FALSE`. + i The workflow being saved is large (2.7 MB). If this was not intentional, + please set the control setting `save_workflow` to be `FALSE` or change the + threshold for this warning (currently 2 MB) with the `workflow_size` argument. diff --git a/tests/testthat/test-control.R b/tests/testthat/test-control.R index b7206cb99..8f7f48f37 100644 --- a/tests/testthat/test-control.R +++ b/tests/testthat/test-control.R @@ -1,4 +1,6 @@ test_that("workflow size warning", { + withr::local_options(width = 500) + # A larger data set to trip the warning MTCARS <- mtcars[rep(1:32, each = 1000), ]