Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# tune (development version)

* The threshold for the warning issued when checking the size of a saved workflow is now a parameter (`workflow_size`) of the control functions and has a new default of 100MB. (#914)

# tune 2.0.1

* Fixed a bug where `int_pctl()` wouldn't work on `last_fit()` outcomes when future parallelism was enabled. (#1099)
Expand Down
17 changes: 13 additions & 4 deletions R/control.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ control_grid <- function(
save_workflow = FALSE,
event_level = "first",
parallel_over = NULL,
backend_options = NULL
backend_options = NULL,
workflow_size = 100.0
) {
# Any added arguments should also be added in superset control functions
# in other packages
Expand All @@ -50,6 +51,7 @@ control_grid <- function(
check_string(event_level)
check_character(pkgs, allow_null = TRUE)
check_function(extract, allow_null = TRUE)
check_number_decimal(workflow_size)

val_parallel_over(parallel_over, "control_grid()")

Expand All @@ -62,7 +64,8 @@ control_grid <- function(
save_workflow = save_workflow,
event_level = event_level,
parallel_over = parallel_over,
backend_options = backend_options
backend_options = backend_options,
workflow_size = workflow_size
)

class(res) <- c("control_grid", "control_resamples")
Expand Down Expand Up @@ -200,6 +203,9 @@ print.control_last_fit <- function(x, ...) {
#' backend. Defaults to `NULL` for default backend options.
#' @param allow_par A logical to allow parallel processing (if a parallel
#' backend is registered).
#' @param workflow_size A non-negative number, in megabytes (MB), that is used
#' as the threshold for a warning regarding the size of the workflow. Only
#' used when `save_workflow = TRUE`.
#'
#' @inheritSection collect_predictions Hyperparameters and extracted objects
#'
Expand Down Expand Up @@ -240,7 +246,8 @@ control_bayes <-
event_level = "first",
parallel_over = NULL,
backend_options = NULL,
allow_par = TRUE
allow_par = TRUE,
workflow_size = 100.0
) {
# Any added arguments should also be added in superset control functions
# in other packages
Expand All @@ -257,6 +264,7 @@ control_bayes <-
check_number_whole(no_improve, min = 0, allow_infinite = TRUE)
check_number_whole(uncertain, min = 0, allow_infinite = TRUE)
check_number_whole(seed)
check_number_decimal(workflow_size)

check_time_limit_arg(time_limit)

Expand Down Expand Up @@ -285,7 +293,8 @@ control_bayes <-
save_gp_scoring = save_gp_scoring,
event_level = event_level,
parallel_over = parallel_over,
backend_options = backend_options
backend_options = backend_options,
workflow_size = workflow_size
)

class(res) <- "control_bayes"
Expand Down
2 changes: 1 addition & 1 deletion R/tune_grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ set_workflow <- function(workflow, control) {
if (!is.null(workflow$pre$actions$recipe)) {
w_size <- utils::object.size(workflow$pre$actions$recipe)
# compare the recipe's size (in MB) against the warning cutoff
if (w_size / 1024^2 > 5) {
if (w_size / 1024^2 > control$workflow_size) {
msg <-
paste0(
"The workflow being saved contains a recipe, which is ",
Expand Down
7 changes: 6 additions & 1 deletion man/control_bayes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions man/control_grid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/_snaps/control.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# workflow size warning

Code
set.seed(1)
warns <- fit_resamples(lm_wflow, resamples = vfold_cv(MTCARS), control = control_resamples(
save_workflow = TRUE, workflow_size = 2))
Message
i The workflow being saved contains a recipe, which is 2.7 Mb in i memory. If
this was not intentional, please set the control setting i `save_workflow =
FALSE`.

38 changes: 38 additions & 0 deletions tests/testthat/test-control.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
test_that("workflow size warning", {
  # Repeat every mtcars row 1000 times so that the recipe stored in the
  # workflow (about 2.7MB) is large enough to exceed a small threshold.
  MTCARS <- mtcars[rep(seq_len(32), each = 1000), ]

  lm_spec <- set_engine(linear_reg(), "lm", x = TRUE)
  lm_wflow <- workflow(recipe(mpg ~ ., data = MTCARS), lm_spec)

  # Case 1: the workflow is saved, but an infinite threshold is used, so the
  # call is expected to be silent.
  expect_silent({
    set.seed(1)
    no_warning <- fit_resamples(
      lm_wflow,
      resamples = vfold_cv(MTCARS),
      control = control_resamples(save_workflow = TRUE, workflow_size = Inf)
    )
  })

  # Case 2: the workflow is saved and the threshold (2) is below the recipe
  # size; the emitted message is recorded in the snapshot file.
  expect_snapshot({
    set.seed(1)
    warns <- fit_resamples(
      lm_wflow,
      resamples = vfold_cv(MTCARS),
      control = control_resamples(save_workflow = TRUE, workflow_size = 2)
    )
  })

  # Case 3: same small threshold, but the workflow is not saved, so the call
  # is expected to be silent.
  expect_silent({
    set.seed(1)
    no_save <- fit_resamples(
      lm_wflow,
      resamples = vfold_cv(MTCARS),
      control = control_resamples(save_workflow = FALSE, workflow_size = 2)
    )
  })
})
16 changes: 0 additions & 16 deletions tests/testthat/test-grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -786,20 +786,4 @@ test_that("retain extra attributes", {
)
expect_null(attr(res, "workflow"))
expect_true(inherits(attr(res2, "workflow"), "workflow"))

wflow2 <- workflow() |>
add_recipe(recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ])) |>
add_model(helper_objects$svm_mod)
pset2 <- extract_parameter_set_dials(wflow2)
grid2 <- dials::grid_regular(pset2, levels = 3)

expect_message(
tune_grid(
wflow2,
resamples = folds,
grid = grid2,
control = control_grid(save_workflow = TRUE)
),
"being saved contains a recipe, which is"
)
})
9 changes: 0 additions & 9 deletions tests/testthat/test-resample.R
Original file line number Diff line number Diff line change
Expand Up @@ -484,15 +484,6 @@ test_that("retain extra attributes", {
)
expect_null(attr(res, "workflow"))
expect_true(inherits(attr(res2, "workflow"), "workflow"))

expect_snapshot(
fit_resamples(
lin_mod,
recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ]),
folds,
control = control_resamples(save_workflow = TRUE)
)
)
})


Expand Down