From b605a428eece5578869a651572b58b9359369db5 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sun, 9 Nov 2025 15:09:54 +0000 Subject: [PATCH 01/15] feat: `optimizations` arguments [skip ci] --- R/000-wrappers.R | 7 +++ R/lazyframe-frame.R | 73 ++++++++++++++-------------- R/lazyframe-utils.R | 79 +++++++++++++++++++++++++++++++ src/init.c | 6 +++ src/rust/api.h | 1 + src/rust/src/lazyframe/general.rs | 9 ++++ 6 files changed, 139 insertions(+), 36 deletions(-) diff --git a/R/000-wrappers.R b/R/000-wrappers.R index 43b29f3c9..ca5dab30b 100644 --- a/R/000-wrappers.R +++ b/R/000-wrappers.R @@ -4194,6 +4194,12 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed") } } +`PlRLazyFrame_with_optimizations` <- function(self) { + function(`optimizations`) { + .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_optimizations__impl, `self`, `optimizations`)) + } +} + `PlRLazyFrame_with_row_index` <- function(self) { function(`name`, `offset` = NULL) { .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_row_index__impl, `self`, `name`, `offset`)) @@ -4263,6 +4269,7 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed") e$`var` <- `PlRLazyFrame_var`(ptr) e$`with_columns` <- `PlRLazyFrame_with_columns`(ptr) e$`with_columns_seq` <- `PlRLazyFrame_with_columns_seq`(ptr) + e$`with_optimizations` <- `PlRLazyFrame_with_optimizations`(ptr) e$`with_row_index` <- `PlRLazyFrame_with_row_index`(ptr) class(e) <- c("PlRLazyFrame", "savvy_polars__sealed") diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R index 655e57eda..d6ae0b192 100644 --- a/R/lazyframe-frame.R +++ b/R/lazyframe-frame.R @@ -446,59 +446,60 @@ lazyframe__profile <- function( #' lazy_query <- lazy_frame$sort("Species")$filter(pl$col("Species") != "setosa") #' #' # This is the query that was written by the user, without any optimizations -#' # (use cat() for better printing) -#' lazy_query$explain(optimized = FALSE) |> cat() +#' # (use writeLines() for better printing) +#' lazy_query$explain(optimized = FALSE) |> writeLines() #' #' # This is the query after `polars` optimizes it: instead of sorting first and #' # then filtering, it is faster to filter first and then sort the rest. -#' lazy_query$explain() |> cat() +#' lazy_query$explain() |> writeLines() +#' +#' # You can disable specific optimizations. +#' lazy_query$explain( +#' optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE) +#' ) |> +#' writeLines() #' #' # Also possible to see this as tree format -#' lazy_query$explain(format = "tree") |> cat() +#' lazy_query$explain(format = "tree") |> writeLines() lazyframe__explain <- function( ..., format = c("plain", "tree"), + engine = c("auto", "in-memory", "streaming"), optimized = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - collapse_joins = deprecated(), - `_check_order` = TRUE + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated() ) { wrap({ check_dots_empty0(...) format <- arg_match0(format, c("plain", "tree")) + engine <- arg_match0(engine, c("auto", "in-memory", "streaming")) + check_is_S7(optimizations, QueryOptFlags) - if (is_present(collapse_joins)) { - deprecate_warn( - c( - `!` = sprintf("%s is deprecated.", format_arg("collapse_joins")), - `i` = sprintf("Use %s instead.", format_arg("predicate_pushdown")) - ) - ) - } + optimizations <- forward_old_opt_flags( + optimizations, + type_coercion = type_coercion, + predicate_pushdown = predicate_pushdown, + projection_pushdown = projection_pushdown, + simplify_expression = simplify_expression, + slice_pushdown = slice_pushdown, + comm_subplan_elim = comm_subplan_elim, + comm_subexpr_elim = comm_subexpr_elim, + cluster_with_columns = cluster_with_columns, + collapse_joins = collapse_joins + ) if (isTRUE(optimized)) { - ldf <- self$`_ldf`$optimization_toggle( - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - comm_subplan_elim = comm_subplan_elim, - comm_subexpr_elim = comm_subexpr_elim, - cluster_with_columns = cluster_with_columns, - `_check_order` = `_check_order`, - `_eager` = FALSE - ) + prop(optimizations, "streaming", check = FALSE) <- engine == "streaming" + ldf <- self$`_ldf`$with_optimizations(optimizations) if (format == "tree") { ldf$describe_optimized_plan_tree() diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index c1123adf6..6931937db 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -158,3 +158,82 @@ parse_percentiles <- function(percentiles, inject_median = FALSE) { c(sub_50_percentiles, at_or_above_50_percentiles) } + +forward_old_opt_flags <- function( + optimizations, + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated() +) { + call <- caller_env(2L) + warn_func <- function(arg_name) { + deprecate_warn( + c( + `!` = sprintf("%s is deprecated.", format_arg(arg_name)), + `i` = sprintf("Use %s instead.", format_arg("optimizations")) + ), + always = TRUE, + user_env = call + ) + } + + need_validation <- FALSE + + if (is_present(type_coercion)) { + warn_func("type_coercion") + prop(optimizations, "type_coercion", check = FALSE) <- type_coercion + need_validation <- TRUE + } + if (is_present(predicate_pushdown)) { + warn_func("predicate_pushdown") + prop(optimizations, "predicate_pushdown", check = FALSE) <- predicate_pushdown + need_validation <- TRUE + } + if (is_present(projection_pushdown)) { + warn_func("projection_pushdown") + prop(optimizations, "projection_pushdown", check = FALSE) <- projection_pushdown + need_validation <- TRUE + } + if (is_present(simplify_expression)) { + warn_func("simplify_expression") + prop(optimizations, "simplify_expression", check = FALSE) <- simplify_expression + need_validation <- TRUE + } + if (is_present(slice_pushdown)) { + warn_func("slice_pushdown") + prop(optimizations, "slice_pushdown", check = FALSE) <- slice_pushdown + need_validation <- TRUE + } + if (is_present(comm_subplan_elim)) { + warn_func("comm_subplan_elim") + prop(optimizations, "comm_subplan_elim", check = FALSE) <- comm_subplan_elim + need_validation <- TRUE + } + if (is_present(comm_subexpr_elim)) { + warn_func("comm_subexpr_elim") + prop(optimizations, "comm_subexpr_elim", check = FALSE) <- comm_subexpr_elim + need_validation <- TRUE + } + if (is_present(cluster_with_columns)) { + warn_func("cluster_with_columns") + prop(optimizations, "cluster_with_columns", check = FALSE) <- cluster_with_columns + need_validation <- TRUE + } + + if (is_present(collapse_joins)) { + warn_func("collapse_joins") + # collapse_joins was merged to predicate_pushdown, so there is no flag anymore + } + + if (need_validation) { + validate(optimizations) + } + + optimizations +} diff --git a/src/init.c b/src/init.c index 2caf57d94..79e01231d 100644 --- a/src/init.c +++ b/src/init.c @@ -2894,6 +2894,11 @@ SEXP savvy_PlRLazyFrame_with_columns_seq__impl(SEXP self__, SEXP c_arg__exprs) { return handle_result(res); } +SEXP savvy_PlRLazyFrame_with_optimizations__impl(SEXP self__, SEXP c_arg__optimizations) { + SEXP res = savvy_PlRLazyFrame_with_optimizations__ffi(self__, c_arg__optimizations); + return handle_result(res); +} + SEXP savvy_PlRLazyFrame_with_row_index__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset) { SEXP res = savvy_PlRLazyFrame_with_row_index__ffi(self__, c_arg__name, c_arg__offset); return handle_result(res); @@ -3953,6 +3958,7 @@ static const R_CallMethodDef CallEntries[] = { {"savvy_PlRLazyFrame_var__impl", (DL_FUNC) &savvy_PlRLazyFrame_var__impl, 2}, {"savvy_PlRLazyFrame_with_columns__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns__impl, 2}, {"savvy_PlRLazyFrame_with_columns_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns_seq__impl, 2}, + {"savvy_PlRLazyFrame_with_optimizations__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_optimizations__impl, 2}, {"savvy_PlRLazyFrame_with_row_index__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_row_index__impl, 3}, {"savvy_PlRLazyGroupBy_agg__impl", (DL_FUNC) &savvy_PlRLazyGroupBy_agg__impl, 2}, {"savvy_PlRLazyGroupBy_head__impl", (DL_FUNC) &savvy_PlRLazyGroupBy_head__impl, 2}, diff --git a/src/rust/api.h b/src/rust/api.h index 977e08732..37e543bdb 100644 --- a/src/rust/api.h +++ b/src/rust/api.h @@ -586,6 +586,7 @@ SEXP savvy_PlRLazyFrame_unpivot__ffi(SEXP self__, SEXP c_arg__on, SEXP c_arg__in SEXP savvy_PlRLazyFrame_var__ffi(SEXP self__, SEXP c_arg__ddof); SEXP savvy_PlRLazyFrame_with_columns__ffi(SEXP self__, SEXP c_arg__exprs); SEXP savvy_PlRLazyFrame_with_columns_seq__ffi(SEXP self__, SEXP c_arg__exprs); +SEXP savvy_PlRLazyFrame_with_optimizations__ffi(SEXP self__, SEXP c_arg__optimizations); SEXP savvy_PlRLazyFrame_with_row_index__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset); // methods and associated functions for PlRLazyGroupBy diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs index a5029f9c9..27c091c1d 100644 --- a/src/rust/src/lazyframe/general.rs +++ b/src/rust/src/lazyframe/general.rs @@ -2,6 +2,7 @@ use super::sink::RSinkTarget; use crate::{ PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, PlRSeries, RPolarsErr, expr::selector::PlRSelector, + lazyframe::PlROptFlags, prelude::{sync_on_close::SyncOnCloseType, *}, }; use polars::io::{HiveOptions, RowIndex}; @@ -313,6 +314,14 @@ impl PlRLazyFrame { Ok(ldf.cache().into()) } + fn with_optimizations(&self, optimizations: Sexp) -> Result { + let ldf = self.ldf.clone(); + let optimizations = ::try_from(optimizations)?; + Ok(ldf + .with_optimizations(optimizations.inner.into_inner()) + .into()) + } + fn profile(&self) -> Result { use crate::{ r_threads::{ThreadCom, concurrent_handler}, From 122fe70d6cbaf638753b1bdd7b1740977a585027 Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Mon, 10 Nov 2025 03:38:18 +0000 Subject: [PATCH 02/15] wip [skip ci] --- R/lazyframe-frame.R | 2 +- R/lazyframe-utils.R | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R index d6ae0b192..48621f754 100644 --- a/R/lazyframe-frame.R +++ b/R/lazyframe-frame.R @@ -268,6 +268,7 @@ lazyframe__group_by <- function(..., .maintain_order = FALSE) { #' ) lazyframe__collect <- function( ..., + engine = c("auto", "in-memory", "streaming"), type_coercion = TRUE, `_type_check` = TRUE, predicate_pushdown = TRUE, @@ -278,7 +279,6 @@ lazyframe__collect <- function( comm_subexpr_elim = TRUE, cluster_with_columns = TRUE, no_optimization = FALSE, - engine = c("auto", "in-memory", "streaming"), `_check_order` = TRUE, `_eager` = FALSE, collapse_joins = deprecated() diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index 6931937db..ad0b8c70a 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -169,7 +169,8 @@ forward_old_opt_flags <- function( comm_subplan_elim = deprecated(), comm_subexpr_elim = deprecated(), cluster_with_columns = deprecated(), - collapse_joins = deprecated() + collapse_joins = deprecated(), + no_optimization = deprecated() ) { call <- caller_env(2L) warn_func <- function(arg_name) { @@ -231,6 +232,20 @@ forward_old_opt_flags <- function( # collapse_joins was merged to predicate_pushdown, so there is no flag anymore } + if (is_present(no_optimization)) { + warn_func("no_optimization") + props_uncheck(optimizations) <- list( + predicate_pushdown <- FALSE + projection_pushdown <- FALSE + slice_pushdown <- FALSE + comm_subplan_elim <- FALSE + comm_subexpr_elim <- FALSE + cluster_with_columns <- FALSE + check_order_observe <- FALSE + ) + need_validation <- TRUE + } + if (need_validation) { validate(optimizations) } From 67e788043aba5de53f2c01e0b2c1bb22cbbfefcc Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 10 Nov 2025 14:47:37 +0000 Subject: [PATCH 03/15] wip [skip ci] --- R/lazyframe-frame.R | 110 ++++++++++++++++++++++++-------------------- R/lazyframe-utils.R | 14 +++--- 2 files changed, 68 insertions(+), 56 deletions(-) diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R index 48621f754..53798cf3c 100644 --- a/R/lazyframe-frame.R +++ b/R/lazyframe-frame.R @@ -234,20 +234,50 @@ lazyframe__group_by <- function(..., .maintain_order = FALSE) { #' Individual optimizations may be disabled by setting the corresponding parameter to `FALSE`. #' @inherit pl__DataFrame return #' @inheritParams rlang::args_dots_empty -#' @inheritParams QueryOptFlags -#' @param type_coercion A logical, indicates type coercion optimization. -#' @param collapse_joins `r lifecycle::badge("deprecated")` -#' Use `predicate_pushdown` instead. -#' @param no_optimization A logical. If `TRUE`, turn off (certain) optimizations. #' @param engine The engine name to use for processing the query. -#' One of the followings: -#' - `"auto"` (default): Select the engine automatically. -#' The `"in-memory"` engine will be selected for most cases. -#' - `"in-memory"`: Use the in-memory engine. -#' - `"streaming"`: `r lifecycle::badge("experimental")` Use the (new) streaming engine. -#' @param _eager A logical, indicates to turn off multi-node optimizations and -#' the other optimizations. This option is intended for internal use only. -#' @param _check_order,_type_check For internal use only. +#' One of the followings: +#' - `"auto"` (default): Select the engine automatically. +#' The `"in-memory"` engine will be selected for most cases. +#' - `"in-memory"`: Use the in-memory engine. +#' - `"streaming"`: `r lifecycle::badge("experimental")` Use the (new) streaming engine. +#' @param optimizations `r lifecycle::badge("experimental")` +#' A [QueryOptFlags] object to indicate optimization passes done during query optimization. +#' @param type_coercion `r lifecycle::badge("deprecated")` +#' Use the `type_coercion` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param predicate_pushdown `r lifecycle::badge("deprecated")` +#' Use the `predicate_pushdown` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param projection_pushdown `r lifecycle::badge("deprecated")` +#' Use the `projection_pushdown` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param simplify_expression `r lifecycle::badge("deprecated")` +#' Use the `simplify_expression` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param slice_pushdown `r lifecycle::badge("deprecated")` +#' Use the `slice_pushdown` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param comm_subplan_elim `r lifecycle::badge("deprecated")` +#' Use the `comm_subplan_elim` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param comm_subexpr_elim `r lifecycle::badge("deprecated")` +#' Use the `comm_subexpr_elim` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param cluster_with_columns `r lifecycle::badge("deprecated")` +#' Use the `cluster_with_columns` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param check_order_observe `r lifecycle::badge("deprecated")` +#' Use the `check_order_observe` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param fast_projection `r lifecycle::badge("deprecated")` +#' Use the `fast_projection` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param collapse_joins `r lifecycle::badge("deprecated")` +#' Use the `predicate_pushdown` property of a [QueryOptFlags] object, then pass +#' that to the `optimizations` argument instead. +#' @param no_optimization `r lifecycle::badge("deprecated")` +#' Use the `optimizations` argument with +#' [`pl$QueryOptFlags()$no_optimizations()`][QueryOptFlags] instead. #' @seealso #' - [`$profile()`][lazyframe__profile] - same as `$collect()` but also returns #' a table with each operation profiled. @@ -269,46 +299,26 @@ lazyframe__group_by <- function(..., .maintain_order = FALSE) { lazyframe__collect <- function( ..., engine = c("auto", "in-memory", "streaming"), - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, - `_check_order` = TRUE, - `_eager` = FALSE, - collapse_joins = deprecated() + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated(), + no_optimization = deprecated() ) { wrap({ check_dots_empty0(...) engine <- arg_match0(engine, c("auto", "in-memory", "streaming")) + check_is_S7(optimizations, QueryOptFlags) - if (is_present(collapse_joins)) { - deprecate_warn( - c( - `!` = sprintf("%s is deprecated.", format_arg("collapse_joins")), - `i` = sprintf("Use %s instead.", format_arg("predicate_pushdown")) - ) - ) - } - - if (isTRUE(no_optimization) || isTRUE(`_eager`)) { - predicate_pushdown <- FALSE - projection_pushdown <- FALSE - slice_pushdown <- FALSE - comm_subplan_elim <- FALSE - comm_subexpr_elim <- FALSE - cluster_with_columns <- FALSE - `_check_order` <- FALSE - } - - ldf <- self$`_ldf`$optimization_toggle( + optimizations <- forward_old_opt_flags( + optimizations, type_coercion = type_coercion, - `_type_check` = `_type_check`, predicate_pushdown = predicate_pushdown, projection_pushdown = projection_pushdown, simplify_expression = simplify_expression, @@ -316,10 +326,12 @@ lazyframe__collect <- function( comm_subplan_elim = comm_subplan_elim, comm_subexpr_elim = comm_subexpr_elim, cluster_with_columns = cluster_with_columns, - `_check_order` = `_check_order`, - `_eager` = `_eager` + collapse_joins = collapse_joins, + no_optimization = no_optimization ) + ldf <- self$`_ldf`$with_optimizations(optimizations) + ldf$collect(engine) }) } diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index ad0b8c70a..6e53381f0 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -235,13 +235,13 @@ forward_old_opt_flags <- function( if (is_present(no_optimization)) { warn_func("no_optimization") props_uncheck(optimizations) <- list( - predicate_pushdown <- FALSE - projection_pushdown <- FALSE - slice_pushdown <- FALSE - comm_subplan_elim <- FALSE - comm_subexpr_elim <- FALSE - cluster_with_columns <- FALSE - check_order_observe <- FALSE + predicate_pushdown = FALSE, + projection_pushdown = FALSE, + slice_pushdown = FALSE, + comm_subplan_elim = FALSE, + comm_subexpr_elim = FALSE, + cluster_with_columns = FALSE, + check_order_observe = FALSE ) need_validation <- TRUE } From 95d9e82972dabc6b8969e1a496154e10937ef243 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 10 Nov 2025 15:16:02 +0000 Subject: [PATCH 04/15] fix: fix old no_optimization [skip ci] --- R/lazyframe-utils.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index 6e53381f0..2d8994396 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -234,16 +234,18 @@ forward_old_opt_flags <- function( if (is_present(no_optimization)) { warn_func("no_optimization") - props_uncheck(optimizations) <- list( - predicate_pushdown = FALSE, - projection_pushdown = FALSE, - slice_pushdown = FALSE, - comm_subplan_elim = FALSE, - comm_subexpr_elim = FALSE, - cluster_with_columns = FALSE, - check_order_observe = FALSE - ) - need_validation <- TRUE + if (isTRUE(no_optimization)) { + props_uncheck(optimizations) <- list( + predicate_pushdown = FALSE, + projection_pushdown = FALSE, + slice_pushdown = FALSE, + comm_subplan_elim = FALSE, + comm_subexpr_elim = FALSE, + cluster_with_columns = FALSE, + check_order_observe = FALSE + ) + need_validation <- TRUE + } } if (need_validation) { From b209e261273df45d0da0406c09586bcfb6879c15 Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Wed, 12 Nov 2025 04:04:02 +0000 Subject: [PATCH 05/15] fix: dataframe methods [skip ci] --- R/dataframe-frame.R | 70 +++++++++++++++++++++-------------------- R/lazyframe-opt_flags.R | 5 +++ 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/R/dataframe-frame.R b/R/dataframe-frame.R index 4e4b50ec8..2465e1a4a 100644 --- a/R/dataframe-frame.R +++ b/R/dataframe-frame.R @@ -364,7 +364,7 @@ dataframe__group_by <- function(..., .maintain_order = FALSE) { #' add_2_SL = pl$col("Sepal.Length") + 2 #' ) dataframe__select <- function(...) { - self$lazy()$select(...)$collect(`_eager` = TRUE) |> + self$lazy()$select(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -381,7 +381,7 @@ dataframe__select <- function(...) { #' ) #' df$select_seq("foo", bar2 = pl$col("bar") * 2) dataframe__select_seq <- function(...) { - self$lazy()$select_seq(...)$collect(`_eager` = TRUE) |> + self$lazy()$select_seq(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -415,7 +415,7 @@ dataframe__select_seq <- function(...) { #' `not c` = pl$col("c")$not(), #' ) dataframe__with_columns <- function(...) { - self$lazy()$with_columns(...)$collect(`_eager` = TRUE) |> + self$lazy()$with_columns(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -450,7 +450,7 @@ dataframe__with_columns <- function(...) { #' `not c` = pl$col("c")$not(), #' ) dataframe__with_columns_seq <- function(...) { - self$lazy()$with_columns_seq(...)$collect(`_eager` = TRUE) |> + self$lazy()$with_columns_seq(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -573,7 +573,7 @@ dataframe__tail <- function(n = 5) { #' # Drop multiple columns by passing a selector #' df$drop(cs$all()) dataframe__drop <- function(..., strict = TRUE) { - self$lazy()$drop(..., strict = strict)$collect(`_eager` = TRUE) |> + self$lazy()$drop(..., strict = strict)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -595,7 +595,7 @@ dataframe__drop <- function(..., strict = TRUE) { #' # Cast all columns to the same type #' df$cast(pl$String) dataframe__cast <- function(..., .strict = TRUE) { - self$lazy()$cast(..., .strict = .strict)$collect(`_eager` = TRUE) |> + self$lazy()$cast(..., .strict = .strict)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -619,7 +619,7 @@ dataframe__cast <- function(..., .strict = TRUE) { #' #' df$filter(pl$col("Species") == "setosa") dataframe__filter <- function(...) { - self$lazy()$filter(...)$collect(`_eager` = TRUE) |> + self$lazy()$filter(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -646,7 +646,7 @@ dataframe__filter <- function(...) { #' #' df$remove((pl$col("total") >= 0) | (pl$col("ccy") == "USD")) dataframe__remove <- function(...) { - self$lazy()$remove(...)$collect(`_eager` = TRUE) |> + self$lazy()$remove(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -686,7 +686,7 @@ dataframe__sort <- function( nulls_last = nulls_last, multithreaded = multithreaded, maintain_order = maintain_order - )$collect(`_eager` = TRUE) |> + )$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -802,7 +802,7 @@ dataframe__top_k <- function(k, ..., by, reverse = FALSE) { #' #' df1$merge_sorted(df2, key = "age") dataframe__merge_sorted <- function(other, key) { - self$lazy()$merge_sorted(other$lazy(), key)$collect(`_eager` = TRUE) |> + self$lazy()$merge_sorted(other$lazy(), key)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -819,7 +819,9 @@ dataframe__merge_sorted <- function(other, key) { #' #' df1$flags dataframe__set_sorted <- function(column, ..., descending = FALSE) { - self$lazy()$set_sorted(column, descending = descending)$collect(`_eager` = TRUE) |> + self$lazy()$set_sorted(column, descending = descending)$collect( + optimizations = DEFAULT_EAGER_OPT_FLAGS + ) |> wrap() } @@ -847,9 +849,7 @@ dataframe__unique <- function( subset = subset, keep = keep, maintain_order = maintain_order - )$collect( - `_eager` = TRUE - ) |> + )$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -914,7 +914,7 @@ dataframe__join <- function( nulls_equal = nulls_equal, coalesce = coalesce, maintain_order = maintain_order - )$collect(`_eager` = TRUE) + )$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) }) } @@ -945,7 +945,7 @@ dataframe__join <- function( #' ) #' df$filter(!pl$all_horizontal(pl$all()$is_nan())) dataframe__drop_nans <- function(...) { - self$lazy()$drop_nans(...)$collect(`_eager` = TRUE) |> + self$lazy()$drop_nans(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -967,7 +967,7 @@ dataframe__drop_nans <- function(...) { #' # a null in any of the integer columns: #' df$drop_nulls(cs$integer()) dataframe__drop_nulls <- function(...) { - self$lazy()$drop_nulls(...)$collect(`_eager` = TRUE) |> + self$lazy()$drop_nulls(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1051,7 +1051,7 @@ dataframe__gather_every <- function(n, offset = 0) { #' #' df$rename(foo = "apple") dataframe__rename <- function(..., .strict = TRUE) { - self$lazy()$rename(..., .strict = .strict)$collect(`_eager` = TRUE) |> + self$lazy()$rename(..., .strict = .strict)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1077,7 +1077,7 @@ dataframe__fill_null <- function( matches_supertype = TRUE ) { self$lazy()$fill_null(value, strategy, limit, ..., matches_supertype = matches_supertype)$collect( - `_eager` = TRUE + optimizations = DEFAULT_EAGER_OPT_FLAGS ) |> wrap() } @@ -1093,7 +1093,7 @@ dataframe__fill_null <- function( #' #' df$explode("numbers") dataframe__explode <- function(...) { - self$lazy()$explode(...)$collect(`_eager` = TRUE) |> + self$lazy()$explode(...)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1114,7 +1114,7 @@ dataframe__explode <- function(...) { #' df$unnest("a_and_c") #' df$unnest("a_and_c", separator = ":") dataframe__unnest <- function(..., separator = NULL) { - self$lazy()$unnest(..., separator = separator)$collect(`_eager` = TRUE) |> + self$lazy()$unnest(..., separator = separator)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1231,7 +1231,7 @@ dataframe__join_asof <- function( coalesce = coalesce, allow_exact_matches = allow_exact_matches, check_sortedness = check_sortedness, - )$collect(`_eager` = TRUE) |> + )$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1245,7 +1245,7 @@ dataframe__join_asof <- function( #' ) #' df$fill_nan(99) dataframe__fill_nan <- function(value) { - self$lazy()$fill_nan(value)$collect(`_eager` = TRUE) |> + self$lazy()$fill_nan(value)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1300,7 +1300,7 @@ dataframe__clear <- function(n = 0) { dataframe__shift <- function(n = 1, ..., fill_value = NULL) { wrap({ check_dots_empty0(...) - self$lazy()$shift(n, fill_value = fill_value)$collect(`_eager` = TRUE) + self$lazy()$shift(n, fill_value = fill_value)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) }) } @@ -1345,7 +1345,9 @@ dataframe__join_where <- function( ) { wrap({ check_polars_df(other) - self$lazy()$join_where(other$lazy(), ..., suffix = suffix)$collect(`_eager` = TRUE) + self$lazy()$join_where(other$lazy(), ..., suffix = suffix)$collect( + optimizations = DEFAULT_EAGER_OPT_FLAGS + ) }) } @@ -1711,7 +1713,7 @@ dataframe__sum_horizontal <- function(..., ignore_nulls = TRUE) { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, 1)) #' df$max() dataframe__max <- function() { - self$lazy()$max()$collect(`_eager` = TRUE) |> + self$lazy()$max()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1722,7 +1724,7 @@ dataframe__max <- function() { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, 1)) #' df$min() dataframe__min <- function() { - self$lazy()$min()$collect(`_eager` = TRUE) |> + self$lazy()$min()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1733,7 +1735,7 @@ dataframe__min <- function() { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, 1)) #' df$mean() dataframe__mean <- function() { - self$lazy()$mean()$collect(`_eager` = TRUE) |> + self$lazy()$mean()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1744,7 +1746,7 @@ dataframe__mean <- function() { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, 1)) #' df$median() dataframe__median <- function() { - self$lazy()$median()$collect(`_eager` = TRUE) |> + self$lazy()$median()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1755,7 +1757,7 @@ dataframe__median <- function() { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, 1)) #' df$sum() dataframe__sum <- function() { - self$lazy()$sum()$collect(`_eager` = TRUE) |> + self$lazy()$sum()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1768,7 +1770,7 @@ dataframe__sum <- function() { #' df$var() #' df$var(ddof = 0) dataframe__var <- function(ddof = 1) { - self$lazy()$var(ddof)$collect(`_eager` = TRUE) |> + self$lazy()$var(ddof)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1781,7 +1783,7 @@ dataframe__var <- function(ddof = 1) { #' df$std() #' df$std(ddof = 0) dataframe__std <- function(ddof = 1) { - self$lazy()$std(ddof)$collect(`_eager` = TRUE) |> + self$lazy()$std(ddof)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } @@ -1800,7 +1802,7 @@ dataframe__quantile <- function( interpolation, values = c("nearest", "higher", "lower", "midpoint", "linear") ) - self$lazy()$quantile(quantile, interpolation)$collect(`_eager` = TRUE) + self$lazy()$quantile(quantile, interpolation)$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) }) } @@ -2370,7 +2372,7 @@ dataframe__reverse <- function() { #' df <- pl$DataFrame(a = 1:4, b = c(1, 2, 1, NA), c = rep(NA, 4)) #' df$count() dataframe__count <- function() { - self$lazy()$count()$collect(`_eager` = TRUE) |> + self$lazy()$count()$collect(optimizations = DEFAULT_EAGER_OPT_FLAGS) |> wrap() } diff --git a/R/lazyframe-opt_flags.R b/R/lazyframe-opt_flags.R index fa27b5324..04ed0df5d 100644 --- a/R/lazyframe-opt_flags.R +++ b/R/lazyframe-opt_flags.R @@ -147,6 +147,11 @@ eager_opt_flags <- function() { ) } +on_load({ + # Used inside of DataFrame methods + DEFAULT_EAGER_OPT_FLAGS <- eager_opt_flags() +}) + # Because of immutability, unlike Python Polars, there are no side effects, # and a new modified OptFlags is returned QueryOptFlags__no_optimizations <- function() { From 6f60b70f95918c9a6807fe167cad5128de7cb30f Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 14:33:44 +0000 Subject: [PATCH 06/15] fix: fix all functions [skip] --- R/as_polars_df.R | 26 ++--- R/dataframe-frame.R | 20 ++-- R/dataframe-group_by-general.R | 6 +- R/dataframe-group_by-rolling.R | 2 +- R/dataframe-group_by_dynamic.R | 2 +- R/functions-eager.R | 36 +++---- R/lazyframe-frame.R | 131 +++++++++++------------- R/lazyframe-utils.R | 45 -------- R/output-batches.R | 9 +- R/output-csv.R | 48 ++------- R/output-ipc.R | 47 ++------- R/output-json.R | 53 ++-------- R/output-parquet.R | 45 ++------ man/as_polars_df.Rd | 78 ++++++++------ man/lazyframe__collect.Rd | 95 ++++++++++------- man/lazyframe__explain.Rd | 87 +++++++++++----- man/lazyframe__profile.Rd | 83 ++++++++++----- man/lazyframe__sink_batches.Rd | 6 +- man/lazyframe__sink_csv.Rd | 37 +------ man/lazyframe__sink_ipc.Rd | 37 +------ man/lazyframe__sink_ndjson.Rd | 37 +------ man/lazyframe__sink_parquet.Rd | 39 ++----- man/lazyframe__to_dot.Rd | 65 +++++++----- tests/testthat/_snaps/output-parquet.md | 10 -- 24 files changed, 428 insertions(+), 616 deletions(-) diff --git a/R/as_polars_df.R b/R/as_polars_df.R index f1f2c1e86..d9de8341f 100644 --- a/R/as_polars_df.R +++ b/R/as_polars_df.R @@ -146,18 +146,21 @@ as_polars_df.polars_group_by <- function(x, ...) { as_polars_df.polars_lazy_frame <- function( x, ..., - type_coercion = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, - engine = c("auto", "in-memory", "streaming") + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + no_optimization = deprecated() ) { x$collect( + engine = engine, + optimizations = optimizations, type_coercion = type_coercion, predicate_pushdown = predicate_pushdown, projection_pushdown = projection_pushdown, @@ -166,8 +169,7 @@ as_polars_df.polars_lazy_frame <- function( comm_subplan_elim = comm_subplan_elim, comm_subexpr_elim = comm_subexpr_elim, cluster_with_columns = cluster_with_columns, - no_optimization = no_optimization, - engine = engine + no_optimization = no_optimization ) } diff --git a/R/dataframe-frame.R b/R/dataframe-frame.R index 2465e1a4a..35b8ecf55 100644 --- a/R/dataframe-frame.R +++ b/R/dataframe-frame.R @@ -745,10 +745,12 @@ dataframe__rechunk <- function() { #' df$bottom_k(4, by = c("a", "b")) dataframe__bottom_k <- function(k, ..., by, reverse = FALSE) { self$lazy()$bottom_k(k, by = by, reverse = reverse)$collect( - projection_pushdown = FALSE, - predicate_pushdown = FALSE, - comm_subplan_elim = FALSE, - slice_pushdown = TRUE + optimizations = QueryOptFlags( + projection_pushdown = FALSE, + predicate_pushdown = FALSE, + comm_subplan_elim = FALSE, + slice_pushdown = TRUE + ) ) |> wrap() } @@ -770,10 +772,12 @@ dataframe__bottom_k <- function(k, ..., by, reverse = FALSE) { #' df$top_k(4, by = c("a", "b")) dataframe__top_k <- function(k, ..., by, reverse = FALSE) { self$lazy()$top_k(k, by = by, reverse = reverse)$collect( - projection_pushdown = FALSE, - predicate_pushdown = FALSE, - comm_subplan_elim = FALSE, - slice_pushdown = TRUE + optimizations = QueryOptFlags( + projection_pushdown = FALSE, + predicate_pushdown = FALSE, + comm_subplan_elim = FALSE, + slice_pushdown = TRUE + ) ) |> wrap() } diff --git a/R/dataframe-group_by-general.R b/R/dataframe-group_by-general.R index cc7830493..9a7bae46d 100644 --- a/R/dataframe-group_by-general.R +++ b/R/dataframe-group_by-general.R @@ -38,7 +38,7 @@ groupby__agg <- function(...) { self$df$lazy()$group_by( !!!self$by, .maintain_order = self$maintain_order - )$agg(...)$collect(no_optimization = TRUE) |> + )$agg(...)$collect(optimizations = QueryOptFlags()$no_optimizations()) |> wrap() } @@ -56,7 +56,7 @@ groupby__head <- function(n = 5) { self$df$lazy()$group_by( !!!self$by, .maintain_order = self$maintain_order - )$head(n)$collect(no_optimization = TRUE) |> + )$head(n)$collect(optimizations = QueryOptFlags()$no_optimizations()) |> wrap() } @@ -74,7 +74,7 @@ groupby__tail <- function(n = 5) { self$df$lazy()$group_by( !!!self$by, .maintain_order = self$maintain_order - )$tail(n)$collect(no_optimization = TRUE) |> + )$tail(n)$collect(optimizations = QueryOptFlags()$no_optimizations()) |> wrap() } diff --git a/R/dataframe-group_by-rolling.R b/R/dataframe-group_by-rolling.R index a44279f46..8fdbb21dd 100644 --- a/R/dataframe-group_by-rolling.R +++ b/R/dataframe-group_by-rolling.R @@ -21,6 +21,6 @@ rolling_groupby__agg <- function(...) { offset = self$offset, closed = self$closed, group_by = self$group_by - )$agg(...)$collect(no_optimization = TRUE) |> + )$agg(...)$collect(optimizations = QueryOptFlags()$no_optimizations()) |> wrap() } diff --git a/R/dataframe-group_by_dynamic.R b/R/dataframe-group_by_dynamic.R index 960e88375..9ebaf18b2 100644 --- a/R/dataframe-group_by_dynamic.R +++ b/R/dataframe-group_by_dynamic.R @@ -40,6 +40,6 @@ group_by_dynamic__agg <- function(...) { label = self$label, group_by = self$group_by, start_by = self$start_by - )$agg(...)$collect(no_optimization = TRUE) |> + )$agg(...)$collect(optimizations = QueryOptFlags()$no_optimizations()) |> wrap() } diff --git a/R/functions-eager.R b/R/functions-eager.R index b5efb806d..3cdf469e9 100644 --- a/R/functions-eager.R +++ b/R/functions-eager.R @@ -159,16 +159,14 @@ pl__concat <- function( wrap() }, vertical_relaxed = { - ( - dots |> - lapply(\(x) x$lazy()$`_ldf`) |> - concat_lf( - rechunk = rechunk, - parallel = parallel, - to_supertypes = TRUE - ) |> - wrap() - )$collect(no_optimization = TRUE) + (dots |> + lapply(\(x) x$lazy()$`_ldf`) |> + concat_lf( + rechunk = rechunk, + parallel = parallel, + to_supertypes = TRUE + ) |> + wrap())$collect(optimizations = QueryOptFlags()$no_optimizations()) }, diagonal = { dots |> @@ -177,16 +175,14 @@ pl__concat <- function( wrap() }, diagonal_relaxed = { - ( - dots |> - lapply(\(x) x$lazy()$`_ldf`) |> - concat_lf_diagonal( - rechunk = rechunk, - parallel = parallel, - to_supertypes = TRUE - ) |> - wrap() - )$collect(no_optimization = TRUE) + (dots |> + lapply(\(x) x$lazy()$`_ldf`) |> + concat_lf_diagonal( + rechunk = rechunk, + parallel = parallel, + to_supertypes = TRUE + ) |> + wrap())$collect(optimizations = QueryOptFlags()$no_optimizations()) }, horizontal = { dots |> diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R index 53798cf3c..4869a91ec 100644 --- a/R/lazyframe-frame.R +++ b/R/lazyframe-frame.R @@ -370,46 +370,29 @@ lazyframe__collect <- function( #' )$sort("a")$profile() lazyframe__profile <- function( ..., - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, - `_check_order` = TRUE, show_plot = FALSE, truncate_nodes = 0, - collapse_joins = deprecated() + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated(), + no_optimization = deprecated() ) { wrap({ check_dots_empty0(...) + engine <- arg_match0(engine, c("auto", "in-memory", "streaming")) + check_is_S7(optimizations, QueryOptFlags) - if (is_present(collapse_joins)) { - deprecate_warn( - c( - `!` = sprintf("%s is deprecated.", format_arg("collapse_joins")), - `i` = sprintf("Use %s instead.", format_arg("predicate_pushdown")) - ) - ) - } - - if (isTRUE(no_optimization)) { - predicate_pushdown <- FALSE - projection_pushdown <- FALSE - slice_pushdown <- FALSE - comm_subplan_elim <- FALSE - comm_subexpr_elim <- FALSE - cluster_with_columns <- FALSE - `_check_order` <- FALSE - } - - lf <- self$`_ldf`$optimization_toggle( + optimizations <- forward_old_opt_flags( + optimizations, type_coercion = type_coercion, - `_type_check` = `_type_check`, predicate_pushdown = predicate_pushdown, projection_pushdown = projection_pushdown, simplify_expression = simplify_expression, @@ -417,11 +400,13 @@ lazyframe__profile <- function( comm_subplan_elim = comm_subplan_elim, comm_subexpr_elim = comm_subexpr_elim, cluster_with_columns = cluster_with_columns, - `_check_order` = `_check_order`, - `_eager` = FALSE + collapse_joins = collapse_joins, + no_optimization = no_optimization ) - out <- lapply(lf$profile(), \(x) { + ldf <- self$`_ldf`$with_optimizations(optimizations) + + out <- lapply(ldf$profile(), \(x) { x |> .savvy_wrap_PlRDataFrame() |> wrap() @@ -2005,7 +1990,7 @@ lazyframe__group_by_dynamic <- function( #' This only returns the "dot" output that can be passed to other packages, such #' as `DiagrammeR::grViz()`. #' -#' @param ... Not used.. +#' @param ... `r lifecycle::badge("deprecated")` Ignored. #' @param optimized Optimize the query plan. #' @inheritParams lazyframe__explain #' @@ -2029,42 +2014,50 @@ lazyframe__group_by_dynamic <- function( lazyframe__to_dot <- function( ..., optimized = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - collapse_joins = deprecated(), - `_check_order` = TRUE + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated() ) { - if (is_present(collapse_joins)) { - deprecate_warn( - c( - `!` = sprintf("%s is deprecated.", format_arg("collapse_joins")), - `i` = sprintf("Use %s instead.", format_arg("predicate_pushdown")) + wrap({ + check_dots_empty( + error = deprecate_warn( + format_warning( + c( + `!` = sprintf( + "%s of %s will raise an error in a future version.", + format_arg("..."), + format_code("$to_dot()") + ) + ) + ) ) ) - } - - ldf <- self$`_ldf`$optimization_toggle( - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - comm_subplan_elim = comm_subplan_elim, - comm_subexpr_elim = comm_subexpr_elim, - cluster_with_columns = cluster_with_columns, - `_check_order` = `_check_order`, - `_eager` = FALSE - ) + check_is_S7(optimizations, QueryOptFlags) - ldf$to_dot(optimized) + optimizations <- forward_old_opt_flags( + optimizations, + type_coercion = type_coercion, + predicate_pushdown = predicate_pushdown, + projection_pushdown = projection_pushdown, + simplify_expression = simplify_expression, + slice_pushdown = slice_pushdown, + comm_subplan_elim = comm_subplan_elim, + comm_subexpr_elim = comm_subexpr_elim, + cluster_with_columns = cluster_with_columns, + collapse_joins = collapse_joins + ) + + ldf <- self$`_ldf`$with_optimizations(optimizations) + + ldf$to_dot(optimized) + }) } #' Create an empty or `n`-row null-filled copy of the frame diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index 2d8994396..d6c3bb935 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -91,51 +91,6 @@ parquet_statistics <- function( ) } -set_sink_optimizations <- function( - self, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, - `_check_order` = TRUE, - collapse_joins = deprecated(), - call = caller_env() -) { - if (isTRUE(no_optimization)) { - predicate_pushdown <- FALSE - projection_pushdown <- FALSE - slice_pushdown <- FALSE - `_check_order` <- FALSE - } - - if (is_present(collapse_joins)) { - deprecate_warn( - c( - `!` = sprintf("%s is deprecated.", format_arg("collapse_joins")), - `i` = sprintf("Use %s instead.", format_arg("predicate_pushdown")) - ), - user_env = call - ) - } - - self$`_ldf`$optimization_toggle( - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - comm_subplan_elim = FALSE, - comm_subexpr_elim = FALSE, - cluster_with_columns = FALSE, - `_eager` = FALSE, - `_check_order` = `_check_order` - ) -} - #' Transforms raw percentiles into our preferred format, adding the 50th #' percentile. #' Raises an error if the percentile sequence is invalid (e.g. outside the diff --git a/R/output-batches.R b/R/output-batches.R index 341fcac1e..fce65d02b 100644 --- a/R/output-batches.R +++ b/R/output-batches.R @@ -110,7 +110,8 @@ lazyframe__sink_batches <- function( ..., chunk_size = NULL, maintain_order = TRUE, - engine = c("auto", "in-memory", "streaming") + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) @@ -119,11 +120,9 @@ lazyframe__sink_batches <- function( lambda, chunk_size = chunk_size, maintain_order = maintain_order - )$collect( - engine = engine - ) - # TODO: support `optimizations` argument + )$collect(engine = engine, optimizations = optimizations) }) + invisible() } diff --git a/R/output-csv.R b/R/output-csv.R index a05f1fe95..ae181f9cf 100644 --- a/R/output-csv.R +++ b/R/output-csv.R @@ -76,19 +76,12 @@ lazyframe__sink_csv <- function( null_value = "", quote_style = c("necessary", "always", "never", "non_numeric"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) @@ -110,21 +103,13 @@ lazyframe__sink_csv <- function( null_value = null_value, quote_style = quote_style, maintain_order = maintain_order, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - no_optimization = no_optimization, storage_options = storage_options, retries = retries, sync_on_close = sync_on_close, - mkdir = mkdir, - collapse_joins = collapse_joins - )$collect(engine = engine) + mkdir = mkdir + )$collect(engine = engine, optimizations = optimizations) }) - # TODO: support `optimizations` argument + invisible(NULL) } @@ -147,18 +132,10 @@ lazyframe__lazy_sink_csv <- function( null_value = "", quote_style = c("necessary", "always", "never", "non_numeric"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) { wrap({ check_dots_empty0(...) @@ -175,19 +152,7 @@ lazyframe__lazy_sink_csv <- function( values = c("none", "data", "all") ) - lf <- set_sink_optimizations( - self, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - collapse_joins = collapse_joins, - no_optimization = no_optimization - ) - - lf$sink_csv( + self$`_ldf`$sink_csv( target = target, include_bom = include_bom, include_header = include_header, @@ -266,6 +231,7 @@ dataframe__write_csv <- function( quote_style = quote_style, storage_options = storage_options, retries = retries, + optimizations = DEFAULT_EAGER_OPT_FLAGS, engine = "in-memory" ) }) diff --git a/R/output-ipc.R b/R/output-ipc.R index 69fa0e592..e484d43d2 100644 --- a/R/output-ipc.R +++ b/R/output-ipc.R @@ -4,6 +4,7 @@ #' #' @inherit lazyframe__sink_parquet description params return #' @inheritParams rlang::args_dots_empty +#' @inheritParams lazyframe__collect #' @param compression Determines the compression algorithm. #' Must be one of: #' - `"uncompressed"` or `NULL`: Write an uncompressed Arrow file. @@ -32,19 +33,12 @@ lazyframe__sink_ipc <- function( compression = c("zstd", "lz4", "uncompressed"), compat_level = c("newest", "oldest"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) @@ -59,21 +53,13 @@ lazyframe__sink_ipc <- function( compression = compression, compat_level = compat_level, maintain_order = maintain_order, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - no_optimization = no_optimization, storage_options = storage_options, retries = retries, sync_on_close = sync_on_close, mkdir = mkdir, - collapse_joins = collapse_joins - )$collect(engine = engine) + )$collect(engine = engine, optimizations = optimizations) }) - # TODO: support `optimizations` argument + invisible(NULL) } @@ -84,18 +70,10 @@ lazyframe__lazy_sink_ipc <- function( compression = c("zstd", "lz4", "uncompressed"), compat_level = c("newest", "oldest"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) { wrap({ check_dots_empty0(...) @@ -117,19 +95,7 @@ lazyframe__lazy_sink_ipc <- function( values = c("none", "data", "all") ) - lf <- set_sink_optimizations( - self, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - collapse_joins = collapse_joins, - no_optimization = no_optimization - ) - - lf$sink_ipc( + self$`_ldf`$sink_ipc( target = target, compression = compression, compat_level = compat_level, @@ -174,6 +140,7 @@ dataframe__write_ipc <- function( compat_level = compat_level, storage_options = storage_options, retries = retries, + optimizations = DEFAULT_EAGER_OPT_FLAGS, engine = "in-memory" ) }) diff --git a/R/output-json.R b/R/output-json.R index 0b0dc2282..6b4b10dae 100644 --- a/R/output-json.R +++ b/R/output-json.R @@ -25,19 +25,12 @@ lazyframe__sink_ndjson <- function( path, ..., maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) @@ -45,21 +38,13 @@ lazyframe__sink_ndjson <- function( self$lazy_sink_ndjson( path = path, maintain_order = maintain_order, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - no_optimization = no_optimization, storage_options = storage_options, retries = retries, sync_on_close = sync_on_close, - mkdir = mkdir, - collapse_joins = collapse_joins - )$collect(engine = engine) + mkdir = mkdir + )$collect(engine = engine, optimizations = optimizations) }) - # TODO: support `optimizations` argument + invisible(NULL) } @@ -68,18 +53,10 @@ lazyframe__lazy_sink_ndjson <- function( path, ..., maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) { wrap({ check_dots_empty0(...) @@ -90,19 +67,7 @@ lazyframe__lazy_sink_ndjson <- function( values = c("none", "data", "all") ) - lf <- set_sink_optimizations( - self, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - collapse_joins = collapse_joins, - no_optimization = no_optimization - ) - - lf$sink_json( + self$`_ldf`$sink_json( target = target, maintain_order = maintain_order, sync_on_close = sync_on_close, @@ -143,7 +108,11 @@ dataframe__write_json <- function(file) { #' jsonlite::stream_in(file(destination)) dataframe__write_ndjson <- function(file) { wrap({ - self$lazy()$sink_ndjson(file, engine = "in-memory") + self$lazy()$sink_ndjson( + file, + optimizations = DEFAULT_EAGER_OPT_FLAGS, + engine = "in-memory" + ) }) invisible(NULL) } diff --git a/R/output-parquet.R b/R/output-parquet.R index 3398ff3d5..79bee86f2 100644 --- a/R/output-parquet.R +++ b/R/output-parquet.R @@ -85,19 +85,12 @@ lazyframe__sink_parquet <- function( row_group_size = NULL, data_page_size = NULL, maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) @@ -110,19 +103,11 @@ lazyframe__sink_parquet <- function( row_group_size = row_group_size, data_page_size = data_page_size, maintain_order = maintain_order, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - no_optimization = no_optimization, storage_options = storage_options, retries = retries, sync_on_close = sync_on_close, - mkdir = mkdir, - collapse_joins = collapse_joins - )$collect(engine = engine) + mkdir = mkdir + )$collect(engine = engine, optimizations = optimizations) }) invisible(NULL) } @@ -137,18 +122,10 @@ lazyframe__lazy_sink_parquet <- function( row_group_size = NULL, data_page_size = NULL, maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) { wrap({ check_dots_empty0(...) @@ -163,17 +140,6 @@ lazyframe__lazy_sink_parquet <- function( values = c("none", "data", "all") ) - lf <- set_sink_optimizations( - self, - type_coercion = type_coercion, - `_type_check` = `_type_check`, - predicate_pushdown = predicate_pushdown, - projection_pushdown = projection_pushdown, - simplify_expression = simplify_expression, - slice_pushdown = slice_pushdown, - collapse_joins = collapse_joins, - no_optimization = no_optimization - ) if (is_bool(statistics)) { statistics <- parquet_statistics( min = statistics, @@ -193,7 +159,7 @@ lazyframe__lazy_sink_parquet <- function( abort("`statistics` must be TRUE, FALSE, 'full', or a call to `parquet_statistics()`.") } - lf$sink_parquet( + self$`_ldf`$sink_parquet( target = target, compression = compression, compression_level = compression_level, @@ -279,6 +245,7 @@ dataframe__write_parquet <- function( storage_options = storage_options, retries = retries, mkdir = mkdir, + optimizations = DEFAULT_EAGER_OPT_FLAGS, engine = "in-memory" ) }) diff --git a/man/as_polars_df.Rd b/man/as_polars_df.Rd index c5fa8b6b6..7adf64383 100644 --- a/man/as_polars_df.Rd +++ b/man/as_polars_df.Rd @@ -25,16 +25,17 @@ as_polars_df(x, ...) \method{as_polars_df}{polars_lazy_frame}( x, ..., - type_coercion = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, - engine = c("auto", "in-memory", "streaming") + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + no_optimization = deprecated() ) \method{as_polars_df}{list}(x, ...) @@ -56,34 +57,53 @@ If \code{NULL}, the column name is taken from the \link{Series} name.} the \code{\link[=series_struct_unnest]{$struct$unnest()}} method is used to create a \link{DataFrame} from the struct \link{Series}. In this case, the \code{column_name} argument is ignored.} -\item{type_coercion}{A logical, indicates type coercion optimization.} +\item{engine}{The engine name to use for processing the query. +One of the followings: +\itemize{ +\item \code{"auto"} (default): Select the engine automatically. +The \code{"in-memory"} engine will be selected for most cases. +\item \code{"in-memory"}: Use the in-memory engine. +\item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. +}} -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} +\item{type_coercion}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{type_coercion} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{simplify_expression}{A logical, indicates simplify expression optimization.} +\item{predicate_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} +\item{projection_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{projection_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur -on self-joins or unions.} +\item{simplify_expression}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{simplify_expression} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.} +\item{slice_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{slice_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls -to with_columns.} +\item{comm_subplan_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subplan_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} +\item{comm_subexpr_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subexpr_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{engine}{The engine name to use for processing the query. -One of the followings: -\itemize{ -\item \code{"auto"} (default): Select the engine automatically. -The \code{"in-memory"} engine will be selected for most cases. -\item \code{"in-memory"}: Use the in-memory engine. -\item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. -}} +\item{cluster_with_columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{cluster_with_columns} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{no_optimization}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{optimizations} argument with +\code{\link[=QueryOptFlags]{pl$QueryOptFlags()$no_optimizations()}} instead.} } \value{ A polars \link{DataFrame} diff --git a/man/lazyframe__collect.Rd b/man/lazyframe__collect.Rd index 594845f25..9a713cb7f 100644 --- a/man/lazyframe__collect.Rd +++ b/man/lazyframe__collect.Rd @@ -6,45 +6,23 @@ \usage{ lazyframe__collect( ..., - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, engine = c("auto", "in-memory", "streaming"), - `_check_order` = TRUE, - `_eager` = FALSE, - collapse_joins = deprecated() + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated(), + no_optimization = deprecated() ) } \arguments{ \item{...}{These dots are for future extensions and must be empty.} -\item{type_coercion}{A logical, indicates type coercion optimization.} - -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} - -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} - -\item{simplify_expression}{A logical, indicates simplify expression optimization.} - -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} - -\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur -on self-joins or unions.} - -\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.} - -\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls -to with_columns.} - -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} - \item{engine}{The engine name to use for processing the query. One of the followings: \itemize{ @@ -54,13 +32,56 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} -\item{_check_order, _type_check}{For internal use only.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} + +\item{type_coercion}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{type_coercion} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{_eager}{A logical, indicates to turn off multi-node optimizations and -the other optimizations. This option is intended for internal use only.} +\item{predicate_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{projection_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{projection_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{simplify_expression}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{simplify_expression} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{slice_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{slice_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{comm_subplan_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subplan_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{comm_subexpr_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subexpr_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{cluster_with_columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{cluster_with_columns} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} \item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{no_optimization}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{optimizations} argument with +\code{\link[=QueryOptFlags]{pl$QueryOptFlags()$no_optimizations()}} instead.} + +\item{check_order_observe}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{check_order_observe} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{fast_projection}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{fast_projection} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} } \value{ A polars \link{DataFrame} diff --git a/man/lazyframe__explain.Rd b/man/lazyframe__explain.Rd index 7e3ecc712..ac270f435 100644 --- a/man/lazyframe__explain.Rd +++ b/man/lazyframe__explain.Rd @@ -7,18 +7,18 @@ lazyframe__explain( ..., format = c("plain", "tree"), + engine = c("auto", "in-memory", "streaming"), optimized = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - collapse_joins = deprecated(), - `_check_order` = TRUE + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated() ) } \arguments{ @@ -27,31 +27,56 @@ lazyframe__explain( \item{format}{The format to use for displaying the logical plan. Must be either \code{"plain"} (default) or \code{"tree"}.} +\item{engine}{The engine name to use for processing the query. +One of the followings: +\itemize{ +\item \code{"auto"} (default): Select the engine automatically. +The \code{"in-memory"} engine will be selected for most cases. +\item \code{"in-memory"}: Use the in-memory engine. +\item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. +}} + \item{optimized}{Return an optimized query plan. If \code{TRUE} (default), the subsequent optimization flags control which optimizations run.} -\item{type_coercion}{A logical, indicates type coercion optimization.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} +\item{type_coercion}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{type_coercion} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} +\item{predicate_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{simplify_expression}{A logical, indicates simplify expression optimization.} +\item{projection_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{projection_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} +\item{simplify_expression}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{simplify_expression} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur -on self-joins or unions.} +\item{slice_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{slice_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.} +\item{comm_subplan_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subplan_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls -to with_columns.} +\item{comm_subexpr_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subexpr_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{cluster_with_columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{cluster_with_columns} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{_check_order, _type_check}{For internal use only.} +\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} } \value{ A character value containing the query plan. @@ -70,13 +95,19 @@ lazy_frame <- as_polars_lf(iris) lazy_query <- lazy_frame$sort("Species")$filter(pl$col("Species") != "setosa") # This is the query that was written by the user, without any optimizations -# (use cat() for better printing) -lazy_query$explain(optimized = FALSE) |> cat() +# (use writeLines() for better printing) +lazy_query$explain(optimized = FALSE) |> writeLines() # This is the query after `polars` optimizes it: instead of sorting first and # then filtering, it is faster to filter first and then sort the rest. -lazy_query$explain() |> cat() +lazy_query$explain() |> writeLines() + +# You can disable specific optimizations. +lazy_query$explain( + optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE) +) |> + writeLines() # Also possible to see this as tree format -lazy_query$explain(format = "tree") |> cat() +lazy_query$explain(format = "tree") |> writeLines() } diff --git a/man/lazyframe__profile.Rd b/man/lazyframe__profile.Rd index 5f0cbb118..bd4ad868c 100644 --- a/man/lazyframe__profile.Rd +++ b/man/lazyframe__profile.Rd @@ -6,54 +6,81 @@ \usage{ lazyframe__profile( ..., - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - no_optimization = FALSE, - `_check_order` = TRUE, show_plot = FALSE, truncate_nodes = 0, - collapse_joins = deprecated() + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated(), + no_optimization = deprecated() ) } \arguments{ \item{...}{These dots are for future extensions and must be empty.} -\item{type_coercion}{A logical, indicates type coercion optimization.} +\item{show_plot}{Show a Gantt chart of the profiling result} -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} +\item{truncate_nodes}{Truncate the label lengths in the Gantt chart to this +number of characters. If \code{0} (default), do not truncate.} -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} +\item{engine}{The engine name to use for processing the query. +One of the followings: +\itemize{ +\item \code{"auto"} (default): Select the engine automatically. +The \code{"in-memory"} engine will be selected for most cases. +\item \code{"in-memory"}: Use the in-memory engine. +\item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. +}} -\item{simplify_expression}{A logical, indicates simplify expression optimization.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} +\item{type_coercion}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{type_coercion} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur -on self-joins or unions.} +\item{predicate_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.} +\item{projection_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{projection_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls -to with_columns.} +\item{simplify_expression}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{simplify_expression} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} +\item{slice_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{slice_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{_check_order, _type_check}{For internal use only.} +\item{comm_subplan_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subplan_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{show_plot}{Show a Gantt chart of the profiling result} +\item{comm_subexpr_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subexpr_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{truncate_nodes}{Truncate the label lengths in the Gantt chart to this -number of characters. If \code{0} (default), do not truncate.} +\item{cluster_with_columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{cluster_with_columns} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} \item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} + +\item{no_optimization}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{optimizations} argument with +\code{\link[=QueryOptFlags]{pl$QueryOptFlags()$no_optimizations()}} instead.} } \value{ List of two \code{DataFrame}s: one with the collected result, the other diff --git a/man/lazyframe__sink_batches.Rd b/man/lazyframe__sink_batches.Rd index 32da2270f..fce100689 100644 --- a/man/lazyframe__sink_batches.Rd +++ b/man/lazyframe__sink_batches.Rd @@ -10,7 +10,8 @@ lazyframe__sink_batches( ..., chunk_size = NULL, maintain_order = TRUE, - engine = c("auto", "in-memory", "streaming") + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags() ) lazyframe__lazy_sink_batches( @@ -42,6 +43,9 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"in-memory"}: Use the in-memory engine. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} + +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} } \value{ \itemize{ diff --git a/man/lazyframe__sink_csv.Rd b/man/lazyframe__sink_csv.Rd index 578ba069d..c2d024b2e 100644 --- a/man/lazyframe__sink_csv.Rd +++ b/man/lazyframe__sink_csv.Rd @@ -23,19 +23,12 @@ lazyframe__sink_csv( null_value = "", quote_style = c("necessary", "always", "never", "non_numeric"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) lazyframe__lazy_sink_csv( @@ -56,18 +49,10 @@ lazyframe__lazy_sink_csv( null_value = "", quote_style = c("necessary", "always", "never", "non_numeric"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) } \arguments{ @@ -131,20 +116,6 @@ integer, then quotes will be used even if they aren`t strictly necessary. \item{maintain_order}{Maintain the order in which data is processed. Setting this to \code{FALSE} will be slightly faster.} -\item{type_coercion}{A logical, indicates type coercion optimization.} - -\item{_type_check}{For internal use only.} - -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} - -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} - -\item{simplify_expression}{A logical, indicates simplify expression optimization.} - -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} - -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} - \item{storage_options}{Named vector containing options that indicate how to connect to a cloud provider. The cloud providers currently supported are AWS, GCP, and Azure. @@ -181,8 +152,8 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} } \value{ \itemize{ diff --git a/man/lazyframe__sink_ipc.Rd b/man/lazyframe__sink_ipc.Rd index 4c5ed4557..5d1acb587 100644 --- a/man/lazyframe__sink_ipc.Rd +++ b/man/lazyframe__sink_ipc.Rd @@ -11,19 +11,12 @@ lazyframe__sink_ipc( compression = c("zstd", "lz4", "uncompressed"), compat_level = c("newest", "oldest"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) lazyframe__lazy_sink_ipc( @@ -32,18 +25,10 @@ lazyframe__lazy_sink_ipc( compression = c("zstd", "lz4", "uncompressed"), compat_level = c("newest", "oldest"), maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) } \arguments{ @@ -76,20 +61,6 @@ Use the highest level, currently same as \item{maintain_order}{Maintain the order in which data is processed. Setting this to \code{FALSE} will be slightly faster.} -\item{type_coercion}{A logical, indicates type coercion optimization.} - -\item{_type_check}{For internal use only.} - -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} - -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} - -\item{simplify_expression}{A logical, indicates simplify expression optimization.} - -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} - -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} - \item{storage_options}{Named vector containing options that indicate how to connect to a cloud provider. The cloud providers currently supported are AWS, GCP, and Azure. @@ -126,8 +97,8 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} } \value{ \itemize{ diff --git a/man/lazyframe__sink_ndjson.Rd b/man/lazyframe__sink_ndjson.Rd index c9fd9c67e..e849a2df7 100644 --- a/man/lazyframe__sink_ndjson.Rd +++ b/man/lazyframe__sink_ndjson.Rd @@ -9,37 +9,22 @@ lazyframe__sink_ndjson( path, ..., maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) lazyframe__lazy_sink_ndjson( path, ..., maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) } \arguments{ @@ -50,20 +35,6 @@ lazyframe__lazy_sink_ndjson( \item{maintain_order}{Maintain the order in which data is processed. Setting this to \code{FALSE} will be slightly faster.} -\item{type_coercion}{A logical, indicates type coercion optimization.} - -\item{_type_check}{For internal use only.} - -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} - -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} - -\item{simplify_expression}{A logical, indicates simplify expression optimization.} - -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} - -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} - \item{storage_options}{Named vector containing options that indicate how to connect to a cloud provider. The cloud providers currently supported are AWS, GCP, and Azure. @@ -100,8 +71,8 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} } \value{ \itemize{ diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd index 8bfef467e..945a52708 100644 --- a/man/lazyframe__sink_parquet.Rd +++ b/man/lazyframe__sink_parquet.Rd @@ -23,19 +23,12 @@ lazyframe__sink_parquet( row_group_size = NULL, data_page_size = NULL, maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), mkdir = FALSE, engine = c("auto", "in-memory", "streaming"), - collapse_joins = deprecated() + optimizations = QueryOptFlags() ) lazyframe__lazy_sink_parquet( @@ -47,18 +40,10 @@ lazyframe__lazy_sink_parquet( row_group_size = NULL, data_page_size = NULL, maintain_order = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - no_optimization = FALSE, storage_options = NULL, retries = 2, sync_on_close = c("none", "data", "all"), - mkdir = FALSE, - collapse_joins = deprecated() + mkdir = FALSE ) } \arguments{ @@ -117,20 +102,6 @@ is set to 1024^2 bytes.} \item{maintain_order}{Maintain the order in which data is processed. Setting this to \code{FALSE} will be slightly faster.} -\item{type_coercion}{A logical, indicates type coercion optimization.} - -\item{_type_check}{For internal use only.} - -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} - -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} - -\item{simplify_expression}{A logical, indicates simplify expression optimization.} - -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} - -\item{no_optimization}{A logical. If \code{TRUE}, turn off (certain) optimizations.} - \item{storage_options}{Named vector containing options that indicate how to connect to a cloud provider. The cloud providers currently supported are AWS, GCP, and Azure. @@ -167,8 +138,10 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} + +\item{_type_check}{For internal use only.} } \value{ \itemize{ diff --git a/man/lazyframe__to_dot.Rd b/man/lazyframe__to_dot.Rd index 8df8d626e..0af78d578 100644 --- a/man/lazyframe__to_dot.Rd +++ b/man/lazyframe__to_dot.Rd @@ -7,46 +7,61 @@ lazyframe__to_dot( ..., optimized = TRUE, - type_coercion = TRUE, - `_type_check` = TRUE, - predicate_pushdown = TRUE, - projection_pushdown = TRUE, - simplify_expression = TRUE, - slice_pushdown = TRUE, - comm_subplan_elim = TRUE, - comm_subexpr_elim = TRUE, - cluster_with_columns = TRUE, - collapse_joins = deprecated(), - `_check_order` = TRUE + optimizations = QueryOptFlags(), + type_coercion = deprecated(), + predicate_pushdown = deprecated(), + projection_pushdown = deprecated(), + simplify_expression = deprecated(), + slice_pushdown = deprecated(), + comm_subplan_elim = deprecated(), + comm_subexpr_elim = deprecated(), + cluster_with_columns = deprecated(), + collapse_joins = deprecated() ) } \arguments{ -\item{...}{Not used..} +\item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Ignored.} \item{optimized}{Optimize the query plan.} -\item{type_coercion}{A logical, indicates type coercion optimization.} +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} -\item{predicate_pushdown}{A logical, indicates predicate pushdown optimization.} +\item{type_coercion}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{type_coercion} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{projection_pushdown}{A logical, indicates projection pushdown optimization.} +\item{predicate_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{simplify_expression}{A logical, indicates simplify expression optimization.} +\item{projection_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{projection_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{slice_pushdown}{A logical, indicates slice pushdown optimization.} +\item{simplify_expression}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{simplify_expression} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subplan_elim}{A logical, indicates trying to cache branching subplans that occur -on self-joins or unions.} +\item{slice_pushdown}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{slice_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{comm_subexpr_elim}{A logical, indicates trying to cache common subexpressions.} +\item{comm_subplan_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subplan_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{cluster_with_columns}{A logical, indicates to combine sequential independent calls -to with_columns.} +\item{comm_subexpr_elim}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{comm_subexpr_elim} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use \code{predicate_pushdown} instead.} +\item{cluster_with_columns}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{cluster_with_columns} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} -\item{_check_order, _type_check}{For internal use only.} +\item{collapse_joins}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} +Use the \code{predicate_pushdown} property of a \link{QueryOptFlags} object, then pass +that to the \code{optimizations} argument instead.} } \value{ A character vector diff --git a/tests/testthat/_snaps/output-parquet.md b/tests/testthat/_snaps/output-parquet.md index 383d58738..078b95011 100644 --- a/tests/testthat/_snaps/output-parquet.md +++ b/tests/testthat/_snaps/output-parquet.md @@ -29,11 +29,6 @@ ! Evaluation failed in `$collect()`. Caused by error: ! Column(s) not found: unable to find column "foo"; valid columns: ["mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb"] - - Resolved plan until failure: - - ---> FAILED HERE RESOLVING THIS_NODE <--- - DF ["mpg", "cyl", "disp", "hp", ...]; PROJECT */11 COLUMNS --- @@ -48,9 +43,4 @@ ! Evaluation failed in `$collect()`. Caused by error: ! Column(s) not found: unable to find column ""; valid columns: ["mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb"] - - Resolved plan until failure: - - ---> FAILED HERE RESOLVING THIS_NODE <--- - DF ["mpg", "cyl", "disp", "hp", ...]; PROJECT */11 COLUMNS From f9af6947ec6be912fb040fb2f88a9239c7f6e2ee Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 14:40:54 +0000 Subject: [PATCH 07/15] feat: support optimizations argument in collect_all --- R/000-wrappers.R | 4 ++-- R/functions-lazy.R | 9 ++++----- src/init.c | 4 ++-- src/rust/api.h | 2 +- src/rust/src/functions/lazy.rs | 6 +++--- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/R/000-wrappers.R b/R/000-wrappers.R index ca5dab30b..1dc046c35 100644 --- a/R/000-wrappers.R +++ b/R/000-wrappers.R @@ -74,8 +74,8 @@ NULL } -`collect_all` <- function(`lfs`, `engine`, `optflags`) { - .Call(savvy_collect_all__impl, `lfs`, `engine`, `optflags`) +`collect_all` <- function(`lfs`, `engine`, `optimizations`) { + .Call(savvy_collect_all__impl, `lfs`, `engine`, `optimizations`) } diff --git a/R/functions-lazy.R b/R/functions-lazy.R index 6b0533c0b..4de6d137a 100644 --- a/R/functions-lazy.R +++ b/R/functions-lazy.R @@ -148,19 +148,18 @@ pl__arg_sort_by <- function( pl__collect_all <- function( lazy_frames, ..., - engine = c("auto", "in-memory", "streaming") + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags() ) { wrap({ check_dots_empty0(...) check_list_of_polars_lf(lazy_frames) engine <- arg_match0(engine, c("auto", "in-memory", "streaming")) - # TODO: add support for argument `optimizations` - optflags <- QueryOptFlags() - check_is_S7(optflags, QueryOptFlags) + check_is_S7(optimizations, QueryOptFlags) lfs <- lapply(lazy_frames, \(x) x$`_ldf`) - collect_all(lfs, engine = engine, optflags = optflags) |> + collect_all(lfs, engine = engine, optimizations = optimizations) |> lapply(\(ptr) .savvy_wrap_PlRDataFrame(ptr) |> wrap()) }) } diff --git a/src/init.c b/src/init.c index 79e01231d..18cf26045 100644 --- a/src/init.c +++ b/src/init.c @@ -69,8 +69,8 @@ SEXP savvy_col__impl(SEXP c_arg__name) { return handle_result(res); } -SEXP savvy_collect_all__impl(SEXP c_arg__lfs, SEXP c_arg__engine, SEXP c_arg__optflags) { - SEXP res = savvy_collect_all__ffi(c_arg__lfs, c_arg__engine, c_arg__optflags); +SEXP savvy_collect_all__impl(SEXP c_arg__lfs, SEXP c_arg__engine, SEXP c_arg__optimizations) { + SEXP res = savvy_collect_all__ffi(c_arg__lfs, c_arg__engine, c_arg__optimizations); return handle_result(res); } diff --git a/src/rust/api.h b/src/rust/api.h index 37e543bdb..c99b0866f 100644 --- a/src/rust/api.h +++ b/src/rust/api.h @@ -5,7 +5,7 @@ SEXP savvy_arg_where__ffi(SEXP c_arg__condition); SEXP savvy_as_struct__ffi(SEXP c_arg__exprs); SEXP savvy_coalesce__ffi(SEXP c_arg__exprs); SEXP savvy_col__ffi(SEXP c_arg__name); -SEXP savvy_collect_all__ffi(SEXP c_arg__lfs, SEXP c_arg__engine, SEXP c_arg__optflags); +SEXP savvy_collect_all__ffi(SEXP c_arg__lfs, SEXP c_arg__engine, SEXP c_arg__optimizations); SEXP savvy_cols__ffi(SEXP c_arg__names); SEXP savvy_compat_level_range__ffi(void); SEXP savvy_concat_arr__ffi(SEXP c_arg__s); diff --git a/src/rust/src/functions/lazy.rs b/src/rust/src/functions/lazy.rs index f9074c554..86197bfec 100644 --- a/src/rust/src/functions/lazy.rs +++ b/src/rust/src/functions/lazy.rs @@ -286,12 +286,12 @@ fn lfs_to_plans(lfs: Vec) -> Result> { } #[savvy] -pub fn collect_all(lfs: ListSexp, engine: &str, optflags: Sexp) -> Result { +pub fn collect_all(lfs: ListSexp, engine: &str, optimizations: Sexp) -> Result { let lfs = >>::try_from(lfs)?.0; let engine = >::try_from(engine)?.0; let plans = lfs_to_plans(lfs)?; - let optflags = ::try_from(optflags)?; - let dfs = LazyFrame::collect_all_with_engine(plans, engine, optflags.inner.into_inner()) + let optimizations = ::try_from(optimizations)?; + let dfs = LazyFrame::collect_all_with_engine(plans, engine, optimizations.inner.into_inner()) .map_err(RPolarsErr::from)?; let mut out = OwnedListSexp::new(dfs.len(), false)?; From 1094b606b1a50ae870f98f8eab9e25ac70b986a1 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 14:43:04 +0000 Subject: [PATCH 08/15] docs: regen --- man/pl__collect_all.Rd | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/man/pl__collect_all.Rd b/man/pl__collect_all.Rd index 19a21f8f8..760cdd774 100644 --- a/man/pl__collect_all.Rd +++ b/man/pl__collect_all.Rd @@ -4,7 +4,12 @@ \alias{pl__collect_all} \title{Collect multiple LazyFrames at the same time} \usage{ -pl__collect_all(lazy_frames, ..., engine = c("auto", "in-memory", "streaming")) +pl__collect_all( + lazy_frames, + ..., + engine = c("auto", "in-memory", "streaming"), + optimizations = QueryOptFlags() +) } \arguments{ \item{lazy_frames}{A list of LazyFrames to collect.} @@ -19,6 +24,9 @@ The \code{"in-memory"} engine will be selected for most cases. \item \code{"in-memory"}: Use the in-memory engine. \item \code{"streaming"}: \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Use the (new) streaming engine. }} + +\item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} } \value{ A list containing all the collected DataFrames, in the same order From 818391ce7d8f02f2e264e7e1a3006ee6a8641985 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 15:07:29 +0000 Subject: [PATCH 09/15] docs(news): add items --- NEWS.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index cf8aea892..4d874beb7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,9 +2,43 @@ ## polars (development version) +### Deprecations + +- The following arguments of certain LazyFrame methods, which were previously used for query optimization, + are deprecated in favor of the new `optimizations` argument (#1635). + Some arguments that were intended for internal use have been removed without deprecation. + + - `time_coercion` + - `predicate_pushdown` + - `projection_pushdown` + - `simplify_expression` + - `slice_pushdown` + - `comm_subplan_elim` + - `comm_subexpr_elim` + - `cluster_with_columns` + - `no_optimization` + - `_type_check` (removed) + - `_check_order` (removed) + - `_eager` (removed) + + Functions affected are those that gained the `optimizations` argument. + See the New features section below for details. + Also, for the experimental `sink_*` methods, the above arguments are removed instead of being deprecated. + ### New features -- `pl$collect_all()` to efficiently collect a list of LazyFrames (#1598). +- The following functions gain the `optimizations` taking a `QueryOptFlags` object (#1633, #1634, #1635). + - `$collect()` + - `$explain()` + - `$profile()` + - `$to_dot()` + - `sink_batches()` + - `sink_csv()` + - `sink_ipc()` + - `sink_parquet()` + - `pl$collect_all()` + - `as_polars_df()` +- `pl$collect_all()` to efficiently collect a list of LazyFrames (#1598, #1635). - `$remove()` and `$remove()` as a complement to `$filter()` (#1632). From bc755fc598d0adbab13a6a9253f53db133e423c2 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 15:13:48 +0000 Subject: [PATCH 10/15] docs: fix docs --- R/lazyframe-frame.R | 6 ------ R/lazyframe-opt_flags.R | 3 --- R/output-parquet.R | 1 - altdoc/mkdocs.yml | 1 + man/QueryOptFlags.Rd | 3 --- man/lazyframe__collect.Rd | 8 -------- man/lazyframe__sink_parquet.Rd | 2 -- 7 files changed, 1 insertion(+), 23 deletions(-) diff --git a/R/lazyframe-frame.R b/R/lazyframe-frame.R index 4869a91ec..9be32b679 100644 --- a/R/lazyframe-frame.R +++ b/R/lazyframe-frame.R @@ -266,12 +266,6 @@ lazyframe__group_by <- function(..., .maintain_order = FALSE) { #' @param cluster_with_columns `r lifecycle::badge("deprecated")` #' Use the `cluster_with_columns` property of a [QueryOptFlags] object, then pass #' that to the `optimizations` argument instead. -#' @param check_order_observe `r lifecycle::badge("deprecated")` -#' Use the `check_order_observe` property of a [QueryOptFlags] object, then pass -#' that to the `optimizations` argument instead. -#' @param fast_projection `r lifecycle::badge("deprecated")` -#' Use the `fast_projection` property of a [QueryOptFlags] object, then pass -#' that to the `optimizations` argument instead. #' @param collapse_joins `r lifecycle::badge("deprecated")` #' Use the `predicate_pushdown` property of a [QueryOptFlags] object, then pass #' that to the `optimizations` argument instead. diff --git a/R/lazyframe-opt_flags.R b/R/lazyframe-opt_flags.R index 04ed0df5d..85b28bb99 100644 --- a/R/lazyframe-opt_flags.R +++ b/R/lazyframe-opt_flags.R @@ -26,9 +26,6 @@ polars_query_opt_flags__methods <- new.env(parent = emptyenv()) #' @examples #' opt_flags <- pl$QueryOptFlags() #' opt_flags -#' -#' S7::check_is_S7(opt_flags, pl$QueryOptFlags) -#' @keywords internal NULL QueryOptFlags <- new_class( diff --git a/R/output-parquet.R b/R/output-parquet.R index 79bee86f2..81ff8a8e9 100644 --- a/R/output-parquet.R +++ b/R/output-parquet.R @@ -49,7 +49,6 @@ #' this to `FALSE` will be slightly faster. #' @inheritParams lazyframe__collect #' @inheritParams pl__scan_parquet -#' @param _type_check For internal use only. #' @param sync_on_close Sync to disk when before closing a file. Must be one of: #' * `"none"`: does not sync; #' * `"data"`: syncs the file contents; diff --git a/altdoc/mkdocs.yml b/altdoc/mkdocs.yml index e6d71ae67..63693cd1a 100644 --- a/altdoc/mkdocs.yml +++ b/altdoc/mkdocs.yml @@ -207,6 +207,7 @@ nav: - with_columns: man/lazyframe__with_columns.md - with_row_index: man/lazyframe__with_row_index.md - Misc: + - QueryOptFlags: man/QueryOptFlags.md - collect: man/lazyframe__collect.md - collect_schema: man/lazyframe__collect_schema.md - sink_batches: man/lazyframe__sink_batches.md diff --git a/man/QueryOptFlags.Rd b/man/QueryOptFlags.Rd index 810d15a3e..9df125d53 100644 --- a/man/QueryOptFlags.Rd +++ b/man/QueryOptFlags.Rd @@ -52,7 +52,4 @@ A \code{QueryOptFlags} object. \examples{ opt_flags <- pl$QueryOptFlags() opt_flags - -S7::check_is_S7(opt_flags, pl$QueryOptFlags) } -\keyword{internal} diff --git a/man/lazyframe__collect.Rd b/man/lazyframe__collect.Rd index 9a713cb7f..0dec34d34 100644 --- a/man/lazyframe__collect.Rd +++ b/man/lazyframe__collect.Rd @@ -74,14 +74,6 @@ that to the \code{optimizations} argument instead.} \item{no_optimization}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Use the \code{optimizations} argument with \code{\link[=QueryOptFlags]{pl$QueryOptFlags()$no_optimizations()}} instead.} - -\item{check_order_observe}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use the \code{check_order_observe} property of a \link{QueryOptFlags} object, then pass -that to the \code{optimizations} argument instead.} - -\item{fast_projection}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} -Use the \code{fast_projection} property of a \link{QueryOptFlags} object, then pass -that to the \code{optimizations} argument instead.} } \value{ A polars \link{DataFrame} diff --git a/man/lazyframe__sink_parquet.Rd b/man/lazyframe__sink_parquet.Rd index 945a52708..a01f2e48b 100644 --- a/man/lazyframe__sink_parquet.Rd +++ b/man/lazyframe__sink_parquet.Rd @@ -140,8 +140,6 @@ The \code{"in-memory"} engine will be selected for most cases. \item{optimizations}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A \link{QueryOptFlags} object to indicate optimization passes done during query optimization.} - -\item{_type_check}{For internal use only.} } \value{ \itemize{ From f846fc4ae1801f89e185b0895a7f800926484582 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 15:29:54 +0000 Subject: [PATCH 11/15] refactor: remove old function [skip ci] --- R/000-wrappers.R | 7 ------- src/init.c | 6 ------ src/rust/api.h | 1 - src/rust/src/lazyframe/general.rs | 32 ------------------------------- 4 files changed, 46 deletions(-) diff --git a/R/000-wrappers.R b/R/000-wrappers.R index 1dc046c35..eabc4e796 100644 --- a/R/000-wrappers.R +++ b/R/000-wrappers.R @@ -3999,12 +3999,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed") } } -`PlRLazyFrame_optimization_toggle` <- function(self) { - function(`type_coercion`, `_type_check`, `predicate_pushdown`, `projection_pushdown`, `simplify_expression`, `slice_pushdown`, `comm_subplan_elim`, `comm_subexpr_elim`, `cluster_with_columns`, `_eager`, `_check_order`) { - .savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_optimization_toggle__impl, `self`, `type_coercion`, `_type_check`, `predicate_pushdown`, `projection_pushdown`, `simplify_expression`, `slice_pushdown`, `comm_subplan_elim`, `comm_subexpr_elim`, `cluster_with_columns`, `_eager`, `_check_order`)) - } -} - `PlRLazyFrame_profile` <- function(self) { function() { .Call(savvy_PlRLazyFrame_profile__impl, `self`) @@ -4238,7 +4232,6 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed") e$`merge_sorted` <- `PlRLazyFrame_merge_sorted`(ptr) e$`min` <- `PlRLazyFrame_min`(ptr) e$`null_count` <- `PlRLazyFrame_null_count`(ptr) - e$`optimization_toggle` <- `PlRLazyFrame_optimization_toggle`(ptr) e$`profile` <- `PlRLazyFrame_profile`(ptr) e$`quantile` <- `PlRLazyFrame_quantile`(ptr) e$`remove` <- `PlRLazyFrame_remove`(ptr) diff --git a/src/init.c b/src/init.c index 18cf26045..cce0c1e38 100644 --- a/src/init.c +++ b/src/init.c @@ -2739,11 +2739,6 @@ SEXP savvy_PlRLazyFrame_null_count__impl(SEXP self__) { return handle_result(res); } -SEXP savvy_PlRLazyFrame_optimization_toggle__impl(SEXP self__, SEXP c_arg__type_coercion, SEXP c_arg___type_check, SEXP c_arg__predicate_pushdown, SEXP c_arg__projection_pushdown, SEXP c_arg__simplify_expression, SEXP c_arg__slice_pushdown, SEXP c_arg__comm_subplan_elim, SEXP c_arg__comm_subexpr_elim, SEXP c_arg__cluster_with_columns, SEXP c_arg___eager, SEXP c_arg___check_order) { - SEXP res = savvy_PlRLazyFrame_optimization_toggle__ffi(self__, c_arg__type_coercion, c_arg___type_check, c_arg__predicate_pushdown, c_arg__projection_pushdown, c_arg__simplify_expression, c_arg__slice_pushdown, c_arg__comm_subplan_elim, c_arg__comm_subexpr_elim, c_arg__cluster_with_columns, c_arg___eager, c_arg___check_order); - return handle_result(res); -} - SEXP savvy_PlRLazyFrame_profile__impl(SEXP self__) { SEXP res = savvy_PlRLazyFrame_profile__ffi(self__); return handle_result(res); @@ -3927,7 +3922,6 @@ static const R_CallMethodDef CallEntries[] = { {"savvy_PlRLazyFrame_new_from_ndjson__impl", (DL_FUNC) &savvy_PlRLazyFrame_new_from_ndjson__impl, 15}, {"savvy_PlRLazyFrame_new_from_parquet__impl", (DL_FUNC) &savvy_PlRLazyFrame_new_from_parquet__impl, 18}, {"savvy_PlRLazyFrame_null_count__impl", (DL_FUNC) &savvy_PlRLazyFrame_null_count__impl, 1}, - {"savvy_PlRLazyFrame_optimization_toggle__impl", (DL_FUNC) &savvy_PlRLazyFrame_optimization_toggle__impl, 12}, {"savvy_PlRLazyFrame_profile__impl", (DL_FUNC) &savvy_PlRLazyFrame_profile__impl, 1}, {"savvy_PlRLazyFrame_quantile__impl", (DL_FUNC) &savvy_PlRLazyFrame_quantile__impl, 3}, {"savvy_PlRLazyFrame_remove__impl", (DL_FUNC) &savvy_PlRLazyFrame_remove__impl, 2}, diff --git a/src/rust/api.h b/src/rust/api.h index c99b0866f..0cdfe87f2 100644 --- a/src/rust/api.h +++ b/src/rust/api.h @@ -555,7 +555,6 @@ SEXP savvy_PlRLazyFrame_new_from_ipc__ffi(SEXP c_arg__source, SEXP c_arg__cache, SEXP savvy_PlRLazyFrame_new_from_ndjson__ffi(SEXP c_arg__source, SEXP c_arg__low_memory, SEXP c_arg__rechunk, SEXP c_arg__ignore_errors, SEXP c_arg__retries, SEXP c_arg__row_index_offset, SEXP c_arg__row_index_name, SEXP c_arg__infer_schema_length, SEXP c_arg__schema, SEXP c_arg__schema_overrides, SEXP c_arg__batch_size, SEXP c_arg__n_rows, SEXP c_arg__include_file_paths, SEXP c_arg__storage_options, SEXP c_arg__file_cache_ttl); SEXP savvy_PlRLazyFrame_new_from_parquet__ffi(SEXP c_arg__source, SEXP c_arg__cache, SEXP c_arg__parallel, SEXP c_arg__rechunk, SEXP c_arg__low_memory, SEXP c_arg__use_statistics, SEXP c_arg__try_parse_hive_dates, SEXP c_arg__retries, SEXP c_arg__glob, SEXP c_arg__allow_missing_columns, SEXP c_arg__row_index_offset, SEXP c_arg__storage_options, SEXP c_arg__n_rows, SEXP c_arg__row_index_name, SEXP c_arg__hive_partitioning, SEXP c_arg__schema, SEXP c_arg__hive_schema, SEXP c_arg__include_file_paths); SEXP savvy_PlRLazyFrame_null_count__ffi(SEXP self__); -SEXP savvy_PlRLazyFrame_optimization_toggle__ffi(SEXP self__, SEXP c_arg__type_coercion, SEXP c_arg___type_check, SEXP c_arg__predicate_pushdown, SEXP c_arg__projection_pushdown, SEXP c_arg__simplify_expression, SEXP c_arg__slice_pushdown, SEXP c_arg__comm_subplan_elim, SEXP c_arg__comm_subexpr_elim, SEXP c_arg__cluster_with_columns, SEXP c_arg___eager, SEXP c_arg___check_order); SEXP savvy_PlRLazyFrame_profile__ffi(SEXP self__); SEXP savvy_PlRLazyFrame_quantile__ffi(SEXP self__, SEXP c_arg__quantile, SEXP c_arg__interpolation); SEXP savvy_PlRLazyFrame_remove__ffi(SEXP self__, SEXP c_arg__predicate); diff --git a/src/rust/src/lazyframe/general.rs b/src/rust/src/lazyframe/general.rs index 27c091c1d..3a5c9ea31 100644 --- a/src/rust/src/lazyframe/general.rs +++ b/src/rust/src/lazyframe/general.rs @@ -44,38 +44,6 @@ impl PlRLazyFrame { Ok(sexp.into()) } - fn optimization_toggle( - &self, - type_coercion: bool, - _type_check: bool, - predicate_pushdown: bool, - projection_pushdown: bool, - simplify_expression: bool, - slice_pushdown: bool, - comm_subplan_elim: bool, - comm_subexpr_elim: bool, - cluster_with_columns: bool, - _eager: bool, - _check_order: bool, - ) -> Result { - let ldf = self - .ldf - .clone() - .with_type_coercion(type_coercion) - .with_type_check(_type_check) - .with_predicate_pushdown(predicate_pushdown) - .with_simplify_expr(simplify_expression) - .with_slice_pushdown(slice_pushdown) - .with_check_order(_check_order) - .with_comm_subplan_elim(comm_subplan_elim) - .with_comm_subexpr_elim(comm_subexpr_elim) - .with_cluster_with_columns(cluster_with_columns) - ._with_eager(_eager) - .with_projection_pushdown(projection_pushdown); - - Ok(ldf.into()) - } - fn sink_batches( &self, lambda: savvy::FunctionSexp, From 1e290c28c6ca5db0f001f7c7f4264799b3af485b Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 15:31:44 +0000 Subject: [PATCH 12/15] docs(news): fix typo [skip ci] --- NEWS.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4d874beb7..f6d55ac9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,7 @@ are deprecated in favor of the new `optimizations` argument (#1635). Some arguments that were intended for internal use have been removed without deprecation. - - `time_coercion` + - `type_coercion` - `predicate_pushdown` - `projection_pushdown` - `simplify_expression` @@ -27,15 +27,16 @@ ### New features -- The following functions gain the `optimizations` taking a `QueryOptFlags` object (#1633, #1634, #1635). +- The following functions gain the `optimizations` argument taking a `QueryOptFlags` object (#1633, #1634, #1635). - `$collect()` - `$explain()` - `$profile()` - `$to_dot()` - - `sink_batches()` - - `sink_csv()` - - `sink_ipc()` - - `sink_parquet()` + - `$sink_batches()` + - `$sink_csv()` + - `$sink_ipc()` + - `$sink_parquet()` + - `$sink_ndjson()` - `pl$collect_all()` - `as_polars_df()` - `pl$collect_all()` to efficiently collect a list of LazyFrames (#1598, #1635). From 5ba381dc4f6b416f1fc5e1474f921ee05404c2cc Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 12 Nov 2025 15:43:02 +0000 Subject: [PATCH 13/15] docs(news): tweak [skip ci] --- NEWS.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index f6d55ac9b..b2d0a2e16 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,11 +23,13 @@ Functions affected are those that gained the `optimizations` argument. See the New features section below for details. - Also, for the experimental `sink_*` methods, the above arguments are removed instead of being deprecated. + Also, for the experimental `$sink_*` methods, + the above arguments are removed instead of being deprecated. ### New features -- The following functions gain the `optimizations` argument taking a `QueryOptFlags` object (#1633, #1634, #1635). +- The following functions gain the experimental `optimizations` argument + taking a `QueryOptFlags` object (#1633, #1634, #1635). - `$collect()` - `$explain()` - `$profile()` From b904df4b162360bacf7990c70f7469e89e889d14 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 13 Nov 2025 15:06:11 +0000 Subject: [PATCH 14/15] test: add some tests [skip ci] --- tests/testthat/_snaps/lazyframe-frame.md | 10 +++++ tests/testthat/_snaps/lazyframe-opt_flags.md | 39 ++++++++++++++++++++ tests/testthat/test-lazyframe-frame.R | 3 ++ tests/testthat/test-lazyframe-opt_flags.R | 20 ++++++++++ 4 files changed, 72 insertions(+) diff --git a/tests/testthat/_snaps/lazyframe-frame.md b/tests/testthat/_snaps/lazyframe-frame.md index 2015e35ed..6fc3081c4 100644 --- a/tests/testthat/_snaps/lazyframe-frame.md +++ b/tests/testthat/_snaps/lazyframe-frame.md @@ -403,6 +403,16 @@ FROM DF ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", ...]; PROJECT */5 COLUMNS +--- + + Code + cat(lazy_query$explain(optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE))) + Output + FILTER [(col("Species")) != ("setosa")] + FROM + SORT BY [col("Species")] + DF ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", ...]; PROJECT */5 COLUMNS + --- Code diff --git a/tests/testthat/_snaps/lazyframe-opt_flags.md b/tests/testthat/_snaps/lazyframe-opt_flags.md index 6f8f75037..4e06d2a01 100644 --- a/tests/testthat/_snaps/lazyframe-opt_flags.md +++ b/tests/testthat/_snaps/lazyframe-opt_flags.md @@ -154,3 +154,42 @@ @ eager : logi FALSE @ streaming : logi FALSE +--- + + Code + DEFAULT_EAGER_OPT_FLAGS + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi FALSE + @ projection_pushdown : logi FALSE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi FALSE + @ comm_subplan_elim : logi FALSE + @ comm_subexpr_elim : logi FALSE + @ cluster_with_columns: logi FALSE + @ check_order_observe : logi FALSE + @ fast_projection : logi FALSE + @ eager : logi TRUE + @ streaming : logi FALSE + +# Rust side validation + + Code + validate(opt_flags) + Condition + Error: + ! object properties are invalid: + - @eager must be , not + +--- + + Code + pl$collect_all(list(), optimizations = opt_flags) + Condition + Error in `pl$collect_all()`: + ! Evaluation failed in `$collect_all()`. + Caused by error: + ! Must be logical, not integer + diff --git a/tests/testthat/test-lazyframe-frame.R b/tests/testthat/test-lazyframe-frame.R index bb50cb531..40f7c5ea9 100644 --- a/tests/testthat/test-lazyframe-frame.R +++ b/tests/testthat/test-lazyframe-frame.R @@ -762,6 +762,9 @@ test_that("explain() works", { expect_snapshot(cat(lazy_query$explain(optimized = FALSE))) expect_snapshot(cat(lazy_query$explain())) + expect_snapshot(cat(lazy_query$explain( + optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE) + ))) expect_snapshot(cat(lazy_query$explain(format = "tree", optimized = FALSE))) expect_snapshot(cat(lazy_query$explain(format = "tree", ))) diff --git a/tests/testthat/test-lazyframe-opt_flags.R b/tests/testthat/test-lazyframe-opt_flags.R index 88134eded..a8096a8e5 100644 --- a/tests/testthat/test-lazyframe-opt_flags.R +++ b/tests/testthat/test-lazyframe-opt_flags.R @@ -37,4 +37,24 @@ test_that("QueryOptFlags", { expect_snapshot(opt_flags$type, error = TRUE, cnd_class = TRUE) expect_snapshot(opt_flags$no_optimizations()) + + # Pre-defined + expect_snapshot(DEFAULT_EAGER_OPT_FLAGS) +}) + +test_that("Rust side validation", { + opt_flags <- pl$QueryOptFlags() + prop(opt_flags, "eager", check = FALSE) <- 1L + + expect_snapshot( + validate(opt_flags), + error = TRUE, + cnd_class = TRUE + ) + + expect_snapshot( + pl$collect_all(list(), optimizations = opt_flags), + error = TRUE, + cnd_class = TRUE + ) }) From dfcda09fcb829bb871fe4e454727f74e6b4caf7a Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Fri, 14 Nov 2025 03:57:12 +0000 Subject: [PATCH 15/15] test: more tests [skip ci] --- R/lazyframe-utils.R | 1 - tests/testthat/_snaps/lazyframe-utils.md | 240 +++++++++++++++++++++++ tests/testthat/test-lazyframe-frame.R | 1 + tests/testthat/test-lazyframe-utils.R | 22 +++ 4 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/lazyframe-utils.md create mode 100644 tests/testthat/test-lazyframe-utils.R diff --git a/R/lazyframe-utils.R b/R/lazyframe-utils.R index d6c3bb935..a55f51618 100644 --- a/R/lazyframe-utils.R +++ b/R/lazyframe-utils.R @@ -199,7 +199,6 @@ forward_old_opt_flags <- function( cluster_with_columns = FALSE, check_order_observe = FALSE ) - need_validation <- TRUE } } diff --git a/tests/testthat/_snaps/lazyframe-utils.md b/tests/testthat/_snaps/lazyframe-utils.md new file mode 100644 index 000000000..d3b2bb56c --- /dev/null +++ b/tests/testthat/_snaps/lazyframe-utils.md @@ -0,0 +1,240 @@ +# forward_old_opt_flags + + Code + test_fn(type_coercion = FALSE) + Condition + Warning: + ! `type_coercion` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi FALSE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(predicate_pushdown = FALSE) + Condition + Warning: + ! `predicate_pushdown` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi FALSE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(projection_pushdown = FALSE) + Condition + Warning: + ! `projection_pushdown` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi FALSE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(simplify_expression = FALSE) + Condition + Warning: + ! `simplify_expression` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi FALSE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(slice_pushdown = FALSE) + Condition + Warning: + ! `slice_pushdown` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi FALSE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(comm_subplan_elim = FALSE) + Condition + Warning: + ! `comm_subplan_elim` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi FALSE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(comm_subexpr_elim = FALSE) + Condition + Warning: + ! `comm_subexpr_elim` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi FALSE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(cluster_with_columns = FALSE) + Condition + Warning: + ! `cluster_with_columns` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi FALSE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(collapse_joins = FALSE) + Condition + Warning: + ! `collapse_joins` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi TRUE + @ projection_pushdown : logi TRUE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi TRUE + @ comm_subplan_elim : logi TRUE + @ comm_subexpr_elim : logi TRUE + @ cluster_with_columns: logi TRUE + @ check_order_observe : logi TRUE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + +--- + + Code + test_fn(no_optimization = TRUE) + Condition + Warning: + ! `no_optimization` is deprecated. + i Use `optimizations` instead. + Output + + @ type_coercion : logi TRUE + @ type_check : logi TRUE + @ predicate_pushdown : logi FALSE + @ projection_pushdown : logi FALSE + @ simplify_expression : logi TRUE + @ slice_pushdown : logi FALSE + @ comm_subplan_elim : logi FALSE + @ comm_subexpr_elim : logi FALSE + @ cluster_with_columns: logi FALSE + @ check_order_observe : logi FALSE + @ fast_projection : logi TRUE + @ eager : logi FALSE + @ streaming : logi FALSE + diff --git a/tests/testthat/test-lazyframe-frame.R b/tests/testthat/test-lazyframe-frame.R index 40f7c5ea9..fa4b50e48 100644 --- a/tests/testthat/test-lazyframe-frame.R +++ b/tests/testthat/test-lazyframe-frame.R @@ -765,6 +765,7 @@ test_that("explain() works", { expect_snapshot(cat(lazy_query$explain( optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE) ))) + expect_snapshot(cat(lazy_query$explain(optimizations = predicate_pushdown = FALSE))) expect_snapshot(cat(lazy_query$explain(format = "tree", optimized = FALSE))) expect_snapshot(cat(lazy_query$explain(format = "tree", ))) diff --git a/tests/testthat/test-lazyframe-utils.R b/tests/testthat/test-lazyframe-utils.R new file mode 100644 index 000000000..7665f95d6 --- /dev/null +++ b/tests/testthat/test-lazyframe-utils.R @@ -0,0 +1,22 @@ +test_that("forward_old_opt_flags", { + flags <- pl$QueryOptFlags() + test_fn <- function(...) { + forward_old_opt_flags( + flags, + ... + ) |> + print() |> + validate() + } + + expect_snapshot(test_fn(type_coercion = FALSE)) + expect_snapshot(test_fn(predicate_pushdown = FALSE)) + expect_snapshot(test_fn(projection_pushdown = FALSE)) + expect_snapshot(test_fn(simplify_expression = FALSE)) + expect_snapshot(test_fn(slice_pushdown = FALSE)) + expect_snapshot(test_fn(comm_subplan_elim = FALSE)) + expect_snapshot(test_fn(comm_subexpr_elim = FALSE)) + expect_snapshot(test_fn(cluster_with_columns = FALSE)) + expect_snapshot(test_fn(collapse_joins = FALSE)) + expect_snapshot(test_fn(no_optimization = TRUE)) +})