Skip to content

Commit b605a42

Browse files
committed
feat: optimizations arguments [skip ci]
1 parent a042672 commit b605a42

File tree

6 files changed

+139
-36
lines changed

6 files changed

+139
-36
lines changed

R/000-wrappers.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4194,6 +4194,12 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed")
41944194
}
41954195
}
41964196

4197+
`PlRLazyFrame_with_optimizations` <- function(self) {
4198+
function(`optimizations`) {
4199+
.savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_optimizations__impl, `self`, `optimizations`))
4200+
}
4201+
}
4202+
41974203
`PlRLazyFrame_with_row_index` <- function(self) {
41984204
function(`name`, `offset` = NULL) {
41994205
.savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_with_row_index__impl, `self`, `name`, `offset`))
@@ -4263,6 +4269,7 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed")
42634269
e$`var` <- `PlRLazyFrame_var`(ptr)
42644270
e$`with_columns` <- `PlRLazyFrame_with_columns`(ptr)
42654271
e$`with_columns_seq` <- `PlRLazyFrame_with_columns_seq`(ptr)
4272+
e$`with_optimizations` <- `PlRLazyFrame_with_optimizations`(ptr)
42664273
e$`with_row_index` <- `PlRLazyFrame_with_row_index`(ptr)
42674274

42684275
class(e) <- c("PlRLazyFrame", "savvy_polars__sealed")

R/lazyframe-frame.R

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -446,59 +446,60 @@ lazyframe__profile <- function(
446446
#' lazy_query <- lazy_frame$sort("Species")$filter(pl$col("Species") != "setosa")
447447
#'
448448
#' # This is the query that was written by the user, without any optimizations
449-
#' # (use cat() for better printing)
450-
#' lazy_query$explain(optimized = FALSE) |> cat()
449+
#' # (use writeLines() for better printing)
450+
#' lazy_query$explain(optimized = FALSE) |> writeLines()
451451
#'
452452
#' # This is the query after `polars` optimizes it: instead of sorting first and
453453
#' # then filtering, it is faster to filter first and then sort the rest.
454-
#' lazy_query$explain() |> cat()
454+
#' lazy_query$explain() |> writeLines()
455+
#'
456+
#' # You can disable specific optimizations.
457+
#' lazy_query$explain(
458+
#' optimizations = pl$QueryOptFlags(predicate_pushdown = FALSE)
459+
#' ) |>
460+
#' writeLines()
455461
#'
456462
#' # Also possible to see this as tree format
457-
#' lazy_query$explain(format = "tree") |> cat()
463+
#' lazy_query$explain(format = "tree") |> writeLines()
458464
lazyframe__explain <- function(
459465
...,
460466
format = c("plain", "tree"),
467+
engine = c("auto", "in-memory", "streaming"),
461468
optimized = TRUE,
462-
type_coercion = TRUE,
463-
`_type_check` = TRUE,
464-
predicate_pushdown = TRUE,
465-
projection_pushdown = TRUE,
466-
simplify_expression = TRUE,
467-
slice_pushdown = TRUE,
468-
comm_subplan_elim = TRUE,
469-
comm_subexpr_elim = TRUE,
470-
cluster_with_columns = TRUE,
471-
collapse_joins = deprecated(),
472-
`_check_order` = TRUE
469+
optimizations = QueryOptFlags(),
470+
type_coercion = deprecated(),
471+
predicate_pushdown = deprecated(),
472+
projection_pushdown = deprecated(),
473+
simplify_expression = deprecated(),
474+
slice_pushdown = deprecated(),
475+
comm_subplan_elim = deprecated(),
476+
comm_subexpr_elim = deprecated(),
477+
cluster_with_columns = deprecated(),
478+
collapse_joins = deprecated()
473479
) {
474480
wrap({
475481
check_dots_empty0(...)
476482

477483
format <- arg_match0(format, c("plain", "tree"))
484+
engine <- arg_match0(engine, c("auto", "in-memory", "streaming"))
485+
check_is_S7(optimizations, QueryOptFlags)
478486

479-
if (is_present(collapse_joins)) {
480-
deprecate_warn(
481-
c(
482-
`!` = sprintf("%s is deprecated.", format_arg("collapse_joins")),
483-
`i` = sprintf("Use %s instead.", format_arg("predicate_pushdown"))
484-
)
485-
)
486-
}
487+
optimizations <- forward_old_opt_flags(
488+
optimizations,
489+
type_coercion = type_coercion,
490+
predicate_pushdown = predicate_pushdown,
491+
projection_pushdown = projection_pushdown,
492+
simplify_expression = simplify_expression,
493+
slice_pushdown = slice_pushdown,
494+
comm_subplan_elim = comm_subplan_elim,
495+
comm_subexpr_elim = comm_subexpr_elim,
496+
cluster_with_columns = cluster_with_columns,
497+
collapse_joins = collapse_joins
498+
)
487499

488500
if (isTRUE(optimized)) {
489-
ldf <- self$`_ldf`$optimization_toggle(
490-
type_coercion = type_coercion,
491-
`_type_check` = `_type_check`,
492-
predicate_pushdown = predicate_pushdown,
493-
projection_pushdown = projection_pushdown,
494-
simplify_expression = simplify_expression,
495-
slice_pushdown = slice_pushdown,
496-
comm_subplan_elim = comm_subplan_elim,
497-
comm_subexpr_elim = comm_subexpr_elim,
498-
cluster_with_columns = cluster_with_columns,
499-
`_check_order` = `_check_order`,
500-
`_eager` = FALSE
501-
)
501+
prop(optimizations, "streaming", check = FALSE) <- engine == "streaming"
502+
ldf <- self$`_ldf`$with_optimizations(optimizations)
502503

503504
if (format == "tree") {
504505
ldf$describe_optimized_plan_tree()

R/lazyframe-utils.R

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,82 @@ parse_percentiles <- function(percentiles, inject_median = FALSE) {
158158

159159
c(sub_50_percentiles, at_or_above_50_percentiles)
160160
}
161+
162+
forward_old_opt_flags <- function(
163+
optimizations,
164+
type_coercion = deprecated(),
165+
predicate_pushdown = deprecated(),
166+
projection_pushdown = deprecated(),
167+
simplify_expression = deprecated(),
168+
slice_pushdown = deprecated(),
169+
comm_subplan_elim = deprecated(),
170+
comm_subexpr_elim = deprecated(),
171+
cluster_with_columns = deprecated(),
172+
collapse_joins = deprecated()
173+
) {
174+
call <- caller_env(2L)
175+
warn_func <- function(arg_name) {
176+
deprecate_warn(
177+
c(
178+
`!` = sprintf("%s is deprecated.", format_arg(arg_name)),
179+
`i` = sprintf("Use %s instead.", format_arg("optimizations"))
180+
),
181+
always = TRUE,
182+
user_env = call
183+
)
184+
}
185+
186+
need_validation <- FALSE
187+
188+
if (is_present(type_coercion)) {
189+
warn_func("type_coercion")
190+
prop(optimizations, "type_coercion", check = FALSE) <- type_coercion
191+
need_validation <- TRUE
192+
}
193+
if (is_present(predicate_pushdown)) {
194+
warn_func("predicate_pushdown")
195+
prop(optimizations, "predicate_pushdown", check = FALSE) <- predicate_pushdown
196+
need_validation <- TRUE
197+
}
198+
if (is_present(projection_pushdown)) {
199+
warn_func("projection_pushdown")
200+
prop(optimizations, "projection_pushdown", check = FALSE) <- projection_pushdown
201+
need_validation <- TRUE
202+
}
203+
if (is_present(simplify_expression)) {
204+
warn_func("simplify_expression")
205+
prop(optimizations, "simplify_expression", check = FALSE) <- simplify_expression
206+
need_validation <- TRUE
207+
}
208+
if (is_present(slice_pushdown)) {
209+
warn_func("slice_pushdown")
210+
prop(optimizations, "slice_pushdown", check = FALSE) <- slice_pushdown
211+
need_validation <- TRUE
212+
}
213+
if (is_present(comm_subplan_elim)) {
214+
warn_func("comm_subplan_elim")
215+
prop(optimizations, "comm_subplan_elim", check = FALSE) <- comm_subplan_elim
216+
need_validation <- TRUE
217+
}
218+
if (is_present(comm_subexpr_elim)) {
219+
warn_func("comm_subexpr_elim")
220+
prop(optimizations, "comm_subexpr_elim", check = FALSE) <- comm_subexpr_elim
221+
need_validation <- TRUE
222+
}
223+
if (is_present(cluster_with_columns)) {
224+
warn_func("cluster_with_columns")
225+
prop(optimizations, "cluster_with_columns", check = FALSE) <- cluster_with_columns
226+
need_validation <- TRUE
227+
}
228+
229+
if (is_present(collapse_joins)) {
230+
warn_func("collapse_joins")
231+
# collapse_joins was merged to predicate_pushdown, so there is no flag anymore
232+
}
233+
234+
if (need_validation) {
235+
validate(optimizations)
236+
}
237+
238+
optimizations
239+
}

src/init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2894,6 +2894,11 @@ SEXP savvy_PlRLazyFrame_with_columns_seq__impl(SEXP self__, SEXP c_arg__exprs) {
28942894
return handle_result(res);
28952895
}
28962896

2897+
SEXP savvy_PlRLazyFrame_with_optimizations__impl(SEXP self__, SEXP c_arg__optimizations) {
2898+
SEXP res = savvy_PlRLazyFrame_with_optimizations__ffi(self__, c_arg__optimizations);
2899+
return handle_result(res);
2900+
}
2901+
28972902
SEXP savvy_PlRLazyFrame_with_row_index__impl(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset) {
28982903
SEXP res = savvy_PlRLazyFrame_with_row_index__ffi(self__, c_arg__name, c_arg__offset);
28992904
return handle_result(res);
@@ -3953,6 +3958,7 @@ static const R_CallMethodDef CallEntries[] = {
39533958
{"savvy_PlRLazyFrame_var__impl", (DL_FUNC) &savvy_PlRLazyFrame_var__impl, 2},
39543959
{"savvy_PlRLazyFrame_with_columns__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns__impl, 2},
39553960
{"savvy_PlRLazyFrame_with_columns_seq__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_columns_seq__impl, 2},
3961+
{"savvy_PlRLazyFrame_with_optimizations__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_optimizations__impl, 2},
39563962
{"savvy_PlRLazyFrame_with_row_index__impl", (DL_FUNC) &savvy_PlRLazyFrame_with_row_index__impl, 3},
39573963
{"savvy_PlRLazyGroupBy_agg__impl", (DL_FUNC) &savvy_PlRLazyGroupBy_agg__impl, 2},
39583964
{"savvy_PlRLazyGroupBy_head__impl", (DL_FUNC) &savvy_PlRLazyGroupBy_head__impl, 2},

src/rust/api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,7 @@ SEXP savvy_PlRLazyFrame_unpivot__ffi(SEXP self__, SEXP c_arg__on, SEXP c_arg__in
586586
SEXP savvy_PlRLazyFrame_var__ffi(SEXP self__, SEXP c_arg__ddof);
587587
SEXP savvy_PlRLazyFrame_with_columns__ffi(SEXP self__, SEXP c_arg__exprs);
588588
SEXP savvy_PlRLazyFrame_with_columns_seq__ffi(SEXP self__, SEXP c_arg__exprs);
589+
SEXP savvy_PlRLazyFrame_with_optimizations__ffi(SEXP self__, SEXP c_arg__optimizations);
589590
SEXP savvy_PlRLazyFrame_with_row_index__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__offset);
590591

591592
// methods and associated functions for PlRLazyGroupBy

src/rust/src/lazyframe/general.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use super::sink::RSinkTarget;
22
use crate::{
33
PlRDataFrame, PlRDataType, PlRExpr, PlRLazyFrame, PlRLazyGroupBy, PlRSeries, RPolarsErr,
44
expr::selector::PlRSelector,
5+
lazyframe::PlROptFlags,
56
prelude::{sync_on_close::SyncOnCloseType, *},
67
};
78
use polars::io::{HiveOptions, RowIndex};
@@ -313,6 +314,14 @@ impl PlRLazyFrame {
313314
Ok(ldf.cache().into())
314315
}
315316

317+
fn with_optimizations(&self, optimizations: Sexp) -> Result<Self> {
318+
let ldf = self.ldf.clone();
319+
let optimizations = <PlROptFlags>::try_from(optimizations)?;
320+
Ok(ldf
321+
.with_optimizations(optimizations.inner.into_inner())
322+
.into())
323+
}
324+
316325
fn profile(&self) -> Result<Sexp> {
317326
use crate::{
318327
r_threads::{ThreadCom, concurrent_handler},

0 commit comments

Comments
 (0)