Skip to content

Commit 4598fbf

Browse files
feat: Implement pl$collect_all() (#1598)
Co-authored-by: eitsupi <[email protected]>
1 parent a8d2ec2 commit 4598fbf

File tree

21 files changed

+362
-14
lines changed

21 files changed

+362
-14
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ VignetteBuilder:
5252
Config/Needs/dev: devtools, lifecycle, readr, glue, RcppTOML, smvr
5353
Config/Needs/lint: fs, lintr
5454
Config/Needs/website: etiennebacher/altdoc, future.apply
55-
Config/polars/lib-version: 1.5.0-rc.2
55+
Config/polars/lib-version: 1.6.0-rc.1
5656
Config/testthat/edition: 3
5757
Config/testthat/parallel: true
5858
Config/testthat/start-first: lazyframe-frame, *-s3-base, polars_options,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ export(as_polars_expr)
252252
export(as_polars_lf)
253253
export(as_polars_series)
254254
export(check_list_of_polars_dtype)
255+
export(check_list_of_polars_lf)
255256
export(check_polars_df)
256257
export(check_polars_dtype)
257258
export(check_polars_dtype_expr)
@@ -265,6 +266,7 @@ export(infer_polars_dtype)
265266
export(is_convertible_to_polars_expr)
266267
export(is_convertible_to_polars_series)
267268
export(is_list_of_polars_dtype)
269+
export(is_list_of_polars_lf)
268270
export(is_polars_df)
269271
export(is_polars_dtype)
270272
export(is_polars_dtype_expr)

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## polars (development version)
44

5+
### New features
6+
7+
- `pl$collect_all()` to efficiently collect a list of LazyFrames (#1598).
8+
59
## polars 1.5.0
610

711
This is an update that corresponds to Python Polars 1.35.1.

R/000-wrappers.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ NULL
7474
}
7575

7676

77+
`collect_all` <- function(`lfs`, `engine`, `optflags`) {
78+
.Call(savvy_collect_all__impl, `lfs`, `engine`, `optflags`)
79+
}
80+
81+
7782
`cols` <- function(`names`) {
7883
.savvy_wrap_PlRExpr(.Call(savvy_cols__impl, `names`))
7984
}

R/check_polars.R

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,21 @@ is_list_of_polars_dtype <- function(x, n = NULL) {
108108
is_list(x, n = n) && is_list_of_polars_dtype_impl(x)
109109
}
110110

111+
#' @rdname check_polars
112+
#' @export
113+
is_list_of_polars_lf <- function(x, n = NULL) {
114+
is_list_of_polars_lf_impl <- function(x) {
115+
for (i in seq_along(x)) {
116+
if (!is_polars_lf(x[[i]])) {
117+
return(FALSE)
118+
}
119+
}
120+
TRUE
121+
}
122+
123+
is_list(x, n = n) && is_list_of_polars_lf_impl(x)
124+
}
125+
111126
#' @rdname check_polars
112127
#' @export
113128
check_polars_df <- function(
@@ -371,3 +386,32 @@ check_list_of_polars_dtype <- function(
371386
call = call
372387
)
373388
}
389+
390+
#' @rdname check_polars
391+
#' @export
392+
check_list_of_polars_lf <- function(
393+
x,
394+
...,
395+
allow_null = FALSE,
396+
arg = caller_arg(x),
397+
call = caller_env()
398+
) {
399+
if (!missing(x)) {
400+
if (is_list_of_polars_lf(x)) {
401+
return(invisible(NULL))
402+
}
403+
if (allow_null && is_null(x)) {
404+
return(invisible(NULL))
405+
}
406+
}
407+
408+
stop_input_type(
409+
x,
410+
"a list of polars lazyframes",
411+
...,
412+
allow_na = FALSE,
413+
allow_null = allow_null,
414+
arg = arg,
415+
call = call
416+
)
417+
}

R/functions-lazy.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,54 @@ pl__arg_sort_by <- function(
121121
)
122122
})
123123
}
124+
125+
#' Collect multiple LazyFrames at the same time
126+
#'
127+
#' This can run all the computation graphs in parallel or combined. Common
128+
#' Subplan Elimination is applied on the combined plan, meaning that diverging
129+
#' queries will run only once.
130+
#'
131+
#' @inheritParams rlang::args_dots_empty
132+
#' @inheritParams lazyframe__collect
133+
#' @param lazy_frames A list of LazyFrames to collect.
134+
#'
135+
#' @return A list containing all the collected DataFrames, in the same order
136+
#' as the input LazyFrames.
137+
#' @examples
138+
#' lf <- as_polars_lf(mtcars)$with_columns(sqrt_mpg = pl$col("mpg")$sqrt())
139+
#'
140+
#' cyl_4 <- lf$filter(pl$col("cyl") == 4)
141+
#' cyl_6 <- lf$filter(pl$col("cyl") == 6)
142+
#'
143+
#' # We could do `cyl_4$collect()` and `cyl_6$collect()`, but this would be
144+
#' # wasteful because `sqrt_mpg` would be computed twice.
145+
#' # `pl$collect_all()` executes only once the parts of the query that are
146+
#' # identical across LazyFrames.
147+
#' pl$collect_all(list(cyl_4, cyl_6))
148+
pl__collect_all <- function(
149+
lazy_frames,
150+
...,
151+
engine = c("auto", "in-memory", "streaming")
152+
) {
153+
wrap({
154+
check_dots_empty0(...)
155+
check_list_of_polars_lf(lazy_frames)
156+
engine <- arg_match0(engine, c("auto", "in-memory", "streaming"))
157+
158+
lfs <- lapply(lazy_frames, \(x) x$`_ldf`)
159+
# TODO: add support for argument `optimizations`
160+
optflags <- list(
161+
comm_subexpr_elim = TRUE,
162+
comm_subplan_elim = TRUE,
163+
cluster_with_columns = TRUE,
164+
predicate_pushdown = TRUE,
165+
projection_pushdown = TRUE,
166+
simplify_expression = TRUE,
167+
slice_pushdown = TRUE,
168+
type_coercion = TRUE
169+
)
170+
171+
collect_all(lfs, engine = engine, optflags) |>
172+
lapply(\(ptr) .savvy_wrap_PlRDataFrame(ptr) |> wrap())
173+
})
174+
}

altdoc/mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,7 @@ nav:
706706
- inner_dtype: man/datatype_expr__inner_dtype.md
707707
- matches: man/datatype_expr__matches.md
708708
Functions:
709+
- collect_all: man/pl__collect_all.md
709710
- concat: man/pl__concat.md
710711
- Expr: man/polars_expr.md
711712
- DataFrame: man/pl__DataFrame.md

man/check_polars.Rd

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/pl.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/pl__collect_all.Rd

Lines changed: 43 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)