Skip to content

Commit 6cd8df8

Browse files
feat: Implement <dataframe>$remove() and <lazyframe>$remove() (#1632)
1 parent 4598fbf commit 6cd8df8

File tree

11 files changed

+234
-0
lines changed

11 files changed

+234
-0
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
### New features
66

77
- `pl$collect_all()` to efficiently collect a list of LazyFrames (#1598).
8+
- `<lazyframe>$remove()` and `<dataframe>$remove()` as a complement to
9+
`$filter()` (#1632).
810

911
## polars 1.5.0
1012

R/000-wrappers.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4018,6 +4018,13 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed")
40184018
}
40194019
}
40204020

4021+
# Method factory for `$remove` on a PlRLazyFrame pointer: returns a closure
# bound to `self` that forwards to the native `remove` routine.
`PlRLazyFrame_remove` <- function(self) {
4022+
function(`predicate`) {
4023+
# Unwrap the R-side PlRExpr object to the raw pointer the native call expects.
`predicate` <- .savvy_extract_ptr(`predicate`, "PlRExpr")
4024+
# Call the registered native routine and re-wrap the result as a PlRLazyFrame.
.savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_remove__impl, `self`, `predicate`))
4025+
}
4026+
}
4027+
40214028
`PlRLazyFrame_rename` <- function(self) {
40224029
function(`existing`, `new`, `strict`) {
40234030
.savvy_wrap_PlRLazyFrame(.Call(savvy_PlRLazyFrame_rename__impl, `self`, `existing`, `new`, `strict`))
@@ -4228,6 +4235,7 @@ class(`PlRExpr`) <- c("PlRExpr__bundle", "savvy_polars__sealed")
42284235
e$`optimization_toggle` <- `PlRLazyFrame_optimization_toggle`(ptr)
42294236
e$`profile` <- `PlRLazyFrame_profile`(ptr)
42304237
e$`quantile` <- `PlRLazyFrame_quantile`(ptr)
4238+
e$`remove` <- `PlRLazyFrame_remove`(ptr)
42314239
e$`rename` <- `PlRLazyFrame_rename`(ptr)
42324240
e$`reverse` <- `PlRLazyFrame_reverse`(ptr)
42334241
e$`rolling` <- `PlRLazyFrame_rolling`(ptr)

R/dataframe-frame.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,33 @@ dataframe__filter <- function(...) {
623623
wrap()
624624
}
625625

626+
#' @inherit lazyframe__remove title description params
627+
#' @inherit as_polars_df return
628+
#' @examples
629+
#' df <- pl$DataFrame(
630+
#' ccy = c("USD", "EUR", "USD", "JPY"),
631+
#' year = c(2021, 2022, 2023, 2023),
632+
#' total = c(3245, NA, -6680, 25000),
633+
#' )
634+
#'
635+
#' # Remove rows matching a condition. Note that the row where `total` is null
636+
#' # is kept:
637+
#' df$remove(pl$col("total") >= 0)
638+
#'
639+
#' # Note that this is *not* the same as simply inverting the condition in
640+
#' # `$filter()` because `$filter()` drops rows where the predicate evaluates to
641+
#' # null:
642+
#' df$filter(pl$col("total") < 0)
643+
#'
644+
#' # We can use multiple conditions, combined with and/or operators:
645+
#' df$remove((pl$col("total") >= 0) & (pl$col("ccy") == "USD"))
646+
#'
647+
#' df$remove((pl$col("total") >= 0) | (pl$col("ccy") == "USD"))
648+
dataframe__remove <- function(...) {
649+
# Reuse the LazyFrame implementation: convert to lazy, apply `$remove()` with
# the same arguments, then collect with `_eager = TRUE` and wrap the result.
self$lazy()$remove(...)$collect(`_eager` = TRUE) |>
650+
wrap()
651+
}
652+
626653
#' Sort a DataFrame by the given columns
627654
#'
628655
#' @inherit lazyframe__sort description params details

R/lazyframe-frame.R

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,42 @@ lazyframe__filter <- function(...) {
629629
wrap()
630630
}
631631

632+
#' Remove rows, dropping those that match the given predicate expression(s)
633+
#'
634+
#' The original order of the remaining rows is preserved. Rows where the predicate
635+
#' does not evaluate to `TRUE` are retained (this includes rows where the
636+
#' predicate evaluates as `null`).
637+
#'
638+
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Expression(s) that evaluate to
639+
#' a boolean Series.
640+
#'
641+
#' @inherit as_polars_lf return
642+
#' @examples
643+
#' lf <- pl$LazyFrame(
644+
#' ccy = c("USD", "EUR", "USD", "JPY"),
645+
#' year = c(2021, 2022, 2023, 2023),
646+
#' total = c(3245, NA, -6680, 25000),
647+
#' )
648+
#'
649+
#' # Remove rows matching a condition. Note that the row where `total` is null
650+
#' # is kept:
651+
#' lf$remove(pl$col("total") >= 0)$collect()
652+
#'
653+
#' # Note that this is *not* the same as simply inverting the condition in
654+
#' # `$filter()` because `$filter()` drops rows where the predicate evaluates to
655+
#' # null:
656+
#' lf$filter(pl$col("total") < 0)$collect()
657+
#'
658+
#' # We can use multiple conditions, combined with and/or operators:
659+
#' lf$remove((pl$col("total") >= 0) & (pl$col("ccy") == "USD"))$collect()
660+
#'
661+
#' lf$remove((pl$col("total") >= 0) | (pl$col("ccy") == "USD"))$collect()
662+
lazyframe__remove <- function(...) {
663+
# Turn the arguments passed via `...` into an expression (presumably a single
# combined predicate -- confirm against the helper), hand it to the
# underlying `_ldf` object's `remove()`, and wrap the result.
parse_predicates_constraints_into_expression(...) |>
664+
self$`_ldf`$remove() |>
665+
wrap()
666+
}
667+
632668
#' Sort the LazyFrame by the given columns
633669
#'
634670
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Column(s) to sort by. Can be

altdoc/mkdocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ nav:
108108
- partition_by: man/dataframe__partition_by.md
109109
- pivot: man/dataframe__pivot.md
110110
- rechunk: man/dataframe__rechunk.md
111+
- remove: man/dataframe__remove.md
111112
- rename: man/dataframe__rename.md
112113
- reverse: man/dataframe__reverse.md
113114
- rolling: man/dataframe__rolling.md
@@ -185,6 +186,7 @@ nav:
185186
- join: man/lazyframe__join.md
186187
- last: man/lazyframe__last.md
187188
- merge_sorted: man/lazyframe__merge_sorted.md
189+
- remove: man/lazyframe__remove.md
188190
- rename: man/lazyframe__rename.md
189191
- reverse: man/lazyframe__reverse.md
190192
- rolling: man/lazyframe__rolling.md

man/dataframe__remove.Rd

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/lazyframe__remove.Rd

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2754,6 +2754,11 @@ SEXP savvy_PlRLazyFrame_quantile__impl(SEXP self__, SEXP c_arg__quantile, SEXP c
27542754
return handle_result(res);
27552755
}
27562756

2757+
/* Entry point for the `remove` routine (listed in CallEntries with 2
 * arguments): forwards both arguments to the Rust FFI symbol and passes the
 * returned SEXP through handle_result(). */
SEXP savvy_PlRLazyFrame_remove__impl(SEXP self__, SEXP c_arg__predicate) {
2758+
SEXP res = savvy_PlRLazyFrame_remove__ffi(self__, c_arg__predicate);
2759+
return handle_result(res);
2760+
}
2761+
27572762
SEXP savvy_PlRLazyFrame_rename__impl(SEXP self__, SEXP c_arg__existing, SEXP c_arg__new, SEXP c_arg__strict) {
27582763
SEXP res = savvy_PlRLazyFrame_rename__ffi(self__, c_arg__existing, c_arg__new, c_arg__strict);
27592764
return handle_result(res);
@@ -3920,6 +3925,7 @@ static const R_CallMethodDef CallEntries[] = {
39203925
{"savvy_PlRLazyFrame_optimization_toggle__impl", (DL_FUNC) &savvy_PlRLazyFrame_optimization_toggle__impl, 12},
39213926
{"savvy_PlRLazyFrame_profile__impl", (DL_FUNC) &savvy_PlRLazyFrame_profile__impl, 1},
39223927
{"savvy_PlRLazyFrame_quantile__impl", (DL_FUNC) &savvy_PlRLazyFrame_quantile__impl, 3},
3928+
{"savvy_PlRLazyFrame_remove__impl", (DL_FUNC) &savvy_PlRLazyFrame_remove__impl, 2},
39233929
{"savvy_PlRLazyFrame_rename__impl", (DL_FUNC) &savvy_PlRLazyFrame_rename__impl, 4},
39243930
{"savvy_PlRLazyFrame_reverse__impl", (DL_FUNC) &savvy_PlRLazyFrame_reverse__impl, 1},
39253931
{"savvy_PlRLazyFrame_rolling__impl", (DL_FUNC) &savvy_PlRLazyFrame_rolling__impl, 6},

src/rust/api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,7 @@ SEXP savvy_PlRLazyFrame_null_count__ffi(SEXP self__);
558558
SEXP savvy_PlRLazyFrame_optimization_toggle__ffi(SEXP self__, SEXP c_arg__type_coercion, SEXP c_arg___type_check, SEXP c_arg__predicate_pushdown, SEXP c_arg__projection_pushdown, SEXP c_arg__simplify_expression, SEXP c_arg__slice_pushdown, SEXP c_arg__comm_subplan_elim, SEXP c_arg__comm_subexpr_elim, SEXP c_arg__cluster_with_columns, SEXP c_arg___eager, SEXP c_arg___check_order);
559559
SEXP savvy_PlRLazyFrame_profile__ffi(SEXP self__);
560560
SEXP savvy_PlRLazyFrame_quantile__ffi(SEXP self__, SEXP c_arg__quantile, SEXP c_arg__interpolation);
561+
SEXP savvy_PlRLazyFrame_remove__ffi(SEXP self__, SEXP c_arg__predicate);
561562
SEXP savvy_PlRLazyFrame_rename__ffi(SEXP self__, SEXP c_arg__existing, SEXP c_arg__new, SEXP c_arg__strict);
562563
SEXP savvy_PlRLazyFrame_reverse__ffi(SEXP self__);
563564
SEXP savvy_PlRLazyFrame_rolling__ffi(SEXP self__, SEXP c_arg__index_column, SEXP c_arg__period, SEXP c_arg__offset, SEXP c_arg__closed, SEXP c_arg__by);

src/rust/src/lazyframe/general.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ impl PlRLazyFrame {
111111
Ok(ldf.filter(predicate.inner.clone()).into())
112112
}
113113

114+
// Builds a new lazy frame by calling the inner frame's `remove` with
// `predicate`. Works on clones of both the inner frame and the expression, so
// neither `self` nor `predicate` is modified. Always returns `Ok` here.
fn remove(&self, predicate: &PlRExpr) -> Result<Self> {
115+
// Clone first: the call below takes the frame by value while we only hold `&self`.
let ldf = self.ldf.clone();
116+
Ok(ldf.remove(predicate.inner.clone()).into())
117+
}
118+
114119
fn select(&mut self, exprs: ListSexp) -> Result<Self> {
115120
let ldf = self.ldf.clone();
116121
let exprs = <Wrap<Vec<Expr>>>::from(exprs).0;

0 commit comments

Comments
 (0)