diff --git a/NEWS.md b/NEWS.md index 6e8c4430b..35fc7262c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ - `$remove()` and `$remove()` as a complement to `$filter()` (#1632). - New method `$is_close()` (#1637). +- New methods `<groupby>$len()` and `<lazygroupby>$len()` (#1638). ## polars 1.5.0 diff --git a/R/dataframe-group_by-general.R b/R/dataframe-group_by-general.R index cc7830493..ad5a2f739 100644 --- a/R/dataframe-group_by-general.R +++ b/R/dataframe-group_by-general.R @@ -207,3 +207,23 @@ groupby__n_unique <- function() { self$agg(pl$all()$n_unique()) |> wrap() } + +#' @inherit lazygroupby__len title params +#' @inherit as_polars_df return +#' @examples +#' df <- pl$DataFrame( +#' a = c("Apple", "Apple", "Orange"), +#' b = c(1, NA, 2) +#' ) +#' df$group_by("a")$len() +#' +#' df$group_by("a")$len("n") +groupby__len <- function(name = NULL) { + wrap({ + len_expr <- pl$len() + if (!is.null(name)) { + len_expr <- len_expr$alias(name) + } + self$agg(len_expr) + }) +} diff --git a/R/lazyframe-group_by.R b/R/lazyframe-group_by.R index d69bbb769..30d89fef7 100644 --- a/R/lazyframe-group_by.R +++ b/R/lazyframe-group_by.R @@ -207,3 +207,26 @@ lazygroupby__n_unique <- function() { self$agg(pl$all()$n_unique()) |> wrap() } + +#' Return the number of rows in each group +#' +#' @param name Assign a name to the resulting column. If `NULL`, defaults to +#' `"len"`. 
+#' @inherit as_polars_lf return +#' @examples +#' lf <- pl$LazyFrame( +#' a = c("Apple", "Apple", "Orange"), +#' b = c(1, NA, 2) +#' ) +#' lf$group_by("a")$len()$collect() +#' +#' lf$group_by("a")$len("n")$collect() +lazygroupby__len <- function(name = NULL) { + wrap({ + len_expr <- pl$len() + if (!is.null(name)) { + len_expr <- len_expr$alias(name) + } + self$agg(len_expr) + }) +} diff --git a/altdoc/mkdocs.yml b/altdoc/mkdocs.yml index b4c9c4010..903834ac0 100644 --- a/altdoc/mkdocs.yml +++ b/altdoc/mkdocs.yml @@ -74,6 +74,7 @@ nav: - GroupBy: - agg: man/groupby__agg.md - head: man/groupby__head.md + - len: man/groupby__len.md - mean: man/groupby__mean.md - median: man/groupby__median.md - min: man/groupby__min.md @@ -155,6 +156,7 @@ nav: - GroupBy: - agg: man/lazygroupby__agg.md - head: man/lazygroupby__head.md + - len: man/lazygroupby__len.md - mean: man/lazygroupby__mean.md - median: man/lazygroupby__median.md - min: man/lazygroupby__min.md diff --git a/man/groupby__len.Rd b/man/groupby__len.Rd new file mode 100644 index 000000000..f75e090b2 --- /dev/null +++ b/man/groupby__len.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe-group_by-general.R +\name{groupby__len} +\alias{groupby__len} +\title{Return the number of rows in each group} +\usage{ +groupby__len(name = NULL) +} +\arguments{ +\item{name}{Assign a name to the resulting column. 
If \code{NULL}, defaults to +\code{"len"}.} +} +\value{ +A polars \link{DataFrame} +} +\description{ +Return the number of rows in each group +} +\examples{ +df <- pl$DataFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) +) +df$group_by("a")$len() + +df$group_by("a")$len("n") +} diff --git a/man/lazygroupby__len.Rd b/man/lazygroupby__len.Rd new file mode 100644 index 000000000..4ece8dd8b --- /dev/null +++ b/man/lazygroupby__len.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe-group_by.R +\name{lazygroupby__len} +\alias{lazygroupby__len} +\title{Return the number of rows in each group} +\usage{ +lazygroupby__len(name = NULL) +} +\arguments{ +\item{name}{Assign a name to the resulting column. If \code{NULL}, defaults to +\code{"len"}.} +} +\value{ +A polars \link{LazyFrame} +} +\description{ +Return the number of rows in each group +} +\examples{ +lf <- pl$LazyFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) +) +lf$group_by("a")$len()$collect() + +lf$group_by("a")$len("n")$collect() +} diff --git a/tests/testthat/_snaps/lazyframe-frame.md b/tests/testthat/_snaps/lazyframe-frame.md index 2015e35ed..60ff6ae4b 100644 --- a/tests/testthat/_snaps/lazyframe-frame.md +++ b/tests/testthat/_snaps/lazyframe-frame.md @@ -628,3 +628,27 @@ [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" [11] "carb" +# group_by() + len() + + Code + df$group_by("a", .maintain_order = TRUE)$len(1) + Condition + Error: + ! Evaluation failed in `$len()`. + Caused by error in `len_expr$alias()`: + ! Evaluation failed in `$alias()`. + Caused by error: + ! Argument `name` must be character, not double + +--- + + Code + df$group_by("a", .maintain_order = TRUE)$len(TRUE) + Condition + Error: + ! Evaluation failed in `$len()`. + Caused by error in `len_expr$alias()`: + ! Evaluation failed in `$alias()`. + Caused by error: + ! 
Argument `name` must be character, not logical + diff --git a/tests/testthat/test-lazyframe-frame.R b/tests/testthat/test-lazyframe-frame.R index bb50cb531..a3b5d53d8 100644 --- a/tests/testthat/test-lazyframe-frame.R +++ b/tests/testthat/test-lazyframe-frame.R @@ -2616,3 +2616,29 @@ test_that("active bindings", { expect_snapshot(as_polars_lf(mtcars)$width) expect_snapshot(as_polars_lf(mtcars)$columns) }) + +test_that("group_by() + len()", { + df <- pl$DataFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) + ) + + expect_query_equal( + .input$group_by("a", .maintain_order = TRUE)$len(), + df, + pl$DataFrame(a = c("Apple", "Orange"), len = c(2, 1))$cast(len = pl$UInt32) + ) + expect_query_equal( + .input$group_by("a", .maintain_order = TRUE)$len("n"), + df, + pl$DataFrame(a = c("Apple", "Orange"), n = c(2, 1))$cast(n = pl$UInt32) + ) + expect_snapshot( + df$group_by("a", .maintain_order = TRUE)$len(1), + error = TRUE + ) + expect_snapshot( + df$group_by("a", .maintain_order = TRUE)$len(TRUE), + error = TRUE + ) +})