From d2e855ef467dc213d69fabb2d9a6577ce0899bc2 Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Wed, 12 Nov 2025 23:10:46 +0100 Subject: [PATCH 1/2] init --- NEWS.md | 1 + R/dataframe-group_by-general.R | 20 ++++++++++++++++++ R/lazyframe-group_by.R | 23 ++++++++++++++++++++ altdoc/mkdocs.yml | 2 ++ man/groupby__len.Rd | 23 ++++++++++++++++++++ man/lazygroupby__len.Rd | 27 ++++++++++++++++++++++++ tests/testthat/_snaps/lazyframe-frame.md | 24 +++++++++++++++++++++ tests/testthat/test-lazyframe-frame.R | 26 +++++++++++++++++++++++ 8 files changed, 146 insertions(+) create mode 100644 man/groupby__len.Rd create mode 100644 man/lazygroupby__len.Rd diff --git a/NEWS.md b/NEWS.md index 6e8c4430b..35fc7262c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ - `$remove()` and `$remove()` as a complement to `$filter()` (#1632). - New method `$is_close()` (#1637). +- New method `$len()` for grouped DataFrames and grouped LazyFrames (#1638). ## polars 1.5.0 diff --git a/R/dataframe-group_by-general.R b/R/dataframe-group_by-general.R index cc7830493..e0daf7237 100644 --- a/R/dataframe-group_by-general.R +++ b/R/dataframe-group_by-general.R @@ -207,3 +207,23 @@ groupby__n_unique <- function() { self$agg(pl$all()$n_unique()) |> wrap() } + +#' @inherit lazygroupby__n_unique title params +#' @inherit as_polars_df return +#' @examples +#' df <- pl$DataFrame( +#' a = c("Apple", "Apple", "Orange"), +#' b = c(1, NA, 2) +#' ) +#' df$group_by("a")$len() +#' +#' df$group_by("a")$len("n") +groupby__len <- function(name = NULL) { + wrap({ + len_expr <- pl$len() + if (!is.null(name)) { + len_expr <- len_expr$alias(name) + } + self$agg(len_expr) + }) +} diff --git a/R/lazyframe-group_by.R b/R/lazyframe-group_by.R index d69bbb769..30d89fef7 100644 --- a/R/lazyframe-group_by.R +++ b/R/lazyframe-group_by.R @@ -207,3 +207,26 @@ lazygroupby__n_unique <- function() { self$agg(pl$all()$n_unique()) |> wrap() } + +#' Return the number of rows in each group +#' +#' @param name Assign a name to the resulting column. 
If `NULL`, defaults to +#' `"len"`. +#' @inherit as_polars_lf return +#' @examples +#' lf <- pl$LazyFrame( +#' a = c("Apple", "Apple", "Orange"), +#' b = c(1, NA, 2) +#' ) +#' lf$group_by("a")$len()$collect() +#' +#' lf$group_by("a")$len("n")$collect() +lazygroupby__len <- function(name = NULL) { + wrap({ + len_expr <- pl$len() + if (!is.null(name)) { + len_expr <- len_expr$alias(name) + } + self$agg(len_expr) + }) +} diff --git a/altdoc/mkdocs.yml b/altdoc/mkdocs.yml index b4c9c4010..903834ac0 100644 --- a/altdoc/mkdocs.yml +++ b/altdoc/mkdocs.yml @@ -74,6 +74,7 @@ nav: - GroupBy: - agg: man/groupby__agg.md - head: man/groupby__head.md + - len: man/groupby__len.md - mean: man/groupby__mean.md - median: man/groupby__median.md - min: man/groupby__min.md @@ -155,6 +156,7 @@ nav: - GroupBy: - agg: man/lazygroupby__agg.md - head: man/lazygroupby__head.md + - len: man/lazygroupby__len.md - mean: man/lazygroupby__mean.md - median: man/lazygroupby__median.md - min: man/lazygroupby__min.md diff --git a/man/groupby__len.Rd b/man/groupby__len.Rd new file mode 100644 index 000000000..438c00ef3 --- /dev/null +++ b/man/groupby__len.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe-group_by-general.R +\name{groupby__len} +\alias{groupby__len} +\title{Count the unique values per group} +\usage{ +groupby__len(name = NULL) +} +\value{ +A polars \link{DataFrame} +} +\description{ +Count the unique values per group +} +\examples{ +df <- pl$DataFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) +) +df$group_by("a")$len() + +df$group_by("a")$len("n") +} diff --git a/man/lazygroupby__len.Rd b/man/lazygroupby__len.Rd new file mode 100644 index 000000000..4ece8dd8b --- /dev/null +++ b/man/lazygroupby__len.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lazyframe-group_by.R +\name{lazygroupby__len} +\alias{lazygroupby__len} +\title{Return the number of rows in each 
group} +\usage{ +lazygroupby__len(name = NULL) +} +\arguments{ +\item{name}{Assign a name to the resulting column. If \code{NULL}, defaults to +\code{"len"}.} +} +\value{ +A polars \link{LazyFrame} +} +\description{ +Return the number of rows in each group +} +\examples{ +lf <- pl$LazyFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) +) +lf$group_by("a")$len()$collect() + +lf$group_by("a")$len("n")$collect() +} diff --git a/tests/testthat/_snaps/lazyframe-frame.md b/tests/testthat/_snaps/lazyframe-frame.md index 2015e35ed..60ff6ae4b 100644 --- a/tests/testthat/_snaps/lazyframe-frame.md +++ b/tests/testthat/_snaps/lazyframe-frame.md @@ -628,3 +628,27 @@ [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" [11] "carb" +# group_by() + len() + + Code + df$group_by("a", .maintain_order = TRUE)$len(1) + Condition + Error: + ! Evaluation failed in `$len()`. + Caused by error in `len_expr$alias()`: + ! Evaluation failed in `$alias()`. + Caused by error: + ! Argument `name` must be character, not double + +--- + + Code + df$group_by("a", .maintain_order = TRUE)$len(TRUE) + Condition + Error: + ! Evaluation failed in `$len()`. + Caused by error in `len_expr$alias()`: + ! Evaluation failed in `$alias()`. + Caused by error: + ! 
Argument `name` must be character, not logical + diff --git a/tests/testthat/test-lazyframe-frame.R b/tests/testthat/test-lazyframe-frame.R index bb50cb531..a3b5d53d8 100644 --- a/tests/testthat/test-lazyframe-frame.R +++ b/tests/testthat/test-lazyframe-frame.R @@ -2616,3 +2616,29 @@ test_that("active bindings", { expect_snapshot(as_polars_lf(mtcars)$width) expect_snapshot(as_polars_lf(mtcars)$columns) }) + +test_that("group_by() + len()", { + df <- pl$DataFrame( + a = c("Apple", "Apple", "Orange"), + b = c(1, NA, 2) + ) + + expect_query_equal( + .input$group_by("a", .maintain_order = TRUE)$len(), + df, + pl$DataFrame(a = c("Apple", "Orange"), len = c(2, 1))$cast(len = pl$UInt32) + ) + expect_query_equal( + .input$group_by("a", .maintain_order = TRUE)$len("n"), + df, + pl$DataFrame(a = c("Apple", "Orange"), n = c(2, 1))$cast(n = pl$UInt32) + ) + expect_snapshot( + df$group_by("a", .maintain_order = TRUE)$len(1), + error = TRUE + ) + expect_snapshot( + df$group_by("a", .maintain_order = TRUE)$len(TRUE), + error = TRUE + ) +}) From 1d4e2ea8d481ea9852cce1ffc6dcf64b0f66247d Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Wed, 12 Nov 2025 23:17:11 +0100 Subject: [PATCH 2/2] typo --- R/dataframe-group_by-general.R | 2 +- man/groupby__len.Rd | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/dataframe-group_by-general.R b/R/dataframe-group_by-general.R index e0daf7237..ad5a2f739 100644 --- a/R/dataframe-group_by-general.R +++ b/R/dataframe-group_by-general.R @@ -208,7 +208,7 @@ groupby__n_unique <- function() { wrap() } -#' @inherit lazygroupby__n_unique title params +#' @inherit lazygroupby__len title params #' @inherit as_polars_df return #' @examples #' df <- pl$DataFrame( diff --git a/man/groupby__len.Rd b/man/groupby__len.Rd index 438c00ef3..f75e090b2 100644 --- a/man/groupby__len.Rd +++ b/man/groupby__len.Rd @@ -2,15 +2,19 @@ % Please edit documentation in R/dataframe-group_by-general.R \name{groupby__len} \alias{groupby__len} 
-\title{Count the unique values per group} +\title{Return the number of rows in each group} \usage{ groupby__len(name = NULL) } +\arguments{ +\item{name}{Assign a name to the resulting column. If \code{NULL}, defaults to +\code{"len"}.} +} \value{ A polars \link{DataFrame} } \description{ -Count the unique values per group +Return the number of rows in each group } \examples{ df <- pl$DataFrame(