tidyverse · teunbrand · Aug 28, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -262,6 +262,7 @@ Collate:
     'stat-smooth-methods.R'
     'stat-smooth.R'
     'stat-sum.R'
+    'stat-summarise.R'
     'stat-summary-2d.R'
     'stat-summary-bin.R'
     'stat-summary-hex.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -274,6 +274,7 @@ export(StatSf)
 export(StatSfCoordinates)
 export(StatSmooth)
 export(StatSum)
+export(StatSummarise)
 export(StatSummary)
 export(StatSummary2d)
 export(StatSummaryBin)
@@ -686,6 +687,7 @@ export(stat_sf_coordinates)
 export(stat_smooth)
 export(stat_spoke)
 export(stat_sum)
+export(stat_summarise)
 export(stat_summary)
 export(stat_summary2d)
 export(stat_summary_2d)

diff --git a/R/stat-summarise.R b/R/stat-summarise.R
@@ -0,0 +1,129 @@
+#' Arbitrary summaries by group
+#'
+#' `stat_summarise()` handles arbitrary summaries by group. Unlike
+#' [`stat_summary()`], it does not automatically summarise by `x`.
+#'
+#' @inheritParams stat_identity
+#' @param fun A function that is given the complete data per group and should
+#'   return a data.frame with variables compatible with the layer's `geom`
+#'   argument. Defaults to `express_group()` which will evaluate
+#'   [data-masked][rlang::args_data_masking] expressions passed to `fun.args`.
+#' @param fun.args Optional additional arguments to pass to `fun`. When
+#'   `fun = express_group` (default), can be a list of expressions created by
+#'   `vars()`.
+#' @eval rd_aesthetics("stat", "summarise")
+#' @export
+#'
+#' @examples
+#' # Using a helper summary function
+#' my_summary_fun <- function(data) {
+#'   data.frame(
+#'     x    = min(data$x) - 0.5,
+#'     xend = max(data$x) + 0.5,
+#'     y    = mean(data$y)
+#'   )
+#' }
+#'
+#' ggplot(mpg, aes(drv, displ)) +
+#'   geom_jitter(aes(colour = drv)) +
+#'   stat_summarise(
+#'     geom = "segment",
+#'     fun = my_summary_fun
+#'   )
+#'
+#' # Making the same summary using the `exprs` argument
+#' ggplot(mpg, aes(drv, displ)) +
+#'   geom_jitter(aes(colour = drv)) +
+#'   stat_summarise(
+#'     geom = "segment",
+#'     fun.args = vars(
+#'       # Note: we use `xend` first to not mutate `x` too soon
+#'       xend = max(x) + 0.5,
+#'       x    = min(x) - 0.5,
+#'       y    = mean(y)
+#'     )
+#'   )
+#'
+#' # You can use a temporary variable that needn't conform to data.frame columns
+#' # as long as it is removed afterwards
+#' ggplot(mpg, aes(displ, colour = drv)) +
+#'   stat_summarise(
+#'     geom = "line",
+#'     fun.args = vars(
+#'       dens = density(x), # non-vector temporary variable
+#'       x    = dens$x,
+#'       y    = dens$y,
+#'       dens = NULL # deleting the temporary variable
+#'     )
+#'   )
+stat_summarise <- function(
+    mapping = NULL,
+    data = NULL,
+    geom = "point",
+    position = "identity",
+    ...,
+    fun = express_group,
+    fun.args = list(),
+    na.rm = FALSE,
+    show.legend = NA,
+    inherit.aes = TRUE
+) {
+
+  layer(
+    data = data,
+    mapping = mapping,
+    stat = StatSummarise,
+    geom = geom,
+    position = position,
+    show.legend = show.legend,
+    inherit.aes = inherit.aes,
+    params = list2(
+      na.rm = na.rm,
+      fun = fun,
+      fun.args = fun.args,
+      ...
+    )
+  )
+}
+
+#' @rdname ggplot2-ggproto
+#' @format NULL
+#' @usage NULL
+#' @export
+StatSummarise <- ggproto(
+  "StatSummarise", Stat,
+
+  setup_params = function(data, params) {
+    params$fun <- allow_lambda(params$fun)
+    check_function(params$fun, arg = "fun")
+    if (!is_named2(params$fun.args)) {
+      cli::cli_abort("Every element in {.arg fun.args} must be named.")
+    }
+    if (identical(params$fun, express_group) && length(params$fun.args) > 0) {
+      check_object(
+        params$fun.args, is_quosures, arg = "fun.args",
+        "generated by `vars()`", allow_null = TRUE
+      )
+    }
+    params
+  },
+
+  compute_group = function(data, scales, fun = express_group, fun.args = list()) {
+    inject(fun(data, !!!fun.args))
+  }
+)
+
+express_group <- function(data, ...) {
+  exprs <- list2(...)
+  if (length(exprs) < 1) {
+    return(data)
+  }
+  data <- unclass(data)
+  # We append each evaluated expression to `data`, so that the next expression
+  # can use the result of all previously evaluated expressions
+  nms <- names(exprs)
+  for (i in seq_along(exprs)) {
+    data[[nms[[i]]]] <- eval_tidy(exprs[[i]], data)
+  }
+  data_frame0(!!!data[unique0(names(exprs))])
+}
diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd
diff --git a/man/stat_summarise.Rd b/man/stat_summarise.Rd