Skip to content

Commit ad5c2be

Browse files
authored
Visualization: Collapse by [random] group (#597)
* fix * fixes * add for plotting * fixes, docs * desc, news * vignette * docs * fix * add snaps * fix * address comments * fix * fix pkgdown
1 parent c4b3474 commit ad5c2be

16 files changed

+2532
-108
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: modelbased
33
Title: Estimation of Model-Based Predictions, Contrasts and Means
4-
Version: 0.13.1.3
4+
Version: 0.13.1.4
55
Authors@R:
66
c(person(given = "Dominique",
77
family = "Makowski",

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ S3method(visualisation_recipe,estimate_grouplevel)
8383
S3method(visualisation_recipe,estimate_means)
8484
S3method(visualisation_recipe,estimate_predicted)
8585
S3method(visualisation_recipe,estimate_slopes)
86+
export(collapse_by_group)
8687
export(describe_nonlinear)
8788
export(display)
8889
export(estimate_contrasts)

NEWS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# modelbased (devel)
22

3+
## Changes
4+
5+
* New function `collapse_by_group()`, which extracts the raw data points and
6+
"averages" (i.e. "collapses") the response variable over the levels of the
7+
grouping factor given in `collapse_by`. Only works with mixed models.
8+
Additionally, the `plot()` and `visualization_recipe()` methods get a
9+
`collapse_group` argument to use this feature when adding data points to plots
10+
using `show_data` or `show_residuals`.
11+
312
## Bug fixes
413

514
* Fixed issue in `estimate_slope()` when `p_adjust = "esarey"`.

R/collapse_by_group.R

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#' @title Collapse raw data by random effect groups
2+
#' @name collapse_by_group
3+
#'
4+
#' @description This function extracts the raw data points (i.e. the data
5+
#' that was used to fit the model) and "averages" (i.e. "collapses") the
6+
#' response variable over the levels of the grouping factor given in
7+
#' `collapse_by`. Only works with mixed models.
8+
#'
9+
#' @param collapse_by Name of the (random effects) grouping factor. Data is
10+
#' collapsed by the levels of this factor.
11+
#' @param residuals Logical, if `TRUE`, collapsed partial residuals instead
12+
#' of raw data by the levels of the grouping factor.
13+
#' @inheritParams residualize_over_grid
14+
#'
15+
#' @return A data frame with raw data points, averaged over the levels of
16+
#' the given grouping factor from the random effects. The group level of
17+
#' the random effect is saved in the column `"random"`.
18+
#'
19+
#' @examplesIf require("lme4", quietly = TRUE)
20+
#' data(efc, package = "modelbased")
21+
#' efc$e15relat <- as.factor(efc$e15relat)
22+
#' efc$c161sex <- as.factor(efc$c161sex)
23+
#' levels(efc$c161sex) <- c("male", "female")
24+
#' model <- lme4::lmer(neg_c_7 ~ c161sex + (1 | e15relat), data = efc)
25+
#' me <- estimate_means(model, "c161sex")
26+
#' head(efc)
27+
#' collapse_by_group(me, model, "e15relat")
28+
#'
29+
#' @export
30+
collapse_by_group <- function(grid, model, collapse_by = NULL, residuals = FALSE) {
31+
if (!insight::is_mixed_model(model)) {
32+
insight::format_error("This function only works with mixed effects models.")
33+
}
34+
35+
model_data <- insight::get_data(model, source = "frame", verbose = FALSE)
36+
37+
if (is.null(collapse_by) || isTRUE(collapse_by) || isTRUE(residuals)) {
38+
collapse_by <- insight::find_random(model, flatten = TRUE)
39+
}
40+
41+
if (length(collapse_by) > 1) {
42+
collapse_by <- collapse_by[1]
43+
insight::format_alert(
44+
"More than one random grouping variable found.",
45+
paste0("Using `", collapse_by, "`.")
46+
)
47+
}
48+
49+
if (!collapse_by %in% colnames(model_data)) {
50+
insight::format_error(paste0("Could not find `", collapse_by, "` column."))
51+
}
52+
53+
if (residuals) {
54+
rawdata <- residualize_over_grid(grid, model)
55+
y_name <- "Mean"
56+
} else {
57+
rawdata <- insight::get_data(model, source = "environment", verbose = FALSE)
58+
y_name <- insight::find_response(model)
59+
60+
# we need this column for labelling data points, but not for collapsing
61+
rawdata$rowname <- NULL
62+
63+
predictor_names <- setdiff(colnames(rawdata), y_name)
64+
if (any(vapply(rawdata[predictor_names], Negate(is.factor), logical(1)))) {
65+
insight::format_alert(
66+
"Collapsing usually not informative across a continuous variable."
67+
)
68+
}
69+
}
70+
71+
if (is.factor(rawdata[[y_name]])) {
72+
rawdata[[y_name]] <- as.numeric(rawdata[[y_name]])
73+
if (insight::model_info(model)$is_binomial) {
74+
rawdata[[y_name]] <- rawdata[[y_name]] - 1
75+
} # else ordinal?
76+
}
77+
78+
if (nrow(rawdata) == nrow(model_data)) {
79+
rawdata$random <- factor(model_data[[collapse_by]])
80+
} else if (collapse_by %in% colnames(rowdata)) {
81+
rawdata$random <- factor(rawdata[[collapse_by]])
82+
} else {
83+
insight::format_error(paste0("Could not find `", collapse_by, "` column."))
84+
}
85+
86+
agg_data <- stats::aggregate(
87+
rawdata[[y_name]],
88+
by = rawdata[colnames(rawdata) != y_name],
89+
FUN = mean
90+
)
91+
92+
if (residuals) {
93+
y_name <- insight::find_response(model)
94+
}
95+
96+
colnames(agg_data)[ncol(agg_data)] <- y_name
97+
colnames(agg_data)[colnames(agg_data) == "group"] <- "group_col"
98+
99+
# sanity check, add dummy if not present
100+
if (is.null(agg_data$group_col)) {
101+
agg_data$group_col <- factor(1)
102+
}
103+
104+
agg_data
105+
}

R/tinyplot.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ tinyplot.estimate_means <- function(
4949
type = NULL,
5050
dodge = NULL,
5151
show_data = FALSE,
52+
collapse_group = NULL,
5253
numeric_as_discrete = NULL,
5354
...
5455
) {
@@ -191,7 +192,11 @@ tinyplot.estimate_means <- function(
191192
if (show_data) {
192193
# extract raw data from the model
193194
model <- attributes(x)$model
194-
rawdata <- as.data.frame(insight::get_data(model, verbose = FALSE))
195+
if (is.null(collapse_group)) {
196+
rawdata <- as.data.frame(insight::get_data(model, verbose = FALSE))
197+
} else {
198+
rawdata <- collapse_by_group(x, model, collapse_by = collapse_group)
199+
}
195200

196201
# set alpha
197202
if (is.null(dots$alpha)) {

R/visualisation_recipe.R

Lines changed: 65 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
#'
2424
#' @param x A modelbased object.
2525
#' @param show_data Logical, if `TRUE`, display the "raw" data as a background
26-
#' to the model-based estimation. This argument will be ignored for plotting
27-
#' objects returned by `estimate_slopes()` or `estimate_grouplevel()`.
26+
#' to the model-based estimation. For mixed models, you can additionally use the
27+
#' `collapse_group` argument to "collapse" data points by random effects
28+
#' grouping factors. Argument `show_data` will be ignored for plotting objects
29+
#' returned by `estimate_slopes()` or `estimate_grouplevel()`.
2830
#' @param join_dots Logical, if `TRUE` (default) and for categorical focal terms
2931
#' in `by`, dots (estimates) are connected by lines, i.e. plots will be a
3032
#' combination of dots with error bars and connecting lines. If `FALSE`, only
@@ -39,9 +41,17 @@
3941
#' predictor. Use `FALSE` to always use continuous color scales for numeric
4042
#' predictors. It is possible to set a global default value using `options()`,
4143
#' e.g. `options(modelbased_numeric_as_discrete = 10)`.
42-
#' @param show_residuals Logical, if `TRUE`, display residuals of the model
43-
#' as a background to the model-based estimation. Residuals will be computed
44-
#' for the predictors in the data grid, using [`residualize_over_grid()`].
44+
#' @param show_residuals Logical, if `TRUE`, display residuals of the model as a
45+
#' background to the model-based estimation. Residuals will be computed for the
46+
#' predictors in the data grid, using [`residualize_over_grid()`]. For mixed
47+
#' models, you can additionally use the `collapse_group` argument to "collapse"
48+
#' data points from residuals by random effects grouping factors.
49+
#' @param collapse_group This argument only takes effect when either `show_data`
50+
#' or `show_residuals` is `TRUE`. For mixed effects models, name of the grouping
51+
#' variable of random effects. If `collapse_group = TRUE`, data points
52+
#' "collapsed" by the first random effect groups are added to the plot. Else, if
53+
#' `collapse_group` is a name of a group factor, data is collapsed by that
54+
#' specific random effect. See [`collapse_by_group()`] for further details.
4555
#' @param point,line,pointrange,ribbon,facet,grid Additional
4656
#' aesthetics and parameters for the geoms (see customization example).
4757
#' @param ... Arguments passed from `plot()` to `visualisation_recipe()`, or
@@ -74,7 +84,7 @@
7484
#' will set a default value for the `dodge` argument (spacing between geoms)
7585
#' when using `tinyplot::plt()`. Should be a number between `0` and `1`.
7686
#'
77-
#' @examplesIf all(insight::check_if_installed(c("marginaleffects", "see", "ggplot2"), quietly = TRUE)) && getRversion() >= "4.1.0"
87+
#' @examplesIf all(insight::check_if_installed(c("marginaleffects", "see", "ggplot2", "lme4"), quietly = TRUE)) && getRversion() >= "4.1.0"
7888
#' library(ggplot2)
7989
#' library(see)
8090
#' # ==============================================
@@ -152,25 +162,42 @@
152162
#' x <- estimate_means(model, by = c("cyl", "wt"))
153163
#' plot(x)
154164
#'
155-
#'
156165
#' # GLMs ---------------------
157166
#' data <- data.frame(vs = mtcars$vs, cyl = as.factor(mtcars$cyl))
158167
#' x <- estimate_means(glm(vs ~ cyl, data = data, family = "binomial"), by = c("cyl"))
159168
#' plot(x)
169+
#'
170+
#' # ==============================================
171+
#' # Adding original data to the plot
172+
#' # ==============================================
173+
#' data(efc, package = "modelbased")
174+
#' efc$e15relat <- as.factor(efc$e15relat)
175+
#' efc$c161sex <- as.factor(efc$c161sex)
176+
#' levels(efc$c161sex) <- c("male", "female")
177+
#' model <- lme4::lmer(neg_c_7 ~ c161sex + (1 | e15relat), data = efc)
178+
#'
179+
#' me <- estimate_means(model, "c161sex")
180+
#' plot(me, show_data = TRUE)
181+
#'
182+
#' # data points: collapse by / average over random effects groups -------
183+
#' plot(me, show_data = TRUE, collapse_group = "e15relat")
160184
#' }
161185
#' @export
162-
visualisation_recipe.estimate_predicted <- function(x,
163-
show_data = FALSE,
164-
show_residuals = FALSE,
165-
point = NULL,
166-
line = NULL,
167-
pointrange = NULL,
168-
ribbon = NULL,
169-
facet = NULL,
170-
grid = NULL,
171-
join_dots = NULL,
172-
numeric_as_discrete = NULL,
173-
...) {
186+
visualisation_recipe.estimate_predicted <- function(
187+
x,
188+
show_data = FALSE,
189+
show_residuals = FALSE,
190+
collapse_group = NULL,
191+
point = NULL,
192+
line = NULL,
193+
pointrange = NULL,
194+
ribbon = NULL,
195+
facet = NULL,
196+
grid = NULL,
197+
join_dots = NULL,
198+
numeric_as_discrete = NULL,
199+
...
200+
) {
174201
# Process argument ---------------------------------------------------------
175202
# --------------------------------------------------------------------------
176203

@@ -186,6 +213,7 @@ visualisation_recipe.estimate_predicted <- function(x,
186213
x,
187214
show_data = show_data,
188215
show_residuals = show_residuals,
216+
collapse_by = collapse_group,
189217
point = point,
190218
line = line,
191219
pointrange = pointrange,
@@ -233,13 +261,15 @@ visualisation_recipe.estimate_means <- visualisation_recipe.estimate_predicted
233261
#' plot(visualisation_recipe(x))
234262
#' }
235263
#' @export
236-
visualisation_recipe.estimate_slopes <- function(x,
237-
line = NULL,
238-
pointrange = NULL,
239-
ribbon = NULL,
240-
facet = NULL,
241-
grid = NULL,
242-
...) {
264+
visualisation_recipe.estimate_slopes <- function(
265+
x,
266+
line = NULL,
267+
pointrange = NULL,
268+
ribbon = NULL,
269+
facet = NULL,
270+
grid = NULL,
271+
...
272+
) {
243273
.visualization_recipe(
244274
x,
245275
show_data = FALSE,
@@ -285,13 +315,15 @@ visualisation_recipe.estimate_slopes <- function(x,
285315
#' plot(x)
286316
#' }
287317
#' @export
288-
visualisation_recipe.estimate_grouplevel <- function(x,
289-
line = NULL,
290-
pointrange = NULL,
291-
ribbon = NULL,
292-
facet = NULL,
293-
grid = NULL,
294-
...) {
318+
visualisation_recipe.estimate_grouplevel <- function(
319+
x,
320+
line = NULL,
321+
pointrange = NULL,
322+
ribbon = NULL,
323+
facet = NULL,
324+
grid = NULL,
325+
...
326+
) {
295327
if (is.null(facet)) {
296328
facet <- list(scales = "free")
297329
} else {

0 commit comments

Comments
 (0)