Skip to content

Commit 0c73645

Browse files
committed
add function
1 parent 9974498 commit 0c73645

File tree

3 files changed

+172
-0
lines changed

3 files changed

+172
-0
lines changed

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ S3method(print,check_concurvity)
297297
S3method(print,check_dag)
298298
S3method(print,check_distribution)
299299
S3method(print,check_distribution_numeric)
300+
S3method(print,check_group_variation)
300301
S3method(print,check_heterogeneity_bias)
301302
S3method(print,check_heteroscedasticity)
302303
S3method(print,check_homogeneity)
@@ -557,6 +558,7 @@ export(check_convergence)
557558
export(check_dag)
558559
export(check_distribution)
559560
export(check_factorstructure)
561+
export(check_group_variation)
560562
export(check_heterogeneity_bias)
561563
export(check_heteroscedasticity)
562564
export(check_heteroskedasticity)

R/check_group_variation.R

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#' Check variables for within- and/or between-group variation
2+
#'
3+
#' `check_group_variation()` checks if variables a within- and/or between-effect,
4+
#' i.e. if they vary within or between certain groups.
5+
#'
6+
#' @param x A data frame.
7+
#' @param select Character vector (or formula) with names of variables to select
8+
#' that should be checked.
9+
#' @param by Character vector (or formula) with the name of the variable that
10+
#' indicates the group- or cluster-ID. For cross-classified or nested designs,
11+
#' `by` can also identify two or more variables as group- or cluster-IDs. If
12+
#' the data is nested and should be treated as such, set `nested = TRUE`. Else,
13+
#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified
14+
#' design is assumed.
15+
#'
16+
#' For nested designs, `by` can be:
17+
#' - a character vector with the name of the variable that indicates the
18+
#' levels, ordered from *highest* level to *lowest* (e.g.
19+
#' `by = c("L4", "L3", "L2")`.
20+
#' - a character vector with variable names in the format `by = "L4/L3/L2"`,
21+
#' where the levels are separated by `/`.
22+
#'
23+
#' See also section _De-meaning for cross-classified designs_ and
24+
#' _De-meaning for nested designs_ in [`datawizard::demean()`].
25+
#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`,
26+
#' the data is treated as cross-classified. Only applies if `by` contains more
27+
#' than one variable.
28+
#'
29+
#' @examples
30+
#' set.seed(1234)
31+
#' dat <- data.frame(
32+
#' id = rep(letters, each = 3),
33+
#' between_num = rep(rnorm(26), each = 3),
34+
#' within1_num = rep(rnorm(3), times = 26),
35+
#' within2_num = rep(rnorm(3), times = 26),
36+
#' both_num = rnorm(3 * 26)
37+
#' )
38+
#' check_group_variation(
39+
#' dat,
40+
#' select = c("between_num", "innen_num", "within_num", "both_num"),
41+
#' by = "id"
42+
#' )
43+
#' @export
44+
check_group_variation <- function(x, select = NULL, by = NULL, nested = FALSE, tolerance = 1e-5) {
45+
insight::check_if_installed("datawizard", minimum_version = "0.12.0")
46+
47+
if (inherits(select, "formula")) {
48+
select <- all.vars(select)
49+
}
50+
if (inherits(by, "formula")) {
51+
by <- all.vars(by)
52+
}
53+
my_data <- x
54+
55+
# for nested designs?
56+
if (nested) {
57+
# separate level-indicators with "/", as supported by datawizard
58+
by <- paste(by, collapse = "/")
59+
}
60+
61+
# create all combinations that should be checked
62+
combinations <- expand.grid(select, by[1])
63+
64+
# initialize lists
65+
list_within <- list_between <- list_both <- NULL
66+
67+
for (predictor in combinations[[1]]) {
68+
# demean predictor
69+
d <- datawizard::demean(my_data, select = predictor, by = by, verbose = FALSE, add_attributes = FALSE)
70+
71+
# get new names
72+
within_name <- paste0(predictor, "_within")
73+
between_name <- paste0(predictor, "_between")
74+
75+
if (var(d[[within_name]], na.rm = TRUE) > tolerance && var(d[[between_name]], na.rm = TRUE) > tolerance) {
76+
list_both <- c(list_both, predictor)
77+
} else if (var(d[[within_name]], na.rm = TRUE) > tolerance) {
78+
list_within <- c(list_within, predictor)
79+
} else if (var(d[[between_name]], na.rm = TRUE) > tolerance) {
80+
list_between <- c(list_between, predictor)
81+
}
82+
}
83+
84+
out <- insight::compact_list(list(
85+
within = list_within,
86+
between = list_between,
87+
both = list_both
88+
))
89+
90+
if (is.null(out)) {
91+
insight::format_alert("No predictors found that either have within or between group variation.")
92+
return(invisible(NULL))
93+
}
94+
95+
class(out) <- c("check_group_variation", class(out))
96+
97+
out
98+
}
99+
100+
101+
#' @export
102+
print.check_group_variation <- function(x, ...) {
103+
out <- as.data.frame(lapply(as.data.frame(do.call(cbind, x)), function(i) {
104+
i[duplicated(i)] <- NA_character_
105+
i
106+
}))
107+
cat(insight::export_table(out, caption = c("Check group variation", "blue")))
108+
}

man/check_group_variation.Rd

Lines changed: 62 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)