Skip to content

Commit 18a6ca2

Browse files
committed
Update check_group_variation.R
1 parent 61b25b7 commit 18a6ca2

File tree

1 file changed

+74
-42
lines changed

1 file changed

+74
-42
lines changed

R/check_group_variation.R

Lines changed: 74 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@
5353
#' by = "id"
5454
#' )
5555
#' @export
56-
check_group_variation <- function(x, select = NULL, by = NULL, nested = FALSE, tolerance = 1e-4) {
56+
check_group_variation <- function(x, select = NULL, by = NULL,
57+
tolerance = 1e-4, only_balanced = TRUE) {
5758
insight::check_if_installed("datawizard", minimum_version = "0.12.0")
5859

5960
if (inherits(select, "formula")) {
@@ -76,57 +77,88 @@ check_group_variation <- function(x, select = NULL, by = NULL, nested = FALSE, t
7677
select <- setdiff(colnames(x), by)
7778
}
7879

79-
# for nested designs?
80-
if (nested) {
81-
# separate level-indicators with "/", as supported by datawizard
82-
by <- paste(by, collapse = "/")
83-
}
84-
8580
# create all combinations that should be checked
86-
combinations <- expand.grid(select, by[1])
81+
combinations <- expand.grid(variable = select, group = by, stringsAsFactors = FALSE)
82+
combinations <- combinations[combinations$variable != combinations$group, ]
83+
combinations$type <- NA_character_
8784

8885
# initialize lists
89-
list_within <- list_between <- list_both <- NULL
90-
91-
for (predictor in combinations[[1]]) {
92-
# demean predictor
93-
d <- datawizard::demean(x, select = predictor, by = by, verbose = FALSE, add_attributes = FALSE)
94-
95-
# get new names
96-
within_name <- paste0(predictor, "_within")
97-
between_name <- paste0(predictor, "_between")
98-
99-
if (var(d[[within_name]], na.rm = TRUE) > tolerance && var(d[[between_name]], na.rm = TRUE) > tolerance) {
100-
list_both <- c(list_both, predictor)
101-
} else if (var(d[[within_name]], na.rm = TRUE) > tolerance) {
102-
list_within <- c(list_within, predictor)
103-
} else if (var(d[[between_name]], na.rm = TRUE) > tolerance) {
104-
list_between <- c(list_between, predictor)
105-
}
86+
for (i in seq_len(nrow(combinations))) {
87+
combinations[i,"type"] <-
88+
.check_nested(x, combinations[i,"group"], combinations[i,"variable"],
89+
tolerance = tolerance,
90+
only_balanced = only_balanced)
10691
}
10792

108-
out <- insight::compact_list(list(
109-
within = list_within,
110-
between = list_between,
111-
both = list_both
112-
))
11393

114-
if (is.null(out)) {
115-
insight::format_alert("No predictors found that either have within or between group variation.")
116-
return(invisible(NULL))
94+
combinations <- datawizard::data_relocate(combinations, select = "group", before = "variable")
95+
class(combinations) <- c("check_group_variation", class(combinations))
96+
combinations
97+
}
98+
99+
100+
#' @export
101+
print.check_group_variation <- function(x, ...) {
102+
if (insight::n_unique(x$group) > 1L) {
103+
cat(insight::export_table(x, caption = c("Check group variation", "blue"), by = "group"))
104+
} else {
105+
x_new <- x
106+
x_new$group <- NULL
107+
cat(insight::export_table(x_new, caption = c(sprintf("Check %s variation", x$group[1]), "blue")))
117108
}
118109

119-
class(out) <- c("check_group_variation", class(out))
110+
return(invisible(x))
111+
}
112+
113+
114+
# utils -------------------------------------------------------------
120115

121-
out
116+
.check_nested <- function(data, by, predictor, ...) {
117+
UseMethod(".check_nested", data[[predictor]])
122118
}
123119

120+
.check_nested.numeric <- function(data, by, predictor, tolerance = 1e-05, ...) {
121+
# demean predictor
122+
d <- datawizard::demean(data, select = predictor, by = by, verbose = FALSE, add_attributes = FALSE)
124123

125-
#' @export
126-
print.check_group_variation <- function(x, ...) {
127-
out <- as.data.frame(lapply(as.data.frame(do.call(cbind, x)), function(i) {
128-
i[duplicated(i)] <- NA_character_
129-
i
130-
}))
131-
cat(insight::export_table(out, caption = c("Check group variation", "blue")))
124+
# get new names
125+
within_name <- paste0(predictor, "_within")
126+
between_name <- paste0(predictor, "_between")
127+
128+
is_between <- var(d[[between_name]], na.rm = TRUE) > tolerance
129+
is_within <- var(d[[within_name]], na.rm = TRUE) > tolerance
130+
is_both <- is_between && is_within
131+
132+
if (is_both) return("both")
133+
if (is_between) return("between")
134+
if (is_within) return("within")
135+
NULL
132136
}
137+
138+
.check_nested.default <- function(data, by, predictor, only_balanced = TRUE, ...) {
139+
f <- data[[by]]
140+
variable <- data[[predictor]]
141+
142+
oo <- complete.cases(f, variable)
143+
f <- as.factor(f[oo])
144+
variable <- variable[oo]
145+
146+
n_uniques <- tapply(variable, f, insight::n_unique)
147+
is_between <- all(n_uniques == 1L)
148+
if (is_between) return("between")
149+
150+
if (!insight::has_single_value(is_between)) return("both")
151+
152+
variable_levels <- unique(variable)
153+
has_all <- tapply(variable, f, function(v) all(variable_levels %in% v))
154+
if (!all(has_all)) return("both")
155+
156+
if (only_balanced) {
157+
tab <- table(variable, f)
158+
is_balanced <- all(apply(tab, 2, insight::has_single_value))
159+
if (!is_balanced) {
160+
return("both")
161+
}
162+
}
163+
return("within")
164+
}

0 commit comments

Comments
 (0)