|
| 1 | +#' Check variables for within- and/or between-group variation |
| 2 | +#' |
| 3 | +#' `check_group_variation()` checks if variables a within- and/or between-effect, |
| 4 | +#' i.e. if they vary within or between certain groups. |
| 5 | +#' |
| 6 | +#' @param x A data frame. |
| 7 | +#' @param select Character vector (or formula) with names of variables to select |
| 8 | +#' that should be checked. |
| 9 | +#' @param by Character vector (or formula) with the name of the variable that |
| 10 | +#' indicates the group- or cluster-ID. For cross-classified or nested designs, |
| 11 | +#' `by` can also identify two or more variables as group- or cluster-IDs. If |
| 12 | +#' the data is nested and should be treated as such, set `nested = TRUE`. Else, |
| 13 | +#' if `by` defines two or more variables and `nested = FALSE`, a cross-classified |
| 14 | +#' design is assumed. |
| 15 | +#' |
| 16 | +#' For nested designs, `by` can be: |
| 17 | +#' - a character vector with the name of the variable that indicates the |
| 18 | +#' levels, ordered from *highest* level to *lowest* (e.g. |
| 19 | +#' `by = c("L4", "L3", "L2")`. |
| 20 | +#' - a character vector with variable names in the format `by = "L4/L3/L2"`, |
| 21 | +#' where the levels are separated by `/`. |
| 22 | +#' |
| 23 | +#' See also section _De-meaning for cross-classified designs_ and |
| 24 | +#' _De-meaning for nested designs_ in [`datawizard::demean()`]. |
| 25 | +#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`, |
| 26 | +#' the data is treated as cross-classified. Only applies if `by` contains more |
| 27 | +#' than one variable. |
| 28 | +#' |
| 29 | +#' @examples |
| 30 | +#' set.seed(1234) |
| 31 | +#' dat <- data.frame( |
| 32 | +#' id = rep(letters, each = 3), |
| 33 | +#' between_num = rep(rnorm(26), each = 3), |
| 34 | +#' within1_num = rep(rnorm(3), times = 26), |
| 35 | +#' within2_num = rep(rnorm(3), times = 26), |
| 36 | +#' both_num = rnorm(3 * 26) |
| 37 | +#' ) |
| 38 | +#' check_group_variation( |
| 39 | +#' dat, |
| 40 | +#' select = c("between_num", "innen_num", "within_num", "both_num"), |
| 41 | +#' by = "id" |
| 42 | +#' ) |
| 43 | +#' @export |
| 44 | +check_group_variation <- function(x, select = NULL, by = NULL, nested = FALSE, tolerance = 1e-5) { |
| 45 | + insight::check_if_installed("datawizard", minimum_version = "0.12.0") |
| 46 | + |
| 47 | + if (inherits(select, "formula")) { |
| 48 | + select <- all.vars(select) |
| 49 | + } |
| 50 | + if (inherits(by, "formula")) { |
| 51 | + by <- all.vars(by) |
| 52 | + } |
| 53 | + my_data <- x |
| 54 | + |
| 55 | + # for nested designs? |
| 56 | + if (nested) { |
| 57 | + # separate level-indicators with "/", as supported by datawizard |
| 58 | + by <- paste(by, collapse = "/") |
| 59 | + } |
| 60 | + |
| 61 | + # create all combinations that should be checked |
| 62 | + combinations <- expand.grid(select, by[1]) |
| 63 | + |
| 64 | + # initialize lists |
| 65 | + list_within <- list_between <- list_both <- NULL |
| 66 | + |
| 67 | + for (predictor in combinations[[1]]) { |
| 68 | + # demean predictor |
| 69 | + d <- datawizard::demean(my_data, select = predictor, by = by, verbose = FALSE, add_attributes = FALSE) |
| 70 | + |
| 71 | + # get new names |
| 72 | + within_name <- paste0(predictor, "_within") |
| 73 | + between_name <- paste0(predictor, "_between") |
| 74 | + |
| 75 | + if (var(d[[within_name]], na.rm = TRUE) > tolerance && var(d[[between_name]], na.rm = TRUE) > tolerance) { |
| 76 | + list_both <- c(list_both, predictor) |
| 77 | + } else if (var(d[[within_name]], na.rm = TRUE) > tolerance) { |
| 78 | + list_within <- c(list_within, predictor) |
| 79 | + } else if (var(d[[between_name]], na.rm = TRUE) > tolerance) { |
| 80 | + list_between <- c(list_between, predictor) |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + out <- insight::compact_list(list( |
| 85 | + within = list_within, |
| 86 | + between = list_between, |
| 87 | + both = list_both |
| 88 | + )) |
| 89 | + |
| 90 | + if (is.null(out)) { |
| 91 | + insight::format_alert("No predictors found that either have within or between group variation.") |
| 92 | + return(invisible(NULL)) |
| 93 | + } |
| 94 | + |
| 95 | + class(out) <- c("check_group_variation", class(out)) |
| 96 | + |
| 97 | + out |
| 98 | +} |
| 99 | + |
| 100 | + |
| 101 | +#' @export |
| 102 | +print.check_group_variation <- function(x, ...) { |
| 103 | + out <- as.data.frame(lapply(as.data.frame(do.call(cbind, x)), function(i) { |
| 104 | + i[duplicated(i)] <- NA_character_ |
| 105 | + i |
| 106 | + })) |
| 107 | + cat(insight::export_table(out, caption = c("Check group variation", "blue"))) |
| 108 | +} |
0 commit comments