5353# ' by = "id"
5454# ' )
5555# ' @export
56- check_group_variation <- function (x , select = NULL , by = NULL , nested = FALSE , tolerance = 1e-4 ) {
56+ check_group_variation <- function (x , select = NULL , by = NULL ,
57+ tolerance = 1e-4 , only_balanced = TRUE ) {
5758 insight :: check_if_installed(" datawizard" , minimum_version = " 0.12.0" )
5859
5960 if (inherits(select , " formula" )) {
@@ -76,57 +77,88 @@ check_group_variation <- function(x, select = NULL, by = NULL, nested = FALSE, t
7677 select <- setdiff(colnames(x ), by )
7778 }
7879
79- # for nested designs?
80- if (nested ) {
81- # separate level-indicators with "/", as supported by datawizard
82- by <- paste(by , collapse = " /" )
83- }
84-
8580 # create all combinations that should be checked
86- combinations <- expand.grid(select , by [1 ])
81+ combinations <- expand.grid(variable = select , group = by , stringsAsFactors = FALSE )
82+ combinations <- combinations [combinations $ variable != combinations $ group , ]
83+ combinations $ type <- NA_character_
8784
8885 # initialize lists
89- list_within <- list_between <- list_both <- NULL
90-
91- for (predictor in combinations [[1 ]]) {
92- # demean predictor
93- d <- datawizard :: demean(x , select = predictor , by = by , verbose = FALSE , add_attributes = FALSE )
94-
95- # get new names
96- within_name <- paste0(predictor , " _within" )
97- between_name <- paste0(predictor , " _between" )
98-
99- if (var(d [[within_name ]], na.rm = TRUE ) > tolerance && var(d [[between_name ]], na.rm = TRUE ) > tolerance ) {
100- list_both <- c(list_both , predictor )
101- } else if (var(d [[within_name ]], na.rm = TRUE ) > tolerance ) {
102- list_within <- c(list_within , predictor )
103- } else if (var(d [[between_name ]], na.rm = TRUE ) > tolerance ) {
104- list_between <- c(list_between , predictor )
105- }
86+ for (i in seq_len(nrow(combinations ))) {
87+ combinations [i ," type" ] <-
88+ .check_nested(x , combinations [i ," group" ], combinations [i ," variable" ],
89+ tolerance = tolerance ,
90+ only_balanced = only_balanced )
10691 }
10792
108- out <- insight :: compact_list(list (
109- within = list_within ,
110- between = list_between ,
111- both = list_both
112- ))
11393
114- if (is.null(out )) {
115- insight :: format_alert(" No predictors found that either have within or between group variation." )
116- return (invisible (NULL ))
94+ combinations <- datawizard :: data_relocate(combinations , select = " group" , before = " variable" )
95+ class(combinations ) <- c(" check_group_variation" , class(combinations ))
96+ combinations
97+ }
98+
99+
100+ # ' @export
101+ print.check_group_variation <- function (x , ... ) {
102+ if (insight :: n_unique(x $ group ) > 1L ) {
103+ cat(insight :: export_table(x , caption = c(" Check group variation" , " blue" ), by = " group" ))
104+ } else {
105+ x_new <- x
106+ x_new $ group <- NULL
107+ cat(insight :: export_table(x_new , caption = c(sprintf(" Check %s variation" , x $ group [1 ]), " blue" )))
117108 }
118109
119- class(out ) <- c(" check_group_variation" , class(out ))
110+ return (invisible (x ))
111+ }
112+
113+
114+ # utils -------------------------------------------------------------
120115
121- out
116+ .check_nested <- function (data , by , predictor , ... ) {
117+ UseMethod(" .check_nested" , data [[predictor ]])
122118}
123119
120+ .check_nested.numeric <- function (data , by , predictor , tolerance = 1e-05 , ... ) {
121+ # demean predictor
122+ d <- datawizard :: demean(data , select = predictor , by = by , verbose = FALSE , add_attributes = FALSE )
124123
125- # ' @export
126- print.check_group_variation <- function (x , ... ) {
127- out <- as.data.frame(lapply(as.data.frame(do.call(cbind , x )), function (i ) {
128- i [duplicated(i )] <- NA_character_
129- i
130- }))
131- cat(insight :: export_table(out , caption = c(" Check group variation" , " blue" )))
124+ # get new names
125+ within_name <- paste0(predictor , " _within" )
126+ between_name <- paste0(predictor , " _between" )
127+
128+ is_between <- var(d [[between_name ]], na.rm = TRUE ) > tolerance
129+ is_within <- var(d [[within_name ]], na.rm = TRUE ) > tolerance
130+ is_both <- is_between && is_within
131+
132+ if (is_both ) return (" both" )
133+ if (is_between ) return (" between" )
134+ if (is_within ) return (" within" )
135+ NULL
132136}
137+
138+ .check_nested.default <- function (data , by , predictor , only_balanced = TRUE , ... ) {
139+ f <- data [[by ]]
140+ variable <- data [[predictor ]]
141+
142+ oo <- complete.cases(f , variable )
143+ f <- as.factor(f [oo ])
144+ variable <- variable [oo ]
145+
146+ n_uniques <- tapply(variable , f , insight :: n_unique )
147+ is_between <- all(n_uniques == 1L )
148+ if (is_between ) return (" between" )
149+
150+ if (! insight :: has_single_value(is_between )) return (" both" )
151+
152+ variable_levels <- unique(variable )
153+ has_all <- tapply(variable , f , function (v ) all(variable_levels %in% v ))
154+ if (! all(has_all )) return (" both" )
155+
156+ if (only_balanced ) {
157+ tab <- table(variable , f )
158+ is_balanced <- all(apply(tab , 2 , insight :: has_single_value ))
159+ if (! is_balanced ) {
160+ return (" both" )
161+ }
162+ }
163+ return (" within" )
164+ }
0 commit comments