|
32 | 32 | #' used for large numbers. If `NULL` (default), a big mark is added automatically for |
33 | 33 | #' large numbers (i.e. numbers with more than 5 digits). If you want to remove |
34 | 34 | #' the big mark, set `big_mark = ""`. |
35 | | -#' @param simplify Logical, if `TRUE`, the returned table is simplified to a |
36 | | -#' single table object if there is only one frequency or contingency table |
37 | | -#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a |
38 | | -#' list of tables is returned. This is only relevant for the `as.table()` |
39 | | -#' methods. To ensure consistent output, the default is `FALSE`. |
40 | 35 | #' @param object An object returned by `data_tabulate()`. |
41 | 36 | #' @param format String, indicating the output format. Can be `"markdown"` |
42 | 37 | #' `"html"`, or `"tt"`. `format = "html"` create an HTML table using the *gt* |
|
52 | 47 | #' data frame. The structure of the returned object is a nested data frame, |
53 | 48 | #' where the first column contains name of the variable for which frequencies |
54 | 49 | #' were calculated, and the second column is a list column that contains the |
55 | | -#' frequency tables as data frame. See 'Examples'. |
| 50 | +#' frequency tables as data frame. See [as.table.datawizard_table]. |
56 | 51 | #' |
57 | 52 | #' There is also an `as.table()` method, which returns a table object with the |
58 | 53 | #' frequencies of the variable. This is useful for further statistical analysis, |
59 | | -#' e.g. for using `chisq.test()` on the frequency table. See 'Examples'. |
| 54 | +#' e.g. for using `chisq.test()` on the frequency table. See |
| 55 | +#' [as.table.datawizard_table]. |
60 | 56 | #' |
61 | 57 | #' @section Crosstables: |
62 | 58 | #' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>` |
|
77 | 73 | #' @return A data frame, or a list of data frames, with one frequency table |
78 | 74 | #' as data frame per variable. |
79 | 75 | #' |
| 76 | +#' @seealso [as.prop.table] |
| 77 | +#' |
80 | 78 | #' @examplesIf requireNamespace("poorman") |
81 | 79 | #' # frequency tables ------- |
82 | 80 | #' # ------------------------ |
|
139 | 137 | #' # round percentages |
140 | 138 | #' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") |
141 | 139 | #' print(out, digits = 0) |
142 | | -#' |
143 | | -#' # coerce to data frames |
144 | | -#' result <- data_tabulate(efc, "c172code", by = "e16sex") |
145 | | -#' as.data.frame(result) |
146 | | -#' as.data.frame(result)$table |
147 | | -#' as.data.frame(result, add_total = TRUE)$table |
148 | | -#' |
149 | | -#' # post-processing ------ |
150 | | -#' # ---------------------- |
151 | | -#' |
152 | | -#' out <- data_tabulate(efc, "c172code", by = "e16sex") |
153 | | -#' # we need to simplify the output, else we get a list of tables |
154 | | -#' suppressWarnings(chisq.test(as.table(out, simplify = TRUE))) |
155 | | -#' |
156 | | -#' # apply chisq.test to each table |
157 | | -#' out <- data_tabulate(efc, c("c172code", "e16sex")) |
158 | | -#' suppressWarnings(lapply(as.table(out), chisq.test)) |
159 | | -#' |
160 | | -#' # can also handle grouped data frames |
161 | | -#' d <- data_group(mtcars, "am") |
162 | | -#' x <- data_tabulate(d, "cyl", by = "gear") |
163 | | -#' as.table(x) |
164 | 140 | #' @export |
165 | 141 | data_tabulate <- function(x, ...) { |
166 | 142 | UseMethod("data_tabulate") |
@@ -464,10 +440,147 @@ insight::print_md |
464 | 440 | #' @export |
465 | 441 | insight::display |
466 | 442 |
|
| 443 | +#' Convert a crosstable to a frequency or a propensity table |
| 444 | +#' |
| 445 | +#' @description |
| 446 | +#' `as.prop.table()` is an S3 generic. It can be used on objects of class |
| 447 | +#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the |
| 448 | +#' arguments `by` and `proportions`. |
| 449 | +#' |
| 450 | +#' @param x An object created by `data_tabulate()`. It must be of class |
| 451 | +#' `datawizard_crosstab` for `as.prop.table()`. |
| 452 | +#' @param simplify Logical, if `TRUE`, the returned table is simplified to a |
| 453 | +#' single table object if there is only one frequency or contingency table |
| 454 | +#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a |
| 455 | +#' list of tables is returned. This is only relevant for the `as.table()` |
| 456 | +#' methods. To ensure consistent output, the default is `FALSE`. |
| 457 | +#' @inheritParams data_tabulate |
| 458 | +#' |
| 459 | +#' @export |
| 460 | +#' @seealso [data_tabulate] |
| 461 | +#' |
| 462 | +#' @examples |
| 463 | +#' data(efc) |
| 464 | +#' |
| 465 | +#' # Some cross tabulation |
| 466 | +#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") |
| 467 | +#' cross |
| 468 | +#' |
| 469 | +#' # Convert to a propensity table |
| 470 | +#' as.prop.table(cross) |
| 471 | +#' |
| 472 | +#' # Convert to data.frame |
| 473 | +#' result <- data_tabulate(efc, "c172code", by = "e16sex") |
| 474 | +#' as.data.frame(result) |
| 475 | +#' as.data.frame(result)$table |
| 476 | +#' as.data.frame(result, add_total = TRUE)$table |
| 477 | +#' |
| 478 | +#' # Convert to a table that can be passed to chisq.test() |
| 479 | +#' |
| 480 | +#' out <- data_tabulate(efc, "c172code", by = "e16sex") |
| 481 | +#' # we need to simplify the output, else we get a list of tables |
| 482 | +#' tbl <- as.table(out, simplify = TRUE) |
| 483 | +#' tbl |
| 484 | +#' suppressWarnings(chisq.test(tbl)) |
| 485 | +#' |
| 486 | +#' # apply chisq.test to each table |
| 487 | +#' out <- data_tabulate(efc, c("c172code", "e16sex")) |
| 488 | +#' suppressWarnings(lapply(as.table(out), chisq.test)) |
| 489 | +#' |
| 490 | +#' # can also handle grouped data frames |
| 491 | +#' d <- data_group(mtcars, "am") |
| 492 | +#' x <- data_tabulate(d, "cyl", by = "gear") |
| 493 | +#' as.table(x) |
| 494 | +as.prop.table <- function(x, ...) { |
| 495 | + UseMethod("as.prop.table") |
| 496 | +} |
| 497 | + |
| 498 | +#' @rdname as.prop.table |
| 499 | +#' @export |
| 500 | +as.prop.table.datawizard_crosstab <- function( |
| 501 | + x, |
| 502 | + remove_na = TRUE, |
| 503 | + simplify = FALSE, |
| 504 | + verbose = TRUE, |
| 505 | + ... |
| 506 | +) { |
| 507 | + # sanity check - the `.data.frame` method returns a list, but not the |
| 508 | + # default method |
| 509 | + if (!is.data.frame(x)) { |
| 510 | + x <- x[[1]] |
| 511 | + } |
| 512 | + prop_table <- attributes(x)$prop_table |
| 513 | + |
| 514 | + if (is.null(prop_table)) { |
| 515 | + insight::format_warning("No proportions available.") |
| 516 | + return(NULL) |
| 517 | + } |
| 518 | + |
| 519 | + if (remove_na) { |
| 520 | + if ( |
| 521 | + verbose && |
| 522 | + ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) |
| 523 | + ) { |
| 524 | + insight::format_alert("Removing NA values from frequency table.") |
| 525 | + } |
| 526 | + if (!is.null(prop_table[["NA"]])) { |
| 527 | + prop_table[["NA"]] <- NULL |
| 528 | + } |
| 529 | + if ("NA" %in% rownames(prop_table)) { |
| 530 | + prop_table <- prop_table[rownames(prop_table) != "NA", ] |
| 531 | + } |
| 532 | + } |
| 533 | + # coerce to table |
| 534 | + result <- as.table(as.matrix(prop_table)) |
| 535 | + # if we don't want to simplify the table, we wrap it into a list |
| 536 | + if (!simplify) { |
| 537 | + result <- list(result) |
| 538 | + } |
| 539 | + |
| 540 | + result |
| 541 | +} |
| 542 | + |
| 543 | +#' @export |
| 544 | +as.prop.table.datawizard_crosstabs <- function( |
| 545 | + x, |
| 546 | + remove_na = TRUE, |
| 547 | + simplify = FALSE, |
| 548 | + verbose = TRUE, |
| 549 | + ... |
| 550 | +) { |
| 551 | + # only show message once we set `verbose = FALSE` in the lapply() |
| 552 | + if (remove_na && verbose) { |
| 553 | + prop_table <- attributes(x[[1]])$prop_table |
| 554 | + if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) { |
| 555 | + insight::format_alert("Removing NA values from frequency table.") |
| 556 | + } |
| 557 | + } |
| 558 | + |
| 559 | + out <- insight::compact_list(lapply( |
| 560 | + x, |
| 561 | + as.prop.table.datawizard_crosstab, |
| 562 | + remove_na = remove_na, |
| 563 | + simplify = TRUE, |
| 564 | + verbose = FALSE, |
| 565 | + ... |
| 566 | + )) |
| 567 | + |
| 568 | + # if no proportions found, return NULL |
| 569 | + if (!length(out)) { |
| 570 | + return(NULL) |
| 571 | + } |
| 572 | + |
| 573 | + # if only one table is returned, "unlist" |
| 574 | + if (length(out) == 1 && simplify) { |
| 575 | + out <- out[[1]] |
| 576 | + } |
| 577 | + out |
| 578 | +} |
| 579 | + |
467 | 580 |
|
468 | 581 | # as.data.frame -------------------- |
469 | 582 |
|
470 | | -#' @rdname data_tabulate |
| 583 | +#' @rdname as.prop.table |
471 | 584 | #' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and |
472 | 585 | #' column with the total N values are added to the data frame. `add_total` has |
473 | 586 | #' no effect in `as.data.frame()` for simple frequency tables. |
@@ -527,7 +640,7 @@ as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables |
527 | 640 |
|
528 | 641 | # as.table -------------------- |
529 | 642 |
|
530 | | -#' @rdname data_tabulate |
| 643 | +#' @rdname as.prop.table |
531 | 644 | #' @export |
532 | 645 | as.table.datawizard_table <- function( |
533 | 646 | x, |
|
0 commit comments