From 7789439f23b99fbfccf41e27fd061fc2d77d129c Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 18 Sep 2025 09:50:32 +0200 Subject: [PATCH 01/33] Extracting proportions from `data_tabulate()` Fixes #655 --- R/data_xtabulate.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 75a03ae72..67512bae3 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -80,6 +80,7 @@ attr(out, "proportions") <- proportions attr(out, "varname") <- obj_name attr(out, "grouped_df") <- !is.null(group_variable) + attr(out, "prop_table") <- .prop_table(out) class(out) <- c("datawizard_crosstab", "data.frame") @@ -87,6 +88,36 @@ } +.prop_table <- function(x) { + proportions <- attributes(x)$proportions + out <- NULL + if (!is.null(proportions)) { + # find numeric columns, only for these we need row/column sums + numeric_columns <- vapply(x, is.numeric, logical(1)) + total_n <- attributes(x)$total_n + + out <- switch( + proportions, + row = lapply(seq_len(nrow(x)), function(i) { + row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) + x[i, numeric_columns] / row_sum + }), + column = lapply(seq_len(ncol(x))[numeric_columns], function(i) { + col_sum <- sum(x[, i], na.rm = TRUE) + x[, i] / col_sum + }), + full = lapply(seq_len(ncol(x))[numeric_columns], function(i) { + x[, i] / total_n + }), + ) + } + out <- as.data.frame(do.call(rbind, out)) + colnames(out) <- colnames(x)[numeric_columns] + rownames(out) <- x[[1]] + out +} + + # methods --------------------- From 4da8f2086f458da9ecb06c8f8511575f074f2e4b Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 1 Oct 2025 16:14:31 +0200 Subject: [PATCH 02/33] DRY --- R/data_xtabulate.R | 52 +++++++++------------------------------------- 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 67512bae3..eb4be18c2 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -113,7 +113,7 @@ } out <- as.data.frame(do.call(rbind, out)) colnames(out) <- colnames(x)[numeric_columns] - rownames(out) <- x[[1]] + rownames(out) <- rownames(x) out } @@ -133,51 +133,19 @@ format.datawizard_crosstab <- function(x, x <- as.data.frame(x) # find numeric columns, only for these we need row/column sums - numeric_columns <- vapply(x, is.numeric, logical(1)) - - # compute total N for rows and columns - total_n <- attributes(x)$total_n - total_column <- rowSums(x[numeric_columns], na.rm = TRUE) - total_row <- c(colSums(x[numeric_columns], na.rm = TRUE), total_n) + numeric_columns <- which(vapply(x, is.numeric, logical(1))) # proportions? props <- attributes(x)$proportions - - if (!is.null(props)) { - # we copy x to tmp, because when we create strings with "sprintf()", the - # variable is coerced to character, and in subsequent iterations of the loop, - # mathemathical operations are not possible anymore - tmp <- x - if (identical(props, "row")) { - for (i in seq_len(nrow(x))) { - row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) - if (row_sum == 0) { - row_sum_string <- "(0%)" - } else { - row_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[i, numeric_columns] / row_sum) - } - tmp[i, numeric_columns] <- paste(format(x[i, numeric_columns]), format(row_sum_string, justify = "right")) - } - } else if (identical(props, "column")) { - for (i in seq_len(ncol(x))[numeric_columns]) { - col_sum <- sum(x[, i], na.rm = TRUE) - if (col_sum == 0) { - col_sum_string <- "(0%)" - } else { - col_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[, i] / col_sum) - } - tmp[, i] <- paste(format(x[, i]), format(col_sum_string, justify = "right")) - } - } else if (identical(props, "full")) { - for (i in seq_len(ncol(x))[numeric_columns]) { - tmp[, i] <- paste( - format(x[, i]), - format(sprintf("(%.*f%%)", digits, 100 * x[, i] / total_n), justify = "right") - ) - } + prop_table <- attributes(x)$prop_table + + if (!is.null(props) && !is.null(prop_table)) { + for (i in seq_len(ncol(prop_table))) { + x[, numeric_columns[i]] <- paste( + format(x[, numeric_columns[i]]), + format(sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), justify = "right") + ) } - # copy back final result - x <- tmp } x[] <- lapply(x, as.character) From c2dabec3773abe2902229947fe8768388fca727c Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 1 Oct 2025 16:34:31 +0200 Subject: [PATCH 03/33] fix --- R/data_xtabulate.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index eb4be18c2..c216c67eb 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -111,9 +111,13 @@ }), ) } - out <- as.data.frame(do.call(rbind, out)) - colnames(out) <- colnames(x)[numeric_columns] - rownames(out) <- rownames(x) + if (!is.null(out)) { + out <- as.data.frame(do.call(rbind, out)) + colnames(out) <- colnames(x)[numeric_columns] + if (nrow(out) == nrow(x)) { + rownames(out) <- rownames(x) + } + } out } @@ -133,7 +137,12 @@ format.datawizard_crosstab <- function(x, x <- as.data.frame(x) # find numeric columns, only for these we need row/column sums - numeric_columns <- which(vapply(x, is.numeric, logical(1))) + numeric_columns <- vapply(x, is.numeric, logical(1)) + + # compute total N for rows and columns + total_n <- attributes(x)$total_n + total_column <- rowSums(x[numeric_columns], na.rm = TRUE) + total_row <- c(colSums(x[numeric_columns], na.rm = TRUE), total_n) # proportions? props <- attributes(x)$proportions From 0482a2d4f3791a4ff45dbcddafdcb768cdaa7648 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 1 Oct 2025 16:47:46 +0200 Subject: [PATCH 04/33] fix --- R/data_xtabulate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index c216c67eb..044428df8 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -112,7 +112,7 @@ ) } if (!is.null(out)) { - out <- as.data.frame(do.call(rbind, out)) + out <- as.data.frame(t(do.call(rbind, out))) colnames(out) <- colnames(x)[numeric_columns] if (nrow(out) == nrow(x)) { rownames(out) <- rownames(x) @@ -137,7 +137,7 @@ format.datawizard_crosstab <- function(x, x <- as.data.frame(x) # find numeric columns, only for these we need row/column sums - numeric_columns <- vapply(x, is.numeric, logical(1)) + numeric_columns <- which(vapply(x, is.numeric, logical(1))) # compute total N for rows and columns total_n <- attributes(x)$total_n From 99bd754c36a502d0ef59f9e3e30b27f28c923417 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:13:55 +0200 Subject: [PATCH 05/33] lintr --- R/data_xtabulate.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 044428df8..baec5cc7a 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -89,15 +89,15 @@ .prop_table <- function(x) { - proportions <- attributes(x)$proportions + props <- attributes(x)$props out <- NULL - if (!is.null(proportions)) { + if (!is.null(props)) { # find numeric columns, only for these we need row/column sums numeric_columns <- vapply(x, is.numeric, logical(1)) total_n <- attributes(x)$total_n out <- switch( - proportions, + props, row = lapply(seq_len(nrow(x)), function(i) { row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) x[i, numeric_columns] / row_sum From 5b4add7965c8f37575aa9a11004f5c94df37278a Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:14:47 +0200 Subject: [PATCH 06/33] too much lintr --- R/data_xtabulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index baec5cc7a..f6badecae 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -89,7 +89,7 @@ .prop_table <- function(x) { - props <- attributes(x)$props + props <- attributes(x)$proportions out <- NULL if (!is.null(props)) { # find numeric columns, only for these we need row/column sums From 5f44ce172295360b0d13421027076c4e7ba1871e Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:18:35 +0200 Subject: [PATCH 07/33] fix --- R/data_xtabulate.R | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index f6badecae..2b9241627 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -100,19 +100,31 @@ props, row = lapply(seq_len(nrow(x)), function(i) { row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) - x[i, numeric_columns] / row_sum + if (row_sum == 0) { + rep(0, sum(numeric_columns)) + } else { + x[i, numeric_columns] / row_sum + } }), column = lapply(seq_len(ncol(x))[numeric_columns], function(i) { col_sum <- sum(x[, i], na.rm = TRUE) - x[, i] / col_sum + if (col_sum == 0) { + rep(0, nrow(x)) + } else { + x[, i] / col_sum + } }), full = lapply(seq_len(ncol(x))[numeric_columns], function(i) { - x[, i] / total_n + if (total_n == 0) { + rep(0, nrow(x)) + } else { + x[, i] / total_n + } }), ) } if (!is.null(out)) { - out <- as.data.frame(t(do.call(rbind, out))) + out <- as.data.frame(do.call(cbind, out)) colnames(out) <- colnames(x)[numeric_columns] if (nrow(out) == nrow(x)) { rownames(out) <- rownames(x) From 28a7f69e70b1ad19eea56f2070bfa4b10912d8d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:24:27 +0200 Subject: [PATCH 08/33] row names --- R/data_xtabulate.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 2b9241627..77fd9499a 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -127,7 +127,17 @@ out <- as.data.frame(do.call(cbind, out)) colnames(out) <- colnames(x)[numeric_columns] if (nrow(out) == nrow(x)) { - rownames(out) <- rownames(x) + # if we have variable labels for the first column, we use them as row names + # this helps the user identify NA rows when the "prop_table" attribute is + # extracted + if (isFALSE(numeric_columns[1])) { + r_names <- x[[1]] + r_names <- as.character(r_names) + r_names[is.na(r_names)] <- "" + rownames(out) <- r_names + } else { + rownames(out) <- rownames(x) + } } } out From 761f3917ba218001aaf3bc670b64ebd5c5d5582f Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:32:11 +0200 Subject: [PATCH 09/33] add as.prop.table method --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ R/data_tabulate.R | 61 ++++++++++++++++++++++++++++++++++++ R/data_xtabulate.R | 2 +- man/adjust.Rd | 2 +- man/contr.deviation.Rd | 2 +- man/data_group.Rd | 2 +- man/data_tabulate.Rd | 11 ++++++- man/data_to_long.Rd | 2 +- man/data_to_wide.Rd | 2 +- man/describe_distribution.Rd | 2 +- man/rescale_weights.Rd | 2 +- 12 files changed, 82 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 419b5b7df..e01f37595 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -76,7 +76,7 @@ VignetteBuilder: Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Config/testthat/edition: 3 Config/testthat/parallel: true Config/Needs/website: easystats/easystatstemplate diff --git a/NAMESPACE b/NAMESPACE index dfe242add..c836303d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -230,6 +230,8 @@ S3method(winsorize,factor) S3method(winsorize,logical) S3method(winsorize,numeric) export(adjust) +export(as.prop.table.datawizard_crosstab) +export(as.prop.table.datawizard_crosstabs) export(assign_labels) export(categorize) export(center) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 812e4eabb..b4239d505 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -58,6 +58,9 @@ #' frequencies of the variable. This is useful for further statistical analysis, #' e.g. for using `chisq.test()` on the frequency table. See 'Examples'. #' +#' Finally, the `as.prop.table()` method returns the proportions of the +#' crosstable as a table object, if `by` was supplied to `data_tabulate()`. +#' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. The crosstable includes `` #' (missing) values by default. The first column indicates values of `x`, the @@ -623,6 +626,64 @@ as.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = FALSE, out } +#' @rdname data_tabulate +#' @export +as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { + # sanity check - the `.data.frame` method returns a list, but not the + # default method + if (!is.data.frame(x)) { + x <- x[[1]] + } + prop_table <- attributes(x)$prop_table + + if (is.null(prop_table)) { + insight::format_warning("No proportions available.") + return(NULL) + } + + if (remove_na) { + if (verbose && ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table))) { + insight::format_alert("Removing NA values from frequency table.") + } + if (!is.null(prop_table[["NA"]])) { + prop_table[["NA"]] <- NULL + } + if ("NA" %in% rownames(prop_table)) { + prop_table <- prop_table[row_names != "NA", ] + } + } + # coerce to table + result <- as.table(as.matrix(prop_table)) + # if we don't want to simplify the table, we wrap it into a list + if (!simplify) { + result <- list(result) + } + + result +} + +#' @export +as.prop.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { + # only show message once we set `verbose = FALSE` in the lapply() + if (remove_na && verbose) { + insight::format_alert("Removing NA values from frequency table.") + } + + out <- lapply( + x, + as.prop.table.datawizard_crosstab, + remove_na = remove_na, + simplify = TRUE, + verbose = FALSE, + ... + ) + # if only one table is returned, "unlist" + if (length(out) == 1 && simplify) { + out <- out[[1]] + } + out +} + .is_grouped_df_xtab <- function(x) { if (!is.data.frame(x)) { diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 77fd9499a..0d3c5354a 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -133,7 +133,7 @@ if (isFALSE(numeric_columns[1])) { r_names <- x[[1]] r_names <- as.character(r_names) - r_names[is.na(r_names)] <- "" + r_names[is.na(r_names)] <- "NA" rownames(out) <- r_names } else { rownames(out) <- rownames(x) diff --git a/man/adjust.Rd b/man/adjust.Rd index edc01ada2..1574636e6 100644 --- a/man/adjust.Rd +++ b/man/adjust.Rd @@ -128,7 +128,7 @@ Note that a regular correlation between two "adjusted" variables is equivalent to the partial correlation between them. } \examples{ -\dontshow{if (all(insight::check_if_installed(c("bayestestR", "rstanarm", "gamm4"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("bayestestR", "rstanarm", "gamm4"), quietly = TRUE))) withAutoprint(\{ # examplesIf} adjusted_all <- adjust(attitude) head(adjusted_all) adjusted_one <- adjust(attitude, effect = "complaints", select = "rating") diff --git a/man/contr.deviation.Rd b/man/contr.deviation.Rd index d9d378429..fa4e10bf7 100644 --- a/man/contr.deviation.Rd +++ b/man/contr.deviation.Rd @@ -49,7 +49,7 @@ the differences the \strong{A} and \strong{B} group means from the overall mean, respectively. } \examples{ -\dontshow{if (!identical(Sys.getenv("IN_PKGDOWN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (!identical(Sys.getenv("IN_PKGDOWN"), "true")) withAutoprint(\{ # examplesIf} \donttest{ data("mtcars") diff --git a/man/data_group.Rd b/man/data_group.Rd index bc26bf068..572ff8b3a 100644 --- a/man/data_group.Rd +++ b/man/data_group.Rd @@ -90,7 +90,7 @@ following the \strong{datawizard} function design. \code{data_ungroup()} removes grouping information from a grouped data frame. } \examples{ -\dontshow{if (requireNamespace("poorman")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("poorman")) withAutoprint(\{ # examplesIf} data(efc) suppressPackageStartupMessages(library(poorman, quietly = TRUE)) diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 9adb869a7..c2fa65ce6 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -6,6 +6,7 @@ \alias{data_tabulate.data.frame} \alias{as.data.frame.datawizard_tables} \alias{as.table.datawizard_table} +\alias{as.prop.table.datawizard_crosstab} \alias{print.datawizard_table} \alias{display.datawizard_table} \title{Create frequency and crosstables of variables} @@ -51,6 +52,14 @@ data_tabulate(x, ...) \method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) +as.prop.table.datawizard_crosstab( + x, + remove_na = TRUE, + simplify = FALSE, + verbose = TRUE, + ... +) + \method{print}{datawizard_table}(x, big_mark = NULL, ...) \method{display}{datawizard_table}(object, big_mark = NULL, format = "markdown", ...) @@ -223,7 +232,7 @@ for the full table. } \examples{ -\dontshow{if (requireNamespace("poorman")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("poorman")) withAutoprint(\{ # examplesIf} # frequency tables ------- # ------------------------ data(efc) diff --git a/man/data_to_long.Rd b/man/data_to_long.Rd index b3dd9c08d..697ae9af0 100644 --- a/man/data_to_long.Rd +++ b/man/data_to_long.Rd @@ -159,7 +159,7 @@ names, that identify the source of the gathered values, stored in one or more new columns (\code{names_to}). } \examples{ -\dontshow{if (all(insight::check_if_installed(c("psych", "tidyr"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("psych", "tidyr"), quietly = TRUE))) withAutoprint(\{ # examplesIf} wide_data <- setNames( data.frame(replicate(2, rnorm(8))), c("Time1", "Time2") diff --git a/man/data_to_wide.Rd b/man/data_to_wide.Rd index 8281dcce0..7389b4927 100644 --- a/man/data_to_wide.Rd +++ b/man/data_to_wide.Rd @@ -124,7 +124,7 @@ saved into the column \code{values_from} will be spread into new columns, which will be named after the values in \code{names_from}. See also 'Examples'. } \examples{ -\dontshow{if (requireNamespace("lme4", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("lme4", quietly = TRUE)) withAutoprint(\{ # examplesIf} data_long <- read.table(header = TRUE, text = " subject sex condition measurement 1 M control 7.9 diff --git a/man/describe_distribution.Rd b/man/describe_distribution.Rd index a8cdca879..3887404e1 100644 --- a/man/describe_distribution.Rd +++ b/man/describe_distribution.Rd @@ -171,7 +171,7 @@ There is also a implemented in the \href{https://easystats.github.io/see/}{\strong{see}-package}. } \examples{ -\dontshow{if (require("bayestestR", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("bayestestR", quietly = TRUE)) withAutoprint(\{ # examplesIf} describe_distribution(rnorm(100)) data(iris) diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index b2a329a3e..ce7039827 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -101,7 +101,7 @@ standard survey-design. } } \examples{ -\dontshow{if (all(insight::check_if_installed(c("lme4", "parameters"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("lme4", "parameters"), quietly = TRUE))) withAutoprint(\{ # examplesIf} data(nhanes_sample) head(rescale_weights(nhanes_sample, "WTINT2YR", "SDMVSTRA")) From ab1c130c7fd7c2bbbef6a414631b7e4437d4b637 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:35:26 +0200 Subject: [PATCH 10/33] news, desc --- DESCRIPTION | 2 +- NEWS.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e01f37595..92571a5bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 1.2.0.8 +Version: 1.2.0.9 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531")), diff --git a/NEWS.md b/NEWS.md index 0267ea254..ffaece691 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,9 @@ BREAKING CHANGES CHANGES +* `data_tabulate()` now saves the table of proportions for crosstables as + attribute, accessible via the new `as.prop.table()` method. + * Due to changes in the package `insight`, `data_tabulate()` no longer prints decimals when all values in a column are integers (#641). From 6f466d5bb0977242c40dc274295dfbc31ae87743 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:35:44 +0200 Subject: [PATCH 11/33] PR number --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ffaece691..e8417386f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,7 +11,7 @@ BREAKING CHANGES CHANGES * `data_tabulate()` now saves the table of proportions for crosstables as - attribute, accessible via the new `as.prop.table()` method. + attribute, accessible via the new `as.prop.table()` method (#656). * Due to changes in the package `insight`, `data_tabulate()` no longer prints decimals when all values in a column are integers (#641). From 71fc12d095bebd3d2a2946469b5af1af46119b89 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:45:00 +0200 Subject: [PATCH 12/33] fixed warning --- R/utils.R | 2 +- man/data_tabulate.Rd | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 97f6d922b..d881597ca 100644 --- a/R/utils.R +++ b/R/utils.R @@ -224,7 +224,7 @@ } -#' Taken from https://github.com/coolbutuseless/gluestick [licence: MIT] +#' Taken from https://github.com/coolbutuseless/gluestick (licence: MIT) #' Same functionality as `{glue}` #' #' @noRd diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index c2fa65ce6..edd7d8e74 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -212,6 +212,9 @@ frequency tables as data frame. See 'Examples'. There is also an \code{as.table()} method, which returns a table object with the frequencies of the variable. This is useful for further statistical analysis, e.g. for using \code{chisq.test()} on the frequency table. See 'Examples'. + +Finally, the \code{as.prop.table()} method returns the proportions of the +crosstable as a table object, if \code{by} was supplied to \code{data_tabulate()}. } \note{ There are \code{print_html()} and \code{print_md()} methods available for printing From 3b9d655eea138c1a31638e28d83b918c6b52f951 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:46:23 +0200 Subject: [PATCH 13/33] declare method --- NAMESPACE | 5 +++-- R/data_tabulate.R | 6 ++++++ man/data_tabulate.Rd | 11 ++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c836303d8..f10b5720d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,8 @@ S3method(as.double,parameters_smoothness) S3method(as.numeric,parameters_kurtosis) S3method(as.numeric,parameters_skewness) S3method(as.numeric,parameters_smoothness) +S3method(as.prop.table,datawizard_crosstab) +S3method(as.prop.table,datawizard_crosstabs) S3method(as.table,datawizard_crosstab) S3method(as.table,datawizard_crosstabs) S3method(as.table,datawizard_table) @@ -230,8 +232,7 @@ S3method(winsorize,factor) S3method(winsorize,logical) S3method(winsorize,numeric) export(adjust) -export(as.prop.table.datawizard_crosstab) -export(as.prop.table.datawizard_crosstabs) +export(as.prop.table) export(assign_labels) export(categorize) export(center) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index b4239d505..82b71a575 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -626,6 +626,12 @@ as.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = FALSE, out } +#' @rdname data_tabulate +#' @export +as.prop.table <- function(x, ...) { + UseMethod("as.prop.table") +} + #' @rdname data_tabulate #' @export as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index edd7d8e74..4c2a1ee65 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -6,6 +6,7 @@ \alias{data_tabulate.data.frame} \alias{as.data.frame.datawizard_tables} \alias{as.table.datawizard_table} +\alias{as.prop.table} \alias{as.prop.table.datawizard_crosstab} \alias{print.datawizard_table} \alias{display.datawizard_table} @@ -52,13 +53,9 @@ data_tabulate(x, ...) \method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) -as.prop.table.datawizard_crosstab( - x, - remove_na = TRUE, - simplify = FALSE, - verbose = TRUE, - ... -) +as.prop.table(x, ...) + +\method{as.prop.table}{datawizard_crosstab}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) \method{print}{datawizard_table}(x, big_mark = NULL, ...) From c8b3ed1bc04b2caa0c83b9529c2b2ab4b7ec945a Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:48:57 +0200 Subject: [PATCH 14/33] fix msg --- R/data_tabulate.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 82b71a575..409af216b 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -672,7 +672,10 @@ as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FA as.prop.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { # only show message once we set `verbose = FALSE` in the lapply() if (remove_na && verbose) { - insight::format_alert("Removing NA values from frequency table.") + prop_table <- attributes(x[[1]])$prop_table + if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) { + insight::format_alert("Removing NA values from frequency table.") + } } out <- lapply( From e3218e62fa6cfb31acdeb5d69edd0db578f4707d Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:54:37 +0200 Subject: [PATCH 15/33] fix --- R/data_tabulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 409af216b..9a7cafb09 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -655,7 +655,7 @@ as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FA prop_table[["NA"]] <- NULL } if ("NA" %in% rownames(prop_table)) { - prop_table <- prop_table[row_names != "NA", ] + prop_table <- prop_table[rownames(prop_table) != "NA", ] } } # coerce to table From ea96216a9cee3593ace66fdde3f3aa05d325950a Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 09:59:58 +0200 Subject: [PATCH 16/33] fix --- R/data_tabulate.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 9a7cafb09..f34851b5d 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -678,14 +678,20 @@ as.prop.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = F } } - out <- lapply( + out <- insight::compact_list(lapply( x, as.prop.table.datawizard_crosstab, remove_na = remove_na, simplify = TRUE, verbose = FALSE, ... - ) + )) + + # if no proportions found, return NULL + if (!length(out)) { + return(NULL) + } + # if only one table is returned, "unlist" if (length(out) == 1 && simplify) { out <- out[[1]] From 9e2e604f65a92c25b1fd9d93c64b6f41d731ed9e Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 11:11:00 +0200 Subject: [PATCH 17/33] fix printing --- R/data_xtabulate.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 0d3c5354a..4dff702e5 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -146,7 +146,6 @@ # methods --------------------- - #' @export format.datawizard_crosstab <- function(x, format = "text", @@ -174,7 +173,14 @@ format.datawizard_crosstab <- function(x, for (i in seq_len(ncol(prop_table))) { x[, numeric_columns[i]] <- paste( format(x[, numeric_columns[i]]), - format(sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), justify = "right") + ifelse( + prop_table[, i] == 0, + "(0 %)", + format( + sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), + justify = "right" + ) + ) ) } } @@ -219,7 +225,11 @@ format.datawizard_crosstab <- function(x, ftab[nrow(ftab), ] <- .add_commas_in_numbers(ftab[nrow(ftab), ], big_mark) # also format NA column name - colnames(ftab)[colnames(ftab) == "NA"] <- ifelse(identical(format, "text"), "", "(NA)") + colnames(ftab)[colnames(ftab) == "NA"] <- ifelse( + identical(format, "text"), + "", + "(NA)" + ) ftab } From d20a50cf75a016665daefdd2396336ef2f4ba837 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 11:39:07 +0200 Subject: [PATCH 18/33] fix --- R/data_xtabulate.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 4dff702e5..91c052a3b 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -101,7 +101,9 @@ row = lapply(seq_len(nrow(x)), function(i) { row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) if (row_sum == 0) { - rep(0, sum(numeric_columns)) + tmp <- as.data.frame(as.list(rep(0, sum(numeric_columns)))) + colnames(tmp) <- colnames(x)[numeric_columns] + tmp } else { x[i, numeric_columns] / row_sum } @@ -124,7 +126,12 @@ ) } if (!is.null(out)) { - out <- as.data.frame(do.call(cbind, out)) + # for rows, we need to rbind, for columns, we need to cbind + out <- switch( + props, + row = as.data.frame(do.call(rbind, out)), + as.data.frame(do.call(cbind, out)) + ) colnames(out) <- colnames(x)[numeric_columns] if (nrow(out) == nrow(x)) { # if we have variable labels for the first column, we use them as row names From c0eb952cff7e9b7844ba36ea69019f6dad1946e0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 11:45:30 +0200 Subject: [PATCH 19/33] comment code --- R/data_xtabulate.R | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 91c052a3b..5ed4fac88 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -87,66 +87,90 @@ out } - +# Helper function to calculate a table of proportions from a frequency table .prop_table <- function(x) { + # Extract the "proportions" attribute, which determines the type of calculation (row, column, or full) props <- attributes(x)$proportions out <- NULL + + # Proceed only if the "proportions" attribute is set if (!is.null(props)) { - # find numeric columns, only for these we need row/column sums + # Identify numeric columns, as proportions are only calculated for these numeric_columns <- vapply(x, is.numeric, logical(1)) + # Get the total count from the attributes, used for "full" proportions total_n <- attributes(x)$total_n + # Use a switch to perform the calculation based on the "props" value out <- switch( props, + # Calculate row-wise proportions row = lapply(seq_len(nrow(x)), function(i) { + # Sum of the current row's numeric values row_sum <- sum(x[i, numeric_columns], na.rm = TRUE) + # Avoid division by zero; if row sum is 0, return a row of zeros if (row_sum == 0) { tmp <- as.data.frame(as.list(rep(0, sum(numeric_columns)))) + # for later rbind, we need identical column names colnames(tmp) <- colnames(x)[numeric_columns] tmp } else { x[i, numeric_columns] / row_sum } }), + # Calculate column-wise proportions column = lapply(seq_len(ncol(x))[numeric_columns], function(i) { + # Sum of the current column's values col_sum <- sum(x[, i], na.rm = TRUE) + # Avoid division by zero; if column sum is 0, return a vector of zeros if (col_sum == 0) { rep(0, nrow(x)) } else { x[, i] / col_sum } }), + # Calculate proportions relative to the total count of the entire table full = lapply(seq_len(ncol(x))[numeric_columns], function(i) { + # Avoid division by zero; if total is 0, return a vector of zeros if (total_n == 0) { rep(0, nrow(x)) } else { x[, i] / total_n } - }), + }) ) } + + # If a proportion table was calculated, format it into a data frame if (!is.null(out)) { - # for rows, we need to rbind, for columns, we need to cbind + # The output of the switch is a list. We need to bind it into a data frame. + # For row proportions, we bind rows. For column/full, we bind columns. out <- switch( props, row = as.data.frame(do.call(rbind, out)), as.data.frame(do.call(cbind, out)) ) + # Set the column names of the new proportion table colnames(out) <- colnames(x)[numeric_columns] + + # Check if the dimensions are consistent before setting row names if (nrow(out) == nrow(x)) { - # if we have variable labels for the first column, we use them as row names - # this helps the user identify NA rows when the "prop_table" attribute is - # extracted + # If the first column of the original data is not numeric, it's likely a + # label column. Use these labels as row names in the output for better + # readability. This is useful for identifying rows, especially when NAs + # are present. if (isFALSE(numeric_columns[1])) { r_names <- x[[1]] r_names <- as.character(r_names) + # Replace NA in labels with the string "NA", else we cannot set rownames r_names[is.na(r_names)] <- "NA" rownames(out) <- r_names } else { + # Otherwise, just use the original row names rownames(out) <- rownames(x) } } } + out } From ef5db64e1514648730e72bc55301501fb2e2ccba Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 11:25:24 +0200 Subject: [PATCH 20/33] fix --- R/data_xtabulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 5ed4fac88..b506e9450 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -206,7 +206,7 @@ format.datawizard_crosstab <- function(x, format(x[, numeric_columns[i]]), ifelse( prop_table[, i] == 0, - "(0 %)", + "(0%)", format( sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), justify = "right" From ae44f325f8ac6729c0fac7f7711f51bf77b54b84 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 17:15:21 +0200 Subject: [PATCH 21/33] fix --- R/data_xtabulate.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index b506e9450..1fe3439db 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -204,13 +204,13 @@ format.datawizard_crosstab <- function(x, for (i in seq_len(ncol(prop_table))) { x[, numeric_columns[i]] <- paste( format(x[, numeric_columns[i]]), - ifelse( - prop_table[, i] == 0, - "(0%)", - format( - sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), - justify = "right" - ) + format( + ifelse( + prop_table[, i] == 0, + "(0%)", + sprintf("(%.*f%%)", digits, 100 * prop_table[, i]) + ), + justify = "right" ) ) } From 3a08679126668a0e9379d96f92eeb07de379614e Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 17:22:02 +0200 Subject: [PATCH 22/33] lintr --- R/utils.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index d881597ca..cccb686f0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -148,7 +148,7 @@ for (nm in setdiff(names(custom_attr), names(attributes(data.frame())))) { attr(data, which = nm) <- custom_attr[[nm]] } - return(data) + data } @@ -269,15 +269,15 @@ # Evaluate if (eval) { - args <- lapply(exprs, function(expr) { + fun_args <- lapply(exprs, function(expr) { eval(parse(text = expr), envir = src) }) } else { - args <- unname(mget(exprs, envir = as.environment(src))) + fun_args <- unname(mget(exprs, envir = as.environment(src))) } # Create the string(s) - do.call(sprintf, c(list(fmt_sprintf), args)) + do.call(sprintf, c(list(fmt_sprintf), fun_args)) } From eacacc1b8ced3d618e32194d15d13b14aeadb721 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 17:31:56 +0200 Subject: [PATCH 23/33] fix --- R/data_xtabulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 1fe3439db..16db7210d 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -207,7 +207,7 @@ format.datawizard_crosstab <- function(x, format( ifelse( prop_table[, i] == 0, - "(0%)", + "(0.0%)", sprintf("(%.*f%%)", digits, 100 * prop_table[, i]) ), justify = "right" From cef721c7d69b86322d331fd4b0a6abe63aff7566 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 17:32:55 +0200 Subject: [PATCH 24/33] fix --- R/data_xtabulate.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 16db7210d..2504f7742 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -204,14 +204,7 @@ format.datawizard_crosstab <- function(x, for (i in seq_len(ncol(prop_table))) { x[, numeric_columns[i]] <- paste( format(x[, numeric_columns[i]]), - format( - ifelse( - prop_table[, i] == 0, - "(0.0%)", - sprintf("(%.*f%%)", digits, 100 * prop_table[, i]) - ), - justify = "right" - ) + format(sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), justify = "right") ) } } From af4a171f6113938aaea1f7cde27ade988434699d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 18:04:31 +0200 Subject: [PATCH 25/33] update snaps --- tests/testthat/_snaps/data_tabulate.md | 106 ++++++++++++------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md index c83b293a1..872b3764c 100644 --- a/tests/testthat/_snaps/data_tabulate.md +++ b/tests/testthat/_snaps/data_tabulate.md @@ -516,17 +516,17 @@ Code print(data_tabulate(efc, c("c172code", "e42dep"), by = "e16sex", proportions = "row")) Output - Variable | Value | male | female | | Total - ---------+-------+------------+------------+------------+------ - c172code | 1 | 5 (62.5%) | 2 (25.0%) | 1 (12.5%) | 8 - c172code | 2 | 31 (47.0%) | 33 (50.0%) | 2 (3.0%) | 66 - c172code | 3 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16 - c172code | | 5 (50.0%) | 4 (40.0%) | 1 (10.0%) | 10 - e42dep | 1 | 2 (100.0%) | 0 (0.0%) | 0 (0.0%) | 2 - e42dep | 2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 - e42dep | 3 | 8 (28.6%) | 18 (64.3%) | 2 (7.1%) | 28 - e42dep | 4 | 32 (50.8%) | 28 (44.4%) | 3 (4.8%) | 63 - e42dep | | 1 (33.3%) | 2 (66.7%) | 0 (0.0%) | 3 + Variable | Value | male | female | | Total + ---------+-------+-------------+------------+-----------+------ + c172code | 1 | 5 (62.5%) | 2 (25.0%) | 1 (12.5%) | 8 + c172code | 2 | 31 (47.0%) | 33 (50.0%) | 2 (3.0%) | 66 + c172code | 3 | 4 (25.0%) | 11 (68.8%) | 1 (6.2%) | 16 + c172code | | 5 (50.0%) | 4 (40.0%) | 1 (10.0%) | 10 + e42dep | 1 | 2 (100.0%) | 0 (0.0%) | 0 (0.0%) | 2 + e42dep | 2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 + e42dep | 3 | 8 (28.6%) | 18 (64.3%) | 2 (7.1%) | 28 + e42dep | 4 | 32 (50.8%) | 28 (44.4%) | 3 (4.8%) | 63 + e42dep | | 1 (33.3%) | 2 (66.7%) | 0 (0.0%) | 3 # data_tabulate, cross tables, tinytable @@ -655,17 +655,17 @@ Output Grouped by e42dep (1) - Variable | Value | male | female | | Total - ---------+-------+------------+--------+------------+------ - c172code | 2 | 2 (100.0%) | | 0 (0.0%) | 2 - | | 0 (0%) | | 0 (0%) | 0 + Variable | Value | male | female | | Total + ---------+-------+------------+--------+----------+------ + c172code | 2 | 2 (100.0%) | | 0 (0.0%) | 2 + | | 0 (0.0%) | | 0 (0.0%) | 0 Grouped by e42dep (2) - Variable | Value | male | female | | Total - ---------+-------+-----------+-----------+-----------+------ - c172code | 2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 - | | 0 (0%) | 0 (0%) | 0 (0%) | 0 + Variable | Value | male | female | | Total + ---------+-------+-----------+-----------+----------+------ + c172code | 2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) | 4 + | | 0 (0.0%) | 0 (0.0%) | 0 (0.0%) | 0 Grouped by e42dep (3) @@ -687,10 +687,10 @@ Grouped by e42dep (NA) - Variable | Value | male | female | | Total - ---------+-------+------------+------------+------------+------ - c172code | 2 | 0 (0.0%) | 2 (100.0%) | 0 (0.0%) | 2 - | | 1 (100.0%) | 0 (0.0%) | 0 (0.0%) | 1 + Variable | Value | male | female | | Total + ---------+-------+------------+------------+----------+------ + c172code | 2 | 0 (0.0%) | 2 (100.0%) | 0 (0.0%) | 2 + | | 1 (100.0%) | 0 (0.0%) | 0 (0.0%) | 1 # data_tabulate, cross tables, print/format works @@ -796,17 +796,17 @@ Output - |Variable | Value| male| female| (NA)| Total| - |:--------|-----:|----------:|----------:|----------:|-----:| - |c172code | 1| 5 (62.5%)| 2 (25.0%)| 1 (12.5%)| 8| - |c172code | 2| 31 (47.0%)| 33 (50.0%)| 2 (3.0%)| 66| - |c172code | 3| 4 (25.0%)| 11 (68.8%)| 1 (6.2%)| 16| - |c172code | (NA)| 5 (50.0%)| 4 (40.0%)| 1 (10.0%)| 10| - |e42dep | 1| 2 (100.0%)| 0 (0.0%)| 0 (0.0%)| 2| - |e42dep | 2| 2 (50.0%)| 2 (50.0%)| 0 (0.0%)| 4| - |e42dep | 3| 8 (28.6%)| 18 (64.3%)| 2 (7.1%)| 28| - |e42dep | 4| 32 (50.8%)| 28 (44.4%)| 3 (4.8%)| 63| - |e42dep | (NA)| 1 (33.3%)| 2 (66.7%)| 0 (0.0%)| 3| + |Variable | Value| male| female| (NA) | Total| + |:--------|-----:|-----------:|----------:|:---------|-----:| + |c172code | 1| 5 (62.5%)| 2 (25.0%)|1 (12.5%) | 8| + |c172code | 2| 31 (47.0%)| 33 (50.0%)|2 (3.0%) | 66| + |c172code | 3| 4 (25.0%)| 11 (68.8%)|1 (6.2%) | 16| + |c172code | (NA)| 5 (50.0%)| 4 (40.0%)|1 (10.0%) | 10| + |e42dep | 1| 2 (100.0%)| 0 (0.0%)| 0 (0.0%) | 2| + |e42dep | 2| 2 (50.0%)| 2 (50.0%)| 0 (0.0%) | 4| + |e42dep | 3| 8 (28.6%)| 18 (64.3%)| 2 (7.1%) | 28| + |e42dep | 4| 32 (50.8%)| 28 (44.4%)| 3 (4.8%) | 63| + |e42dep | (NA)| 1 (33.3%)| 2 (66.7%)| 0 (0.0%) | 3| --- @@ -831,31 +831,31 @@ Output - |Variable | Value| male| female| (NA)| Total| - |:--------|-----:|----------:|----------:|----------:|-----:| - |c172code | 1| 5 (62.5%)| 2 (25.0%)| 1 (12.5%)| 8| - |c172code | 2| 31 (47.0%)| 33 (50.0%)| 2 (3.0%)| 66| - |c172code | 3| 4 (25.0%)| 11 (68.8%)| 1 (6.2%)| 16| - |c172code | (NA)| 5 (50.0%)| 4 (40.0%)| 1 (10.0%)| 10| - |e42dep | 1| 2 (100.0%)| 0 (0.0%)| 0 (0.0%)| 2| - |e42dep | 2| 2 (50.0%)| 2 (50.0%)| 0 (0.0%)| 4| - |e42dep | 3| 8 (28.6%)| 18 (64.3%)| 2 (7.1%)| 28| - |e42dep | 4| 32 (50.8%)| 28 (44.4%)| 3 (4.8%)| 63| - |e42dep | (NA)| 1 (33.3%)| 2 (66.7%)| 0 (0.0%)| 3| + |Variable | Value| male| female| (NA) | Total| + |:--------|-----:|-----------:|----------:|:---------|-----:| + |c172code | 1| 5 (62.5%)| 2 (25.0%)|1 (12.5%) | 8| + |c172code | 2| 31 (47.0%)| 33 (50.0%)|2 (3.0%) | 66| + |c172code | 3| 4 (25.0%)| 11 (68.8%)|1 (6.2%) | 16| + |c172code | (NA)| 5 (50.0%)| 4 (40.0%)|1 (10.0%) | 10| + |e42dep | 1| 2 (100.0%)| 0 (0.0%)| 0 (0.0%) | 2| + |e42dep | 2| 2 (50.0%)| 2 (50.0%)| 0 (0.0%) | 4| + |e42dep | 3| 8 (28.6%)| 18 (64.3%)| 2 (7.1%) | 28| + |e42dep | 4| 32 (50.8%)| 28 (44.4%)| 3 (4.8%) | 63| + |e42dep | (NA)| 1 (33.3%)| 2 (66.7%)| 0 (0.0%) | 3| # data_tabulate, correct 0% for proportions Code print(out[[1]]) Output - c172code | male | female | | Total - ---------+------------+------------+--------+------ - 1 | 5 (10.9%) | 3 (5.6%) | 0 (0%) | 8 - 2 | 32 (69.6%) | 34 (63.0%) | 0 (0%) | 66 - 3 | 4 (8.7%) | 12 (22.2%) | 0 (0%) | 16 - | 5 (10.9%) | 5 (9.3%) | 0 (0%) | 10 - ---------+------------+------------+--------+------ - Total | 46 | 54 | 0 | 100 + c172code | male | female | | Total + ---------+------------+------------+----------+------ + 1 | 5 (10.9%) | 3 (5.6%) | 0 (0.0%) | 8 + 2 | 32 (69.6%) | 34 (63.0%) | 0 (0.0%) | 66 + 3 | 4 (8.7%) | 12 (22.2%) | 0 (0.0%) | 16 + | 5 (10.9%) | 5 (9.3%) | 0 (0.0%) | 10 + ---------+------------+------------+----------+------ + Total | 46 | 54 | 0 | 100 # data_tabulate, table methods From 4ff1f54d2c823737fc9a04b105f794639e62396f Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 3 Oct 2025 18:09:34 +0200 Subject: [PATCH 26/33] fix --- tests/testthat/test-data_tabulate.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index 2896dbf4a..b0a7f199d 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -504,7 +504,10 @@ test_that("data_tabulate, validate against table", { test_that("data_tabulate, correct 0% for proportions", { data(efc, package = "datawizard") out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") - expect_identical(format(out[[1]])[[4]], c("0 (0%)", "0 (0%)", "0 (0%)", "0 (0%)", "", "0")) + expect_identical( + format(out[[1]])[[4]], + c("0 (0.0%)", "0 (0.0%)", "0 (0.0%)", "0 (0.0%)", "", "0") + ) expect_snapshot(print(out[[1]])) }) From b37cec0cda499cfabf5d7178da1bbfdca75dd755 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 7 Oct 2025 14:09:28 +0200 Subject: [PATCH 27/33] add test --- tests/testthat/test-data_tabulate.R | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index b0a7f199d..71769f251 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -682,3 +682,52 @@ test_that("data_tabulate, table methods, only warn if necessary", { d <- data_group(mtcars, "am") expect_silent(as.table(data_tabulate(d, "cyl", by = "gear"))) }) + + +test_that("data_tabulate, cross tables, extract proportions", { + data(efc, package = "datawizard") + out <- data_tabulate( + efc, + "c172code", + by = "e16sex", + proportions = "row", + remove_na = TRUE + ) + tab <- table(efc$c172code, efc$e16sex) / + rowSums(table(efc$c172code, efc$e16sex)) + dimnames(tab) <- list(c("1", "2", "3"), c("male", "female")) + expect_equal( + as.prop.table(out, verbose = FALSE), + list(tab), + ignore_attr = TRUE, + tolerance = 1e-4 + ) + expect_equal( + as.prop.table(out, verbose = FALSE, simplify = TRUE), + tab, + ignore_attr = TRUE, + tolerance = 1e-4 + ) + out <- data_tabulate( + efc, + "c172code", + by = "e16sex", + proportions = "col", + remove_na = TRUE + ) + tab <- as.table(t(t(table(efc$c172code, efc$e16sex)) / + colSums(table(efc$c172code, efc$e16sex)))) + dimnames(tab) <- list(c("1", "2", "3"), c("male", "female")) + expect_equal( + as.prop.table(out, verbose = FALSE), + list(tab), + ignore_attr = TRUE, + tolerance = 1e-4 + ) + expect_equal( + as.prop.table(out, verbose = FALSE, simplify = TRUE), + tab, + ignore_attr = TRUE, + tolerance = 1e-4 + ) +}) From 567cf2e4aa42557532ef8e30703d0d9ece1bc2ec Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Wed, 8 Oct 2025 21:36:34 +0200 Subject: [PATCH 28/33] fmt --- R/data_tabulate.R | 21 ++++++++++++++++++--- R/data_xtabulate.R | 5 ++++- tests/testthat/test-data_tabulate.R | 6 ++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index f43c33644..3f227af6c 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -686,7 +686,13 @@ as.prop.table <- function(x, ...) { #' @rdname data_tabulate #' @export -as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { +as.prop.table.datawizard_crosstab <- function( + x, + remove_na = TRUE, + simplify = FALSE, + verbose = TRUE, + ... +) { # sanity check - the `.data.frame` method returns a list, but not the # default method if (!is.data.frame(x)) { @@ -700,7 +706,10 @@ as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FA } if (remove_na) { - if (verbose && ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table))) { + if ( + verbose && + ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) + ) { insight::format_alert("Removing NA values from frequency table.") } if (!is.null(prop_table[["NA"]])) { @@ -721,7 +730,13 @@ as.prop.table.datawizard_crosstab <- function(x, remove_na = TRUE, simplify = FA } #' @export -as.prop.table.datawizard_crosstabs <- function(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) { +as.prop.table.datawizard_crosstabs <- function( + x, + remove_na = TRUE, + simplify = FALSE, + verbose = TRUE, + ... +) { # only show message once we set `verbose = FALSE` in the lapply() if (remove_na && verbose) { prop_table <- attributes(x[[1]])$prop_table diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 5b0a0f541..f29387b36 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -216,7 +216,10 @@ format.datawizard_crosstab <- function( for (i in seq_len(ncol(prop_table))) { x[, numeric_columns[i]] <- paste( format(x[, numeric_columns[i]]), - format(sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), justify = "right") + format( + sprintf("(%.*f%%)", digits, 100 * prop_table[, i]), + justify = "right" + ) ) } } diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index fdc092b3c..5ac4bad63 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -1085,8 +1085,10 @@ test_that("data_tabulate, cross tables, extract proportions", { proportions = "col", remove_na = TRUE ) - tab <- as.table(t(t(table(efc$c172code, efc$e16sex)) / - colSums(table(efc$c172code, efc$e16sex)))) + tab <- as.table(t( + t(table(efc$c172code, efc$e16sex)) / + colSums(table(efc$c172code, efc$e16sex)) + )) dimnames(tab) <- list(c("1", "2", "3"), c("male", "female")) expect_equal( as.prop.table(out, verbose = FALSE), From 4a6c5eec2384d33bb8c4bfea53fb7858819b9817 Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Thu, 9 Oct 2025 22:42:44 +0200 Subject: [PATCH 29/33] move as.prop.table to its own rd page --- R/data_tabulate.R | 29 +++++++++++++++++++++----- man/as.prop.table.Rd | 48 ++++++++++++++++++++++++++++++++++++++++++++ man/data_tabulate.Rd | 12 +++-------- 3 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 man/as.prop.table.Rd diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 3f227af6c..473e1b30b 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -58,9 +58,6 @@ #' frequencies of the variable. This is useful for further statistical analysis, #' e.g. for using `chisq.test()` on the frequency table. See 'Examples'. #' -#' Finally, the `as.prop.table()` method returns the proportions of the -#' crosstable as a table object, if `by` was supplied to `data_tabulate()`. -#' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. The crosstable includes `` #' (missing) values by default. The first column indicates values of `x`, the @@ -79,6 +76,8 @@ #' #' @return A data frame, or a list of data frames, with one frequency table #' as data frame per variable. +#' +#' @seealso [as.prop.table] #' #' @examplesIf requireNamespace("poorman") #' # frequency tables ------- @@ -678,13 +677,33 @@ as.table.datawizard_crosstabs <- function( out } -#' @rdname data_tabulate +#' Convert a crosstable to a propensity table +#' +#' @description +#' This is an S3 generic. It can be used on objects of class +#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the +#' argument `by`. +#' +#' @param x An object of class `datawizard_crosstab`. +#' @inheritParams data_tabulate +#' #' @export +#' @seealso [data_tabulate] +#' +#' @examples +#' data(efc) +#' +#' # Some cross tabulation +#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") +#' cross +#' +#' # We convert it to a propensity table +#' as.prop.table(cross) as.prop.table <- function(x, ...) { UseMethod("as.prop.table") } -#' @rdname data_tabulate +#' @rdname as.prop.table #' @export as.prop.table.datawizard_crosstab <- function( x, diff --git a/man/as.prop.table.Rd b/man/as.prop.table.Rd new file mode 100644 index 000000000..c8bc9ae36 --- /dev/null +++ b/man/as.prop.table.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_tabulate.R +\name{as.prop.table} +\alias{as.prop.table} +\alias{as.prop.table.datawizard_crosstab} +\title{Convert a crosstable to a propensity table} +\usage{ +as.prop.table(x, ...) + +\method{as.prop.table}{datawizard_crosstab}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) +} +\arguments{ +\item{x}{An object of class \code{datawizard_crosstab}.} + +\item{...}{not used.} + +\item{remove_na}{Logical, if \code{FALSE}, missing values are included in the +frequency or crosstable, else missing values are omitted. Note that the +default for the \code{as.table()} method is \code{remove_na = TRUE}, so that missing +values are not included in the returned table, which makes more sense for +post-processing of the table, e.g. using \code{chisq.test()}.} + +\item{simplify}{Logical, if \code{TRUE}, the returned table is simplified to a +single table object if there is only one frequency or contingency table +input. Else, always for multiple table inputs or when \code{simplify = FALSE}, a +list of tables is returned. This is only relevant for the \code{as.table()} +methods. To ensure consistent output, the default is \code{FALSE}.} + +\item{verbose}{Toggle warnings and messages.} +} +\description{ +This is an S3 generic. It can be used on objects of class +\code{datawizard_crosstab} created by \code{data_tabulate()} when it was run with the +argument \code{by}. +} +\examples{ +data(efc) + +# Some cross tabulation +cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") +cross + +# We convert it to a propensity table +as.prop.table(cross) +} +\seealso{ +\link{data_tabulate} +} diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 4c2a1ee65..86010b9c6 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -6,8 +6,6 @@ \alias{data_tabulate.data.frame} \alias{as.data.frame.datawizard_tables} \alias{as.table.datawizard_table} -\alias{as.prop.table} -\alias{as.prop.table.datawizard_crosstab} \alias{print.datawizard_table} \alias{display.datawizard_table} \title{Create frequency and crosstables of variables} @@ -53,10 +51,6 @@ data_tabulate(x, ...) \method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) -as.prop.table(x, ...) - -\method{as.prop.table}{datawizard_crosstab}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) - \method{print}{datawizard_table}(x, big_mark = NULL, ...) \method{display}{datawizard_table}(object, big_mark = NULL, format = "markdown", ...) @@ -209,9 +203,6 @@ frequency tables as data frame. See 'Examples'. There is also an \code{as.table()} method, which returns a table object with the frequencies of the variable. This is useful for further statistical analysis, e.g. for using \code{chisq.test()} on the frequency table. See 'Examples'. - -Finally, the \code{as.prop.table()} method returns the proportions of the -crosstable as a table object, if \code{by} was supplied to \code{data_tabulate()}. } \note{ There are \code{print_html()} and \code{print_md()} methods available for printing @@ -318,3 +309,6 @@ x <- data_tabulate(d, "cyl", by = "gear") as.table(x) \dontshow{\}) # examplesIf} } +\seealso{ +\link{as.prop.table} +} From 2f4d0a10b1da8ddca06de574737c4235872d6786 Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Thu, 9 Oct 2025 22:47:22 +0200 Subject: [PATCH 30/33] fmt --- R/data_tabulate.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 473e1b30b..37e2874e5 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -76,7 +76,7 @@ #' #' @return A data frame, or a list of data frames, with one frequency table #' as data frame per variable. -#' +#' #' @seealso [as.prop.table] #' #' @examplesIf requireNamespace("poorman") @@ -678,25 +678,25 @@ as.table.datawizard_crosstabs <- function( } #' Convert a crosstable to a propensity table -#' -#' @description +#' +#' @description #' This is an S3 generic. It can be used on objects of class #' `datawizard_crosstab` created by `data_tabulate()` when it was run with the #' argument `by`. -#' +#' #' @param x An object of class `datawizard_crosstab`. #' @inheritParams data_tabulate -#' +#' #' @export #' @seealso [data_tabulate] -#' +#' #' @examples #' data(efc) #' #' # Some cross tabulation #' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") #' cross -#' +#' #' # We convert it to a propensity table #' as.prop.table(cross) as.prop.table <- function(x, ...) { From e7abe1fca140c934af76de56f5f92c3983f09f62 Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Thu, 9 Oct 2025 22:54:49 +0200 Subject: [PATCH 31/33] pkgdown --- pkgdown/_pkgdown.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgdown/_pkgdown.yaml b/pkgdown/_pkgdown.yaml index 01b701f03..9d6308271 100644 --- a/pkgdown/_pkgdown.yaml +++ b/pkgdown/_pkgdown.yaml @@ -61,6 +61,7 @@ reference: desc: | Functions to compute statistical summaries of data properties and distributions contents: + - as.prop.table - data_codebook - data_summary - data_tabulate From 3a500f2af5e0b13c023c4269e190a61427cb8862 Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Fri, 10 Oct 2025 21:26:21 +0200 Subject: [PATCH 32/33] move as.table docs to as.prop.table --- R/data_tabulate.R | 271 ++++++++++++++++++++++--------------------- man/as.prop.table.Rd | 65 ++++++++++- man/data_tabulate.Rd | 70 ++--------- 3 files changed, 206 insertions(+), 200 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 37e2874e5..056a476b5 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -52,11 +52,12 @@ #' data frame. The structure of the returned object is a nested data frame, #' where the first column contains name of the variable for which frequencies #' were calculated, and the second column is a list column that contains the -#' frequency tables as data frame. See 'Examples'. +#' frequency tables as data frame. See [as.table.datawizard_table]. #' #' There is also an `as.table()` method, which returns a table object with the #' frequencies of the variable. This is useful for further statistical analysis, -#' e.g. for using `chisq.test()` on the frequency table. See 'Examples'. +#' e.g. for using `chisq.test()` on the frequency table. See +#' [as.table.datawizard_table]. #' #' @section Crosstables: #' If `by` is supplied, a crosstable is created. The crosstable includes `` @@ -141,28 +142,6 @@ #' # round percentages #' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") #' print(out, digits = 0) -#' -#' # coerce to data frames -#' result <- data_tabulate(efc, "c172code", by = "e16sex") -#' as.data.frame(result) -#' as.data.frame(result)$table -#' as.data.frame(result, add_total = TRUE)$table -#' -#' # post-processing ------ -#' # ---------------------- -#' -#' out <- data_tabulate(efc, "c172code", by = "e16sex") -#' # we need to simplify the output, else we get a list of tables -#' suppressWarnings(chisq.test(as.table(out, simplify = TRUE))) -#' -#' # apply chisq.test to each table -#' out <- data_tabulate(efc, c("c172code", "e16sex")) -#' suppressWarnings(lapply(as.table(out), chisq.test)) -#' -#' # can also handle grouped data frames -#' d <- data_group(mtcars, "am") -#' x <- data_tabulate(d, "cyl", by = "gear") -#' as.table(x) #' @export data_tabulate <- function(x, ...) { UseMethod("data_tabulate") @@ -466,10 +445,142 @@ insight::print_md #' @export insight::display +#' Convert a crosstable to a frequency or a propensity table +#' +#' @description +#' `as.prop.table()` is an S3 generic. It can be used on objects of class +#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the +#' arguments `by` and `proportions`. +#' +#' @param x An object created by `data_tabulate()`. It must be of class +#' `datawizard_crosstab` for `as.prop.table()`. +#' @inheritParams data_tabulate +#' +#' @export +#' @seealso [data_tabulate] +#' +#' @examples +#' data(efc) +#' +#' # Some cross tabulation +#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") +#' cross +#' +#' # Convert to a propensity table +#' as.prop.table(cross) +#' +#' # Convert to data.frame +#' result <- data_tabulate(efc, "c172code", by = "e16sex") +#' as.data.frame(result) +#' as.data.frame(result)$table +#' as.data.frame(result, add_total = TRUE)$table +#' +#' # Convert to a table that can be passed to chisq.test() +#' +#' out <- data_tabulate(efc, "c172code", by = "e16sex") +#' # we need to simplify the output, else we get a list of tables +#' tbl <- as.table(out, simplify = TRUE) +#' tbl +#' suppressWarnings(chisq.test(tbl)) +#' +#' # apply chisq.test to each table +#' out <- data_tabulate(efc, c("c172code", "e16sex")) +#' suppressWarnings(lapply(as.table(out), chisq.test)) +#' +#' # can also handle grouped data frames +#' d <- data_group(mtcars, "am") +#' x <- data_tabulate(d, "cyl", by = "gear") +#' as.table(x) +as.prop.table <- function(x, ...) { + UseMethod("as.prop.table") +} + +#' @rdname as.prop.table +#' @export +as.prop.table.datawizard_crosstab <- function( + x, + remove_na = TRUE, + simplify = FALSE, + verbose = TRUE, + ... +) { + # sanity check - the `.data.frame` method returns a list, but not the + # default method + if (!is.data.frame(x)) { + x <- x[[1]] + } + prop_table <- attributes(x)$prop_table + + if (is.null(prop_table)) { + insight::format_warning("No proportions available.") + return(NULL) + } + + if (remove_na) { + if ( + verbose && + ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) + ) { + insight::format_alert("Removing NA values from frequency table.") + } + if (!is.null(prop_table[["NA"]])) { + prop_table[["NA"]] <- NULL + } + if ("NA" %in% rownames(prop_table)) { + prop_table <- prop_table[rownames(prop_table) != "NA", ] + } + } + # coerce to table + result <- as.table(as.matrix(prop_table)) + # if we don't want to simplify the table, we wrap it into a list + if (!simplify) { + result <- list(result) + } + + result +} + +#' @export +as.prop.table.datawizard_crosstabs <- function( + x, + remove_na = TRUE, + simplify = FALSE, + verbose = TRUE, + ... +) { + # only show message once we set `verbose = FALSE` in the lapply() + if (remove_na && verbose) { + prop_table <- attributes(x[[1]])$prop_table + if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) { + insight::format_alert("Removing NA values from frequency table.") + } + } + + out <- insight::compact_list(lapply( + x, + as.prop.table.datawizard_crosstab, + remove_na = remove_na, + simplify = TRUE, + verbose = FALSE, + ... + )) + + # if no proportions found, return NULL + if (!length(out)) { + return(NULL) + } + + # if only one table is returned, "unlist" + if (length(out) == 1 && simplify) { + out <- out[[1]] + } + out +} + # as.data.frame -------------------- -#' @rdname data_tabulate +#' @rdname as.prop.table #' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and #' column with the total N values are added to the data frame. `add_total` has #' no effect in `as.data.frame()` for simple frequency tables. @@ -529,7 +640,7 @@ as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables # as.table -------------------- -#' @rdname data_tabulate +#' @rdname as.prop.table #' @export as.table.datawizard_table <- function( x, @@ -677,114 +788,6 @@ as.table.datawizard_crosstabs <- function( out } -#' Convert a crosstable to a propensity table -#' -#' @description -#' This is an S3 generic. It can be used on objects of class -#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the -#' argument `by`. -#' -#' @param x An object of class `datawizard_crosstab`. -#' @inheritParams data_tabulate -#' -#' @export -#' @seealso [data_tabulate] -#' -#' @examples -#' data(efc) -#' -#' # Some cross tabulation -#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") -#' cross -#' -#' # We convert it to a propensity table -#' as.prop.table(cross) -as.prop.table <- function(x, ...) { - UseMethod("as.prop.table") -} - -#' @rdname as.prop.table -#' @export -as.prop.table.datawizard_crosstab <- function( - x, - remove_na = TRUE, - simplify = FALSE, - verbose = TRUE, - ... -) { - # sanity check - the `.data.frame` method returns a list, but not the - # default method - if (!is.data.frame(x)) { - x <- x[[1]] - } - prop_table <- attributes(x)$prop_table - - if (is.null(prop_table)) { - insight::format_warning("No proportions available.") - return(NULL) - } - - if (remove_na) { - if ( - verbose && - ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) - ) { - insight::format_alert("Removing NA values from frequency table.") - } - if (!is.null(prop_table[["NA"]])) { - prop_table[["NA"]] <- NULL - } - if ("NA" %in% rownames(prop_table)) { - prop_table <- prop_table[rownames(prop_table) != "NA", ] - } - } - # coerce to table - result <- as.table(as.matrix(prop_table)) - # if we don't want to simplify the table, we wrap it into a list - if (!simplify) { - result <- list(result) - } - - result -} - -#' @export -as.prop.table.datawizard_crosstabs <- function( - x, - remove_na = TRUE, - simplify = FALSE, - verbose = TRUE, - ... -) { - # only show message once we set `verbose = FALSE` in the lapply() - if (remove_na && verbose) { - prop_table <- attributes(x[[1]])$prop_table - if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) { - insight::format_alert("Removing NA values from frequency table.") - } - } - - out <- insight::compact_list(lapply( - x, - as.prop.table.datawizard_crosstab, - remove_na = remove_na, - simplify = TRUE, - verbose = FALSE, - ... - )) - - # if no proportions found, return NULL - if (!length(out)) { - return(NULL) - } - - # if only one table is returned, "unlist" - if (length(out) == 1 && simplify) { - out <- out[[1]] - } - out -} - .is_grouped_df_xtab <- function(x) { if (!is.data.frame(x)) { diff --git a/man/as.prop.table.Rd b/man/as.prop.table.Rd index c8bc9ae36..1cf84f1ca 100644 --- a/man/as.prop.table.Rd +++ b/man/as.prop.table.Rd @@ -3,14 +3,28 @@ \name{as.prop.table} \alias{as.prop.table} \alias{as.prop.table.datawizard_crosstab} -\title{Convert a crosstable to a propensity table} +\alias{as.data.frame.datawizard_tables} +\alias{as.table.datawizard_table} +\title{Convert a crosstable to a frequency or a propensity table} \usage{ as.prop.table(x, ...) \method{as.prop.table}{datawizard_crosstab}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) + +\method{as.data.frame}{datawizard_tables}( + x, + row.names = NULL, + optional = FALSE, + ..., + stringsAsFactors = FALSE, + add_total = FALSE +) + +\method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) } \arguments{ -\item{x}{An object of class \code{datawizard_crosstab}.} +\item{x}{An object created by \code{data_tabulate()}. It must be of class +\code{datawizard_crosstab} for \code{as.prop.table()}.} \item{...}{not used.} @@ -27,11 +41,29 @@ list of tables is returned. This is only relevant for the \code{as.table()} methods. To ensure consistent output, the default is \code{FALSE}.} \item{verbose}{Toggle warnings and messages.} + +\item{row.names}{\code{NULL} or a character vector giving the row + names for the data frame. Missing values are not allowed.} + +\item{optional}{logical. If \code{TRUE}, setting row names and + converting column names (to syntactic names: see + \code{\link[base]{make.names}}) is optional. Note that all of \R's + \pkg{base} package \code{as.data.frame()} methods use + \code{optional} only for column names treatment, basically with the + meaning of \code{\link[base]{data.frame}(*, check.names = !optional)}. + See also the \code{make.names} argument of the \code{matrix} method.} + +\item{stringsAsFactors}{logical: should the character vector be converted + to a factor?} + +\item{add_total}{For crosstables (i.e. when \code{by} is not \code{NULL}), a row and +column with the total N values are added to the data frame. \code{add_total} has +no effect in \code{as.data.frame()} for simple frequency tables.} } \description{ -This is an S3 generic. It can be used on objects of class +\code{as.prop.table()} is an S3 generic. It can be used on objects of class \code{datawizard_crosstab} created by \code{data_tabulate()} when it was run with the -argument \code{by}. +arguments \code{by} and \code{proportions}. } \examples{ data(efc) @@ -40,8 +72,31 @@ data(efc) cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row") cross -# We convert it to a propensity table +# Convert to a propensity table as.prop.table(cross) + +# Convert to data.frame +result <- data_tabulate(efc, "c172code", by = "e16sex") +as.data.frame(result) +as.data.frame(result)$table +as.data.frame(result, add_total = TRUE)$table + +# Convert to a table that can be passed to chisq.test() + +out <- data_tabulate(efc, "c172code", by = "e16sex") +# we need to simplify the output, else we get a list of tables +tbl <- as.table(out, simplify = TRUE) +tbl +suppressWarnings(chisq.test(tbl)) + +# apply chisq.test to each table +out <- data_tabulate(efc, c("c172code", "e16sex")) +suppressWarnings(lapply(as.table(out), chisq.test)) + +# can also handle grouped data frames +d <- data_group(mtcars, "am") +x <- data_tabulate(d, "cyl", by = "gear") +as.table(x) } \seealso{ \link{data_tabulate} diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 86010b9c6..46de3c1b6 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -4,8 +4,6 @@ \alias{data_tabulate} \alias{data_tabulate.default} \alias{data_tabulate.data.frame} -\alias{as.data.frame.datawizard_tables} -\alias{as.table.datawizard_table} \alias{print.datawizard_table} \alias{display.datawizard_table} \title{Create frequency and crosstables of variables} @@ -40,17 +38,6 @@ data_tabulate(x, ...) ... ) -\method{as.data.frame}{datawizard_tables}( - x, - row.names = NULL, - optional = FALSE, - ..., - stringsAsFactors = FALSE, - add_total = FALSE -) - -\method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...) - \method{print}{datawizard_table}(x, big_mark = NULL, ...) \method{display}{datawizard_table}(object, big_mark = NULL, format = "markdown", ...) @@ -146,30 +133,6 @@ functions (see 'Details'), this argument may be used as workaround.} \item{collapse}{Logical, if \code{TRUE} collapses multiple tables into one larger table for printing. This affects only printing, not the returned object.} -\item{row.names}{\code{NULL} or a character vector giving the row - names for the data frame. Missing values are not allowed.} - -\item{optional}{logical. If \code{TRUE}, setting row names and - converting column names (to syntactic names: see - \code{\link[base]{make.names}}) is optional. Note that all of \R's - \pkg{base} package \code{as.data.frame()} methods use - \code{optional} only for column names treatment, basically with the - meaning of \code{\link[base]{data.frame}(*, check.names = !optional)}. - See also the \code{make.names} argument of the \code{matrix} method.} - -\item{stringsAsFactors}{logical: should the character vector be converted - to a factor?} - -\item{add_total}{For crosstables (i.e. when \code{by} is not \code{NULL}), a row and -column with the total N values are added to the data frame. \code{add_total} has -no effect in \code{as.data.frame()} for simple frequency tables.} - -\item{simplify}{Logical, if \code{TRUE}, the returned table is simplified to a -single table object if there is only one frequency or contingency table -input. Else, always for multiple table inputs or when \code{simplify = FALSE}, a -list of tables is returned. This is only relevant for the \code{as.table()} -methods. To ensure consistent output, the default is \code{FALSE}.} - \item{big_mark}{Optional character string, indicating the big mark that is used for large numbers. If \code{NULL} (default), a big mark is added automatically for large numbers (i.e. numbers with more than 5 digits). If you want to remove @@ -182,6 +145,12 @@ the big mark, set \code{big_mark = ""}.} package. \code{format = "tt"} creates a \code{tinytable} object, which is either printed as markdown or HTML table, depending on the environment. See \code{\link[insight:export_table]{insight::export_table()}} for details.} + +\item{simplify}{Logical, if \code{TRUE}, the returned table is simplified to a +single table object if there is only one frequency or contingency table +input. Else, always for multiple table inputs or when \code{simplify = FALSE}, a +list of tables is returned. This is only relevant for the \code{as.table()} +methods. To ensure consistent output, the default is \code{FALSE}.} } \value{ A data frame, or a list of data frames, with one frequency table @@ -198,11 +167,12 @@ There is an \code{as.data.frame()} method, to return the frequency tables as a data frame. The structure of the returned object is a nested data frame, where the first column contains name of the variable for which frequencies were calculated, and the second column is a list column that contains the -frequency tables as data frame. See 'Examples'. +frequency tables as data frame. See \link{as.table.datawizard_table}. There is also an \code{as.table()} method, which returns a table object with the frequencies of the variable. This is useful for further statistical analysis, -e.g. for using \code{chisq.test()} on the frequency table. See 'Examples'. +e.g. for using \code{chisq.test()} on the frequency table. See +\link{as.table.datawizard_table}. } \note{ There are \code{print_html()} and \code{print_md()} methods available for printing @@ -285,28 +255,6 @@ data_tabulate( # round percentages out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column") print(out, digits = 0) - -# coerce to data frames -result <- data_tabulate(efc, "c172code", by = "e16sex") -as.data.frame(result) -as.data.frame(result)$table -as.data.frame(result, add_total = TRUE)$table - -# post-processing ------ -# ---------------------- - -out <- data_tabulate(efc, "c172code", by = "e16sex") -# we need to simplify the output, else we get a list of tables -suppressWarnings(chisq.test(as.table(out, simplify = TRUE))) - -# apply chisq.test to each table -out <- data_tabulate(efc, c("c172code", "e16sex")) -suppressWarnings(lapply(as.table(out), chisq.test)) - -# can also handle grouped data frames -d <- data_group(mtcars, "am") -x <- data_tabulate(d, "cyl", by = "gear") -as.table(x) \dontshow{\}) # examplesIf} } \seealso{ From 9387ce86a19011481dfc85dcd0d8dbbddfc4bd4d Mon Sep 17 00:00:00 2001 From: etiennebacher Date: Fri, 10 Oct 2025 21:33:50 +0200 Subject: [PATCH 33/33] move simplify arg --- R/data_tabulate.R | 10 +++++----- man/data_tabulate.Rd | 6 ------ 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 056a476b5..0a5c72972 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -32,11 +32,6 @@ #' used for large numbers. If `NULL` (default), a big mark is added automatically for #' large numbers (i.e. numbers with more than 5 digits). If you want to remove #' the big mark, set `big_mark = ""`. -#' @param simplify Logical, if `TRUE`, the returned table is simplified to a -#' single table object if there is only one frequency or contingency table -#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a -#' list of tables is returned. This is only relevant for the `as.table()` -#' methods. To ensure consistent output, the default is `FALSE`. #' @param object An object returned by `data_tabulate()`. #' @param format String, indicating the output format. Can be `"markdown"` #' `"html"`, or `"tt"`. `format = "html"` create an HTML table using the *gt* @@ -454,6 +449,11 @@ insight::display #' #' @param x An object created by `data_tabulate()`. It must be of class #' `datawizard_crosstab` for `as.prop.table()`. +#' @param simplify Logical, if `TRUE`, the returned table is simplified to a +#' single table object if there is only one frequency or contingency table +#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a +#' list of tables is returned. This is only relevant for the `as.table()` +#' methods. To ensure consistent output, the default is `FALSE`. #' @inheritParams data_tabulate #' #' @export diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 46de3c1b6..87656e17f 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -145,12 +145,6 @@ the big mark, set \code{big_mark = ""}.} package. \code{format = "tt"} creates a \code{tinytable} object, which is either printed as markdown or HTML table, depending on the environment. See \code{\link[insight:export_table]{insight::export_table()}} for details.} - -\item{simplify}{Logical, if \code{TRUE}, the returned table is simplified to a -single table object if there is only one frequency or contingency table -input. Else, always for multiple table inputs or when \code{simplify = FALSE}, a -list of tables is returned. This is only relevant for the \code{as.table()} -methods. To ensure consistent output, the default is \code{FALSE}.} } \value{ A data frame, or a list of data frames, with one frequency table