Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7789439
Extracting proportions from `data_tabulate()`
strengejacke Sep 18, 2025
272ba57
Merge branch 'main' into strengejacke/issue655
strengejacke Oct 1, 2025
4da8f20
DRY
strengejacke Oct 1, 2025
c2dabec
fix
strengejacke Oct 1, 2025
0482a2d
fix
strengejacke Oct 1, 2025
99bd754
lintr
strengejacke Oct 2, 2025
5b4add7
too much lintr
strengejacke Oct 2, 2025
5f44ce1
fix
strengejacke Oct 2, 2025
28a7f69
row names
strengejacke Oct 2, 2025
761f391
add as.prop.table method
strengejacke Oct 2, 2025
ab1c130
news, desc
strengejacke Oct 2, 2025
6f466d5
PR number
strengejacke Oct 2, 2025
71fc12d
fixed warning
strengejacke Oct 2, 2025
3b9d655
declare method
strengejacke Oct 2, 2025
c8b3ed1
fix msg
strengejacke Oct 2, 2025
e3218e6
fix
strengejacke Oct 2, 2025
ea96216
fix
strengejacke Oct 2, 2025
9e2e604
fix printing
strengejacke Oct 2, 2025
d20a50c
fix
strengejacke Oct 2, 2025
c0eb952
comment code
strengejacke Oct 2, 2025
ef5db64
fix
strengejacke Oct 3, 2025
ae44f32
fix
strengejacke Oct 3, 2025
3a08679
lintr
strengejacke Oct 3, 2025
eacacc1
fix
strengejacke Oct 3, 2025
cef721c
fix
strengejacke Oct 3, 2025
af4a171
update snaps
strengejacke Oct 3, 2025
4ff1f54
fix
strengejacke Oct 3, 2025
b37cec0
add test
strengejacke Oct 7, 2025
e634350
Merge branch 'main' into strengejacke/issue655
etiennebacher Oct 8, 2025
567cf2e
fmt
etiennebacher Oct 8, 2025
d958fbc
Merge branch 'main' into strengejacke/issue655
strengejacke Oct 9, 2025
4a6c5ee
move as.prop.table to its own rd page
etiennebacher Oct 9, 2025
2f4d0a1
fmt
etiennebacher Oct 9, 2025
e7abe1f
pkgdown
etiennebacher Oct 9, 2025
3a500f2
move as.table docs to as.prop.table
etiennebacher Oct 10, 2025
9387ce8
move simplify arg
etiennebacher Oct 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 1.2.0.8
Version: 1.2.0.9
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531")),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ S3method(as.double,parameters_smoothness)
S3method(as.numeric,parameters_kurtosis)
S3method(as.numeric,parameters_skewness)
S3method(as.numeric,parameters_smoothness)
S3method(as.prop.table,datawizard_crosstab)
S3method(as.prop.table,datawizard_crosstabs)
S3method(as.table,datawizard_crosstab)
S3method(as.table,datawizard_crosstabs)
S3method(as.table,datawizard_table)
Expand Down Expand Up @@ -230,6 +232,7 @@ S3method(winsorize,factor)
S3method(winsorize,logical)
S3method(winsorize,numeric)
export(adjust)
export(as.prop.table)
export(assign_labels)
export(categorize)
export(center)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ BREAKING CHANGES

CHANGES

* `data_tabulate()` now saves the table of proportions for crosstables as an
attribute, accessible via the new `as.prop.table()` method (#656).

* Due to changes in the package `insight`, `data_tabulate()` no longer prints
decimals when all values in a column are integers (#641).

Expand Down
175 changes: 144 additions & 31 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@
#' used for large numbers. If `NULL` (default), a big mark is added automatically for
#' large numbers (i.e. numbers with more than 5 digits). If you want to remove
#' the big mark, set `big_mark = ""`.
#' @param simplify Logical, if `TRUE`, the returned table is simplified to a
#' single table object if there is only one frequency or contingency table
#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a
#' list of tables is returned. This is only relevant for the `as.table()`
#' methods. To ensure consistent output, the default is `FALSE`.
#' @param object An object returned by `data_tabulate()`.
#' @param format String, indicating the output format. Can be `"markdown"`,
#' `"html"`, or `"tt"`. `format = "html"` creates an HTML table using the *gt*
Expand All @@ -52,11 +47,12 @@
#' data frame. The structure of the returned object is a nested data frame,
#' where the first column contains name of the variable for which frequencies
#' were calculated, and the second column is a list column that contains the
#' frequency tables as data frame. See 'Examples'.
#' frequency tables as data frame. See [as.table.datawizard_table].
#'
#' There is also an `as.table()` method, which returns a table object with the
#' frequencies of the variable. This is useful for further statistical analysis,
#' e.g. for using `chisq.test()` on the frequency table. See 'Examples'.
#' e.g. for using `chisq.test()` on the frequency table. See
#' [as.table.datawizard_table].
#'
#' @section Crosstables:
#' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`
Expand All @@ -77,6 +73,8 @@
#' @return A data frame, or a list of data frames, with one frequency table
#' as data frame per variable.
#'
#' @seealso [as.prop.table]
#'
#' @examplesIf requireNamespace("poorman")
#' # frequency tables -------
#' # ------------------------
Expand Down Expand Up @@ -139,28 +137,6 @@
#' # round percentages
#' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")
#' print(out, digits = 0)
#'
#' # coerce to data frames
#' result <- data_tabulate(efc, "c172code", by = "e16sex")
#' as.data.frame(result)
#' as.data.frame(result)$table
#' as.data.frame(result, add_total = TRUE)$table
#'
#' # post-processing ------
#' # ----------------------
#'
#' out <- data_tabulate(efc, "c172code", by = "e16sex")
#' # we need to simplify the output, else we get a list of tables
#' suppressWarnings(chisq.test(as.table(out, simplify = TRUE)))
#'
#' # apply chisq.test to each table
#' out <- data_tabulate(efc, c("c172code", "e16sex"))
#' suppressWarnings(lapply(as.table(out), chisq.test))
#'
#' # can also handle grouped data frames
#' d <- data_group(mtcars, "am")
#' x <- data_tabulate(d, "cyl", by = "gear")
#' as.table(x)
#' @export
data_tabulate <- function(x, ...) {
UseMethod("data_tabulate")
Expand Down Expand Up @@ -464,10 +440,147 @@ insight::print_md
#' @export
insight::display

#' Convert a crosstable to a frequency or a proportion table
#'
#' @description
#' `as.prop.table()` is an S3 generic. It can be used on objects of class
#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the
#' arguments `by` and `proportions`.
#'
#' @param x An object created by `data_tabulate()`. It must be of class
#' `datawizard_crosstab` (or `datawizard_crosstabs`) for `as.prop.table()`.
#' @param simplify Logical, if `TRUE`, the returned table is simplified to a
#' single table object if there is only one frequency or contingency table
#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a
#' list of tables is returned. This is only relevant for the `as.table()` and
#' `as.prop.table()` methods. To ensure consistent output, the default is
#' `FALSE`.
#' @inheritParams data_tabulate
#'
#' @return For `as.prop.table()`, a table object with the proportions of the
#' crosstable if a single table is available and `simplify = TRUE`, else a
#' list of table objects. If no proportions were saved in `x`, a warning is
#' given and `NULL` is returned.
#'
#' @export
#' @seealso [data_tabulate]
#'
#' @examples
#' data(efc)
#'
#' # Some cross tabulation
#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row")
#' cross
#'
#' # Convert to a proportion table
#' as.prop.table(cross)
#'
#' # Convert to data.frame
#' result <- data_tabulate(efc, "c172code", by = "e16sex")
#' as.data.frame(result)
#' as.data.frame(result)$table
#' as.data.frame(result, add_total = TRUE)$table
#'
#' # Convert to a table that can be passed to chisq.test()
#'
#' out <- data_tabulate(efc, "c172code", by = "e16sex")
#' # we need to simplify the output, else we get a list of tables
#' tbl <- as.table(out, simplify = TRUE)
#' tbl
#' suppressWarnings(chisq.test(tbl))
#'
#' # apply chisq.test to each table
#' out <- data_tabulate(efc, c("c172code", "e16sex"))
#' suppressWarnings(lapply(as.table(out), chisq.test))
#'
#' # can also handle grouped data frames
#' d <- data_group(mtcars, "am")
#' x <- data_tabulate(d, "cyl", by = "gear")
#' as.table(x)
as.prop.table <- function(x, ...) {
# dispatch on the class of `x`; methods exist for single crosstabs
# (`datawizard_crosstab`) and lists of crosstabs (`datawizard_crosstabs`)
UseMethod("as.prop.table")
}

#' @rdname as.prop.table
#' @export
as.prop.table.datawizard_crosstab <- function(
  x,
  remove_na = TRUE,
  simplify = FALSE,
  verbose = TRUE,
  ...
) {
  # Extracts the proportions stored in the "prop_table" attribute of a single
  # crosstable and returns them as a `table` object (wrapped in a list unless
  # `simplify = TRUE`). Returns `NULL`, with a warning, if no proportions
  # were stored.

  # sanity check - the `.data.frame` method returns a list, but not the
  # default method
  if (!is.data.frame(x)) {
    x <- x[[1]]
  }
  # exact matching, so an attribute whose name merely starts with
  # "prop_table" is never picked up by accident
  prop_table <- attr(x, "prop_table", exact = TRUE)

  if (is.null(prop_table)) {
    insight::format_warning("No proportions available.")
    return(NULL)
  }

  if (remove_na) {
    na_in_rows <- "NA" %in% rownames(prop_table)
    if (verbose && ("NA" %in% colnames(prop_table) || na_in_rows)) {
      insight::format_alert("Removing NA values from frequency table.")
    }
    # drop the "NA" column, if any
    if (!is.null(prop_table[["NA"]])) {
      prop_table[["NA"]] <- NULL
    }
    # drop the "NA" row, if any. `drop = FALSE` keeps the two-dimensional
    # structure (and the dimnames) when only a single column is left
    if (na_in_rows) {
      prop_table <- prop_table[rownames(prop_table) != "NA", , drop = FALSE]
    }
  }

  # coerce to table
  result <- as.table(as.matrix(prop_table))

  # if we don't want to simplify the table, we wrap it into a list
  if (!simplify) {
    result <- list(result)
  }

  result
}

#' @export
as.prop.table.datawizard_crosstabs <- function(
  x,
  remove_na = TRUE,
  simplify = FALSE,
  verbose = TRUE,
  ...
) {
  # Applies the single-crosstab method to every element of `x` and returns
  # the resulting tables as a list (or as a single table if exactly one table
  # is found and `simplify = TRUE`). Returns `NULL` if no element has
  # proportions stored.

  # only show the message once: we announce the removal here and set
  # `verbose = FALSE` in the lapply() below. All tables are inspected, not
  # only the first one, since the "NA" category may be present in just some
  # of them. This is also safe for an empty `x`.
  if (remove_na && verbose) {
    has_na <- vapply(
      x,
      function(i) {
        prop_table <- attr(i, "prop_table", exact = TRUE)
        "NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)
      },
      logical(1)
    )
    if (any(has_na)) {
      insight::format_alert("Removing NA values from frequency table.")
    }
  }

  # tables without proportions yield `NULL` and are dropped
  out <- insight::compact_list(lapply(
    x,
    as.prop.table.datawizard_crosstab,
    remove_na = remove_na,
    simplify = TRUE,
    verbose = FALSE,
    ...
  ))

  # if no proportions found, return NULL
  if (!length(out)) {
    return(NULL)
  }

  # if only one table is returned, "unlist"
  if (length(out) == 1 && simplify) {
    out <- out[[1]]
  }
  out
}


# as.data.frame --------------------

#' @rdname data_tabulate
#' @rdname as.prop.table
#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and
#' column with the total N values are added to the data frame. `add_total` has
#' no effect in `as.data.frame()` for simple frequency tables.
Expand Down Expand Up @@ -527,7 +640,7 @@ as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables

# as.table --------------------

#' @rdname data_tabulate
#' @rdname as.prop.table
#' @export
as.table.datawizard_table <- function(
x,
Expand Down
Loading
Loading