Skip to content

Commit 6fc1426

Browse files
Add as.prop.table() to extract proportions from data_tabulate() (#656)
Co-authored-by: Etienne Bacher <[email protected]> Co-authored-by: etiennebacher <[email protected]>
1 parent 4c041df commit 6fc1426

File tree

11 files changed

+470
-200
lines changed

11 files changed

+470
-200
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: datawizard
33
Title: Easy Data Wrangling and Statistical Transformations
4-
Version: 1.2.0.8
4+
Version: 1.2.0.9
55
Authors@R: c(
66
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
77
comment = c(ORCID = "0000-0003-1995-6531")),

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ S3method(as.double,parameters_smoothness)
88
S3method(as.numeric,parameters_kurtosis)
99
S3method(as.numeric,parameters_skewness)
1010
S3method(as.numeric,parameters_smoothness)
11+
S3method(as.prop.table,datawizard_crosstab)
12+
S3method(as.prop.table,datawizard_crosstabs)
1113
S3method(as.table,datawizard_crosstab)
1214
S3method(as.table,datawizard_crosstabs)
1315
S3method(as.table,datawizard_table)
@@ -230,6 +232,7 @@ S3method(winsorize,factor)
230232
S3method(winsorize,logical)
231233
S3method(winsorize,numeric)
232234
export(adjust)
235+
export(as.prop.table)
233236
export(assign_labels)
234237
export(categorize)
235238
export(center)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ BREAKING CHANGES
1010

1111
CHANGES
1212

13+
* `data_tabulate()` now saves the table of proportions for crosstables as
14+
attribute, accessible via the new `as.prop.table()` method (#656).
15+
1316
* Due to changes in the package `insight`, `data_tabulate()` no longer prints
1417
decimals when all values in a column are integers (#641).
1518

R/data_tabulate.R

Lines changed: 144 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,6 @@
3232
#' used for large numbers. If `NULL` (default), a big mark is added automatically for
3333
#' large numbers (i.e. numbers with more than 5 digits). If you want to remove
3434
#' the big mark, set `big_mark = ""`.
35-
#' @param simplify Logical, if `TRUE`, the returned table is simplified to a
36-
#' single table object if there is only one frequency or contingency table
37-
#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a
38-
#' list of tables is returned. This is only relevant for the `as.table()`
39-
#' methods. To ensure consistent output, the default is `FALSE`.
4035
#' @param object An object returned by `data_tabulate()`.
4136
#' @param format String, indicating the output format. Can be `"markdown"`
4237
#' `"html"`, or `"tt"`. `format = "html"` create an HTML table using the *gt*
@@ -52,11 +47,12 @@
5247
#' data frame. The structure of the returned object is a nested data frame,
5348
#' where the first column contains name of the variable for which frequencies
5449
#' were calculated, and the second column is a list column that contains the
55-
#' frequency tables as data frame. See 'Examples'.
50+
#' frequency tables as data frame. See [as.table.datawizard_table].
5651
#'
5752
#' There is also an `as.table()` method, which returns a table object with the
5853
#' frequencies of the variable. This is useful for further statistical analysis,
59-
#' e.g. for using `chisq.test()` on the frequency table. See 'Examples'.
54+
#' e.g. for using `chisq.test()` on the frequency table. See
55+
#' [as.table.datawizard_table].
6056
#'
6157
#' @section Crosstables:
6258
#' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`
@@ -77,6 +73,8 @@
7773
#' @return A data frame, or a list of data frames, with one frequency table
7874
#' as data frame per variable.
7975
#'
76+
#' @seealso [as.prop.table]
77+
#'
8078
#' @examplesIf requireNamespace("poorman")
8179
#' # frequency tables -------
8280
#' # ------------------------
@@ -139,28 +137,6 @@
139137
#' # round percentages
140138
#' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")
141139
#' print(out, digits = 0)
142-
#'
143-
#' # coerce to data frames
144-
#' result <- data_tabulate(efc, "c172code", by = "e16sex")
145-
#' as.data.frame(result)
146-
#' as.data.frame(result)$table
147-
#' as.data.frame(result, add_total = TRUE)$table
148-
#'
149-
#' # post-processing ------
150-
#' # ----------------------
151-
#'
152-
#' out <- data_tabulate(efc, "c172code", by = "e16sex")
153-
#' # we need to simplify the output, else we get a list of tables
154-
#' suppressWarnings(chisq.test(as.table(out, simplify = TRUE)))
155-
#'
156-
#' # apply chisq.test to each table
157-
#' out <- data_tabulate(efc, c("c172code", "e16sex"))
158-
#' suppressWarnings(lapply(as.table(out), chisq.test))
159-
#'
160-
#' # can also handle grouped data frames
161-
#' d <- data_group(mtcars, "am")
162-
#' x <- data_tabulate(d, "cyl", by = "gear")
163-
#' as.table(x)
164140
#' @export
165141
data_tabulate <- function(x, ...) {
166142
UseMethod("data_tabulate")
@@ -464,10 +440,147 @@ insight::print_md
464440
#' @export
465441
insight::display
466442

443+
#' Convert a crosstable to a frequency or a propensity table
444+
#'
445+
#' @description
446+
#' `as.prop.table()` is an S3 generic. It can be used on objects of class
447+
#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the
448+
#' arguments `by` and `proportions`.
449+
#'
450+
#' @param x An object created by `data_tabulate()`. It must be of class
451+
#' `datawizard_crosstab` for `as.prop.table()`.
452+
#' @param simplify Logical, if `TRUE`, the returned table is simplified to a
453+
#' single table object if there is only one frequency or contingency table
454+
#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a
455+
#' list of tables is returned. This is only relevant for the `as.table()`
456+
#' methods. To ensure consistent output, the default is `FALSE`.
457+
#' @inheritParams data_tabulate
458+
#'
459+
#' @export
460+
#' @seealso [data_tabulate]
461+
#'
462+
#' @examples
463+
#' data(efc)
464+
#'
465+
#' # Some cross tabulation
466+
#' cross <- data_tabulate(efc, select = "e42dep", by = "c172code", proportions = "row")
467+
#' cross
468+
#'
469+
#' # Convert to a propensity table
470+
#' as.prop.table(cross)
471+
#'
472+
#' # Convert to data.frame
473+
#' result <- data_tabulate(efc, "c172code", by = "e16sex")
474+
#' as.data.frame(result)
475+
#' as.data.frame(result)$table
476+
#' as.data.frame(result, add_total = TRUE)$table
477+
#'
478+
#' # Convert to a table that can be passed to chisq.test()
479+
#'
480+
#' out <- data_tabulate(efc, "c172code", by = "e16sex")
481+
#' # we need to simplify the output, else we get a list of tables
482+
#' tbl <- as.table(out, simplify = TRUE)
483+
#' tbl
484+
#' suppressWarnings(chisq.test(tbl))
485+
#'
486+
#' # apply chisq.test to each table
487+
#' out <- data_tabulate(efc, c("c172code", "e16sex"))
488+
#' suppressWarnings(lapply(as.table(out), chisq.test))
489+
#'
490+
#' # can also handle grouped data frames
491+
#' d <- data_group(mtcars, "am")
492+
#' x <- data_tabulate(d, "cyl", by = "gear")
493+
#' as.table(x)
494+
as.prop.table <- function(x, ...) {
495+
UseMethod("as.prop.table")
496+
}
497+
498+
#' @rdname as.prop.table
499+
#' @export
500+
as.prop.table.datawizard_crosstab <- function(
501+
x,
502+
remove_na = TRUE,
503+
simplify = FALSE,
504+
verbose = TRUE,
505+
...
506+
) {
507+
# sanity check - the `.data.frame` method returns a list, but not the
508+
# default method
509+
if (!is.data.frame(x)) {
510+
x <- x[[1]]
511+
}
512+
prop_table <- attributes(x)$prop_table
513+
514+
if (is.null(prop_table)) {
515+
insight::format_warning("No proportions available.")
516+
return(NULL)
517+
}
518+
519+
if (remove_na) {
520+
if (
521+
verbose &&
522+
("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table))
523+
) {
524+
insight::format_alert("Removing NA values from frequency table.")
525+
}
526+
if (!is.null(prop_table[["NA"]])) {
527+
prop_table[["NA"]] <- NULL
528+
}
529+
if ("NA" %in% rownames(prop_table)) {
530+
prop_table <- prop_table[rownames(prop_table) != "NA", ]
531+
}
532+
}
533+
# coerce to table
534+
result <- as.table(as.matrix(prop_table))
535+
# if we don't want to simplify the table, we wrap it into a list
536+
if (!simplify) {
537+
result <- list(result)
538+
}
539+
540+
result
541+
}
542+
543+
#' @export
544+
as.prop.table.datawizard_crosstabs <- function(
545+
x,
546+
remove_na = TRUE,
547+
simplify = FALSE,
548+
verbose = TRUE,
549+
...
550+
) {
551+
# only show message once we set `verbose = FALSE` in the lapply()
552+
if (remove_na && verbose) {
553+
prop_table <- attributes(x[[1]])$prop_table
554+
if ("NA" %in% colnames(prop_table) || "NA" %in% rownames(prop_table)) {
555+
insight::format_alert("Removing NA values from frequency table.")
556+
}
557+
}
558+
559+
out <- insight::compact_list(lapply(
560+
x,
561+
as.prop.table.datawizard_crosstab,
562+
remove_na = remove_na,
563+
simplify = TRUE,
564+
verbose = FALSE,
565+
...
566+
))
567+
568+
# if no proportions found, return NULL
569+
if (!length(out)) {
570+
return(NULL)
571+
}
572+
573+
# if only one table is returned, "unlist"
574+
if (length(out) == 1 && simplify) {
575+
out <- out[[1]]
576+
}
577+
out
578+
}
579+
467580

468581
# as.data.frame --------------------
469582

470-
#' @rdname data_tabulate
583+
#' @rdname as.prop.table
471584
#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and
472585
#' column with the total N values are added to the data frame. `add_total` has
473586
#' no effect in `as.data.frame()` for simple frequency tables.
@@ -527,7 +640,7 @@ as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables
527640

528641
# as.table --------------------
529642

530-
#' @rdname data_tabulate
643+
#' @rdname as.prop.table
531644
#' @export
532645
as.table.datawizard_table <- function(
533646
x,

0 commit comments

Comments
 (0)