tern/R/utils_default_stats_formats_labels.R at 78aabe18aae5488c01d0d7a83e474ae0b2dd843a · insightsengineering/tern · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
#' Get default statistical methods and their associated formats, labels, and indent modifiers
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Utility functions to get valid statistic methods for different method groups
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be
#' seen in [analyze_vars()]. See notes to understand why this is experimental.
#'
#' @param stats (`character`)\cr statistical methods to return defaults for.
#' @param levels_per_stats (named `list` of `character` or `NULL`)\cr named list where the name of each element is a
#'   statistic from `stats` and each element is the levels of a `factor` or `character` variable (or variable name),
#'   each corresponding to a single row, for which the named statistic should be calculated for. If a statistic is only
#'   calculated once (one row), the element can be either `NULL` or the name of the statistic. Each list element will be
#'   flattened such that the names of the list elements returned by the function have the format `statistic.level` (or
#'   just `statistic` for statistics calculated for a single row). Defaults to `NULL`.
#' @param tern_defaults (`list` or `vector`)\cr defaults to use to fill in missing values if no user input is given.
#'   Must be of the same type as the values that are being filled in (e.g. indentation must be integers).
#'
#' @details
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`.
#'
#' @note
#' These defaults are experimental because we use the names of functions to retrieve the default
#' statistics. This should be generalized in groups of methods according to more reasonable groupings.
#'
#' @name default_stats_formats_labels
NULL

#' @describeIn default_stats_formats_labels Get statistics available for a given method
#'   group (analyze function). To check available defaults see `tern::tern_default_stats` list.
#'
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function)
#'   to retrieve default statistics for. A character vector can be used to specify more than one statistical
#'   method group.
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical
#'   functions are used, `stats_in` needs to have them in too.
#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics.
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains
#'   `"analyze_vars_counts"`) be added to the statistical methods?
#'
#' @return
#' * `get_stats()` returns a `character` vector of statistical methods.
#'
#' @examples
#' # analyze_vars is numeric
#' num_stats <- get_stats("analyze_vars_numeric") # also the default
#'
#' # Other type
#' cnt_stats <- get_stats("analyze_vars_counts")
#'
#' # Weirdly taking the pval from count_occurrences
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval")
#'
#' # All count_occurrences
#' all_cnt_occ <- get_stats("count_occurrences")
#'
#' # Multiple
#' get_stats(c("count_occurrences", "analyze_vars_counts"))
#'
#' @export
get_stats <- function(method_groups = "analyze_vars_numeric",
                      stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) {
  checkmate::assert_character(method_groups)
  checkmate::assert_character(stats_in, null.ok = TRUE)
  checkmate::assert_character(custom_stats_in, null.ok = TRUE)
  checkmate::assert_flag(add_pval)

  # Default is still numeric
  if (any(method_groups == "analyze_vars")) {
    method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric"
  }

  type_tmp <- ifelse(any(grepl("counts$", method_groups)), "counts", "numeric") # for pval checks

  # Defaults for loop
  out <- NULL

  # Loop for multiple method groups
  for (mgi in method_groups) {
    if (mgi %in% names(tern_default_stats)) {
      out_tmp <- tern_default_stats[[mgi]]
    } else {
      stop("The selected method group (", mgi, ") has no default statistical method.")
    }
    out <- unique(c(out, out_tmp))
  }

  # Add custom stats
  out <- c(out, custom_stats_in)

  # If you added pval to the stats_in you certainly want it
  if (!is.null(stats_in) && any(grepl("^pval", stats_in))) {
    stats_in_pval_value <- stats_in[grepl("^pval", stats_in)]

    # Must be only one value between choices
    checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts", "pvalue"))

    # Mismatch with counts and numeric
    if (any(grepl("counts", method_groups)) && stats_in_pval_value != "pval_counts" ||
      any(grepl("numeric", method_groups)) && stats_in_pval_value != "pval") { # nolint
      stop(
        "Inserted p-value (", stats_in_pval_value, ") is not valid for type ",
        type_tmp, ". Use ", paste(ifelse(stats_in_pval_value == "pval", "pval_counts", "pval")),
        " instead."
      )
    }

    # Lets add it even if present (thanks to unique)
    add_pval <- TRUE
  }

  # Mainly used in "analyze_vars" but it could be necessary elsewhere
  if (isTRUE(add_pval)) {
    if (any(grepl("counts", method_groups))) {
      out <- unique(c(out, "pval_counts"))
    } else {
      out <- unique(c(out, "pval"))
    }
  }

  # Filtering for stats_in (character vector)
  if (!is.null(stats_in)) {
    out <- intersect(stats_in, out) # It orders them too
  }

  # If intersect did not find matches (and no pval?) -> error
  if (length(out) == 0) {
    stop(
      "The selected method group(s) (", paste0(method_groups, collapse = ", "), ")",
      " do not have the required default statistical methods:\n",
      paste0(stats_in, collapse = " ")
    )
  }

  out
}

#' @describeIn default_stats_formats_labels Get statistical *names* available for a given method
#'   group (analyze function). Please use the `s_*` functions to get the statistical names.
#' @param stat_results (`list`)\cr list of statistical results. It should be used close to the end of
#'   a statistical function. See examples for a structure with two statistical results and two groups.
#' @param stat_names_in (`character`)\cr custom modification of statistical values.
#'
#' @return
#' * `get_stat_names()` returns a named list of `character` vectors, indicating the names of
#'    statistical outputs.
#'
#' @examples
#' stat_results <- list(
#'   "n" = list("M" = 1, "F" = 2),
#'   "count_fraction" = list("M" = c(1, 0.2), "F" = c(2, 0.1))
#' )
#' get_stat_names(stat_results)
#' get_stat_names(stat_results, list("n" = "argh"))
#'
#' @export
get_stat_names <- function(stat_results, stat_names_in = NULL) {
  checkmate::assert_character(names(stat_results), min.len = 1)
  checkmate::assert_list(stat_names_in, null.ok = TRUE)

  stat_nms_from_stats <- lapply(stat_results, function(si) {
    nm <- names(si)
    if (is.null(nm)) {
      nm <- rep(NA_character_, length(si)) # no statistical names
    }
    nm
  })

  # Modify some with custom stat names
  if (!is.null(stat_names_in)) {
    # Stats is the main
    common_names <- intersect(names(stat_nms_from_stats), names(stat_names_in))
    stat_nms_from_stats[common_names] <- stat_names_in[common_names]
  }

  stat_nms_from_stats
}

# Utility function used to separate custom stats (user-defined functions) from defaults
.split_std_from_custom_stats <- function(stats_in) {
  out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL)
  if (is.list(stats_in)) {
    is_custom_fnc <- sapply(stats_in, is.function)
    checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named")
    out[["custom_stats"]] <- stats_in[is_custom_fnc]
    out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc])
    all_stats <- names(stats_in) # to keep the order
    all_stats[!is_custom_fnc] <- out[["default_stats"]]
    out[["all_stats"]] <- all_stats
  } else {
    out[["default_stats"]] <- out[["all_stats"]] <- stats_in
  }
  out
}

# Utility function to apply statistical functions
.apply_stat_functions <- function(default_stat_fnc, custom_stat_fnc_list, args_list) {
  # Default checks
  checkmate::assert_function(default_stat_fnc)
  checkmate::assert_list(custom_stat_fnc_list, types = "function", null.ok = TRUE, names = "named")
  checkmate::assert_list(args_list)

  # Checking custom stats have same formals
  if (!is.null(custom_stat_fnc_list)) {
    fundamental_call_to_data <- names(formals(default_stat_fnc))[[1]]
    for (fnc in custom_stat_fnc_list) {
      if (!identical(names(formals(fnc))[[1]], fundamental_call_to_data)) {
        stop(
          "The first parameter of a custom statistical function needs to be the same (it can be `df` or `x`) ",
          "as the default statistical function. In this case your custom function has ", names(formals(fnc))[[1]],
          " as first parameter, while the default function has ", fundamental_call_to_data, "."
        )
      }
      if (!any(names(formals(fnc)) == "...")) {
        stop(
          "The custom statistical function needs to have `...` as a parameter to accept additional arguments. ",
          "In this case your custom function does not have `...`."
        )
      }
    }
  }

  # Applying
  out_default <- do.call(default_stat_fnc, args = args_list)
  out_custom <- lapply(custom_stat_fnc_list, function(fnc) do.call(fnc, args = args_list))

  # Merging
  c(out_default, out_custom)
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#'   To check available defaults see list `tern::tern_default_formats`.
#'
#' @param formats_in (named `vector`)\cr custom formats to use instead of defaults. Can be a character vector with
#'   values from [formatters::list_valid_format_labels()] or custom format functions. Defaults to `NULL` for any rows
#'   with no value is provided. See Details.
#'
#' @details if `formats_in` is `"default"`, instead of populating the
#'   return value with tern defaults, the return value will specify
#'   the `"default"` format for each element. This is useful
#'   primarily when formatting behavior should be inherited from a
#'   format specified via the `format` or `formats_var` argument to
#'   `analyze`.
#'
#' @return
#' * `get_formats_from_stats()` returns a named list of formats as strings or functions.
#'
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and
#'   return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`.
#'
#' @examples
#' # Defaults formats
#' get_formats_from_stats(num_stats)
#' get_formats_from_stats(cnt_stats)
#' get_formats_from_stats(only_pval)
#' get_formats_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx")))
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx")))
#'
#' @seealso [formatting_functions]
#'
#' @export
get_formats_from_stats <- function(stats,
                                   formats_in = NULL,
                                   levels_per_stats = NULL,
                                   tern_defaults = tern_default_formats) {
  checkmate::assert_character(stats, min.len = 1)
  # It may be a list if there is a function in the formats
  if (checkmate::test_list(formats_in, null.ok = TRUE)) {
    checkmate::assert_list(formats_in, null.ok = TRUE)
    # Or it may be a vector of characters
  } else {
    checkmate::assert_character(formats_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If unnamed formats given as formats_in and same number of stats, use one format per stat
  if (
    !is.null(formats_in) && length(formats_in) == length(stats) &&
      is.null(names(formats_in)) && is.null(levels_per_stats)
  ) {
    out <- as.list(formats_in) %>% setNames(stats)
    return(out)
  }

  full_default <- identical(formats_in, "default")

  if (full_default) {
    ## act as if we got NULL to get the right structure for return value
    ## then replace each element with "default" below
    formats_in <- NULL
  }

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats)

  # Apply custom formats
  out <- .fill_in_vals_by_stats(levels_per_stats, formats_in, tern_defaults)

  if (full_default) {
    out <- setNames(rep("default", length(out)), names(out))
  } else {
    # Default to NULL if no format
    which_null <- names(which(sapply(levels_per_stats, is.null)))
    levels_per_stats[which_null] <- which_null
    case_input_is_not_stat <- unlist(out, use.names = FALSE) == unlist(levels_per_stats, use.names = FALSE)
    out[names(out) == out | case_input_is_not_stat] <- list(NULL)
  }

  out
}

#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics.
#'   To check for available defaults see list `tern::tern_default_labels`.
#'
#' @param labels_in (named `character`)\cr custom labels to use instead of defaults. If no value is provided, the
#'   variable level (if rows correspond to levels of a variable) or statistic name will be used as label.
#' @param label_attr_from_stats (named `list`)\cr if `labels_in = NULL`, then this will be used instead. It is a list
#'   of values defined in statistical functions as default labels. Values are ignored if `labels_in` is provided or `""`
#'   values are provided.
#'
#' @return
#' * `get_labels_from_stats()` returns a named list of labels as strings.
#'
#' @examples
#' # Defaults labels
#' get_labels_from_stats(num_stats)
#' get_labels_from_stats(cnt_stats)
#' get_labels_from_stats(only_pval)
#' get_labels_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction"))
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions")))
#'
#' @export
get_labels_from_stats <- function(stats,
                                  labels_in = NULL,
                                  levels_per_stats = NULL,
                                  label_attr_from_stats = NULL,
                                  tern_defaults = tern_default_labels) {
  checkmate::assert_character(stats, min.len = 1)

  # If labels_in is NULL, use label_attr_from_stats
  if (is.null(labels_in)) {
    labels_in <- label_attr_from_stats
    labels_in <- label_attr_from_stats[
      nzchar(label_attr_from_stats) &
        !sapply(label_attr_from_stats, is.null) &
        !is.na(label_attr_from_stats)
    ]
  }

  # It may be a list
  if (checkmate::test_list(labels_in, null.ok = TRUE)) {
    checkmate::assert_list(labels_in, null.ok = TRUE)
    # Or it may be a vector of characters
  } else {
    checkmate::assert_character(labels_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If unnamed labels given as labels_in and same number of stats, use one label per stat
  if (
    !is.null(labels_in) && length(labels_in) == length(stats) &&
      is.null(names(labels_in)) && is.null(levels_per_stats)
  ) {
    out <- as.list(labels_in) %>% setNames(stats)
    return(out)
  }

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats)

  # Apply custom labels
  out <- .fill_in_vals_by_stats(levels_per_stats, labels_in, tern_defaults)
  out
}

#' @describeIn default_stats_formats_labels Get row indent modifiers corresponding to a list of statistics/rows.
#'
#' @param indents_in (named `integer`)\cr custom row indent modifiers to use instead of defaults. Defaults to `0L` for
#'   all values.
#' @param row_nms `r lifecycle::badge("deprecated")` Deprecation cycle started. See the `levels_per_stats` parameter
#'   for details.
#'
#' @return
#' * `get_indents_from_stats()` returns a named list of indentation modifiers as integers.
#'
#' @examples
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L)
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L))
#' get_indents_from_stats(
#'   all_cnt_occ,
#'   indents_in = list(a = 2L, count.a = 1L, count.b = 5L)
#' )
#'
#' @export
get_indents_from_stats <- function(stats,
                                   indents_in = NULL,
                                   levels_per_stats = NULL,
                                   tern_defaults = as.list(rep(0L, length(stats))) %>% setNames(stats),
                                   row_nms = lifecycle::deprecated()) {
  checkmate::assert_character(stats, min.len = 1)
  # It may be a list
  if (checkmate::test_list(indents_in, null.ok = TRUE)) {
    checkmate::assert_list(indents_in, null.ok = TRUE)
    # Or it may be a vector of integers
  } else {
    checkmate::assert_integerish(indents_in, null.ok = TRUE)
  }
  checkmate::assert_list(levels_per_stats, null.ok = TRUE)

  # If levels_per_stats not given, assume one row per statistic
  if (is.null(levels_per_stats)) levels_per_stats <- as.list(stats) %>% setNames(stats)

  # Single indentation level for all rows
  if (is.null(names(indents_in)) && length(indents_in) == 1) {
    out <- rep(indents_in, length(levels_per_stats %>% unlist()))
    return(out)
  }

  # Apply custom indentation
  out <- .fill_in_vals_by_stats(levels_per_stats, indents_in, tern_defaults)
  out
}

# Function to loop over each stat and levels to set correct values
.fill_in_vals_by_stats <- function(levels_per_stats, user_in, tern_defaults) {
  out <- list()

  for (stat_i in names(levels_per_stats)) {
    # Get all levels of the statistic
    all_lvls <- levels_per_stats[[stat_i]]

    if ((length(all_lvls) == 1 && all_lvls == stat_i) || is.null(all_lvls)) { # One row per statistic
      out[[stat_i]] <- if (stat_i %in% names(user_in)) { # 1. Check for stat_i in user input
        user_in[[stat_i]]
      } else if (stat_i %in% names(tern_defaults)) { # 2. Check for stat_i in tern defaults
        tern_defaults[[stat_i]]
      } else { # 3. Otherwise stat_i
        stat_i
      }
    } else { # One row per combination of variable level and statistic
      # Loop over levels for each statistic
      for (lev_i in all_lvls) {
        # Construct row name (stat_i.lev_i)
        row_nm <- paste(stat_i, lev_i, sep = ".")

        out[[row_nm]] <- if (row_nm %in% names(user_in)) { # 1. Check for stat_i.lev_i in user input
          user_in[[row_nm]]
        } else if (lev_i %in% names(user_in)) { # 2. Check for lev_i in user input
          user_in[[lev_i]]
        } else if (stat_i %in% names(user_in)) { # 3. Check for stat_i in user input
          user_in[[stat_i]]
        } else if (lev_i %in% names(tern_defaults)) { # 4. Check for lev_i in tern defaults (only used for labels)
          tern_defaults[[lev_i]]
        } else if (stat_i %in% names(tern_defaults)) { # 5. Check for stat_i in tern defaults
          tern_defaults[[stat_i]]
        } else { # 6. Otherwise lev_i
          lev_i
        }
      }
    }
  }

  out
}

# Custom unlist function to retain NULL as "NULL" or NA
.unlist_keep_nulls <- function(lst, null_placeholder = "NULL", recursive = FALSE) {
  lapply(lst, function(x) if (is.null(x)) null_placeholder else x) %>%
    unlist(recursive = recursive)
}

#' Update labels according to control specifications
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Given a list of statistic labels and and a list of control parameters, updates labels with a relevant
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied
#' via `labels_custom` will not be updated regardless of `control`.
#'
#' @param labels_default (named `character`)\cr a named vector of statistic labels to modify
#'   according to the control specifications. Labels that are explicitly defined in `labels_custom` will
#'   not be affected.
#' @param labels_custom (named `character`)\cr named vector of labels that are customized by
#'   the user and should not be affected by `control`.
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels.
#'
#' @return A named character vector of labels with control specifications applied to relevant labels.
#'
#' @examples
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57)
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>%
#'   labels_use_control(control = control)
#'
#' @export
labels_use_control <- function(labels_default, control, labels_custom = NULL) {
  if ("conf_level" %in% names(control)) {
    labels_default <- sapply(
      names(labels_default),
      function(x) {
        if (!x %in% names(labels_custom)) {
          gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]]))
        } else {
          labels_default[[x]]
        }
      }
    )
  }
  if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) &&
    !"quantiles" %in% names(labels_custom)) { # nolint
    labels_default["quantiles"] <- gsub(
      "[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""),
      labels_default["quantiles"]
    )
  }
  if ("quantiles" %in% names(control) && "quantiles_lower" %in% names(labels_default) &&
    !"quantiles_lower" %in% names(labels_custom)) { # nolint
    labels_default["quantiles_lower"] <- gsub(
      "[0-9]+%-ile", paste0(control[["quantiles"]][1] * 100, "%-ile", ""),
      labels_default["quantiles_lower"]
    )
  }
  if ("quantiles" %in% names(control) && "quantiles_upper" %in% names(labels_default) &&
    !"quantiles_upper" %in% names(labels_custom)) { # nolint
    labels_default["quantiles_upper"] <- gsub(
      "[0-9]+%-ile", paste0(control[["quantiles"]][2] * 100, "%-ile", ""),
      labels_default["quantiles_upper"]
    )
  }
  if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) &&
    !"mean_pval" %in% names(labels_custom)) { # nolint
    labels_default["mean_pval"] <- gsub(
      "p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"]
    )
  }

  labels_default
}

# tern_default_stats -----------------------------------------------------------
#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`.
#'
#' @format
#' * `tern_default_stats` is a named list of available statistics, with each element
#'   named for their corresponding statistical method group.
#'
#' @export
tern_default_stats <- list(
  abnormal = c("fraction"),
  abnormal_by_baseline = c("fraction"),
  abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_lab_worsen_by_baseline = c("fraction"),
  analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"),
  analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "fraction", "n_blq"),
  analyze_vars_numeric = c(
    "n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval",
    "median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv",
    "geom_mean", "geom_sd", "geom_mean_sd", "geom_mean_ci", "geom_cv",
    "median_ci_3d",
    "mean_ci_3d", "geom_mean_ci_3d"
  ),
  count_cumulative = c("count_fraction"),
  count_missed_doses = c("n", "count_fraction"),
  count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"),
  count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"),
  count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"),
  estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci", "n_unique", "n_rate"),
  estimate_multinomial_response = c("n_prop", "prop_ci"),
  estimate_odds_ratio = c("or_ci", "n_tot"),
  estimate_proportion = c("n_prop", "prop_ci"),
  estimate_proportion_diff = c("diff", "diff_ci"),
  summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"),
  summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"),
  summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
  summarize_num_patients = c("unique", "nonunique", "unique_count"),
  summarize_patients_events_in_cols = c("unique", "all"),
  surv_time = c(
    "median", "median_ci", "median_ci_3d", "quantiles",
    "quantiles_lower", "quantiles_upper", "range_censor", "range_event", "range"
  ),
  surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "event_free_rate_3d"),
  surv_timepoint_diff = c("rate_diff", "rate_diff_ci", "ztest_pval", "rate_diff_ci_3d"),
  tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
  tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff"),
  tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
  tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval", "riskdiff"),
  test_proportion_diff = c("pval")
)

# tern_default_formats ---------------------------------------------------------
#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`.
#'
#' @format
#' * `tern_default_formats` is a named vector of available default formats, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_formats <- c(
  ci = list(format_extreme_values_ci(2L)),
  count = "xx.x",
  count_fraction = format_count_fraction,
  count_fraction_fixed_dp = format_count_fraction_fixed_dp,
  cv = "xx.xx",
  event_free_rate = "xx.xxx",
  fraction = format_fraction_fixed_dp,
  geom_cv = "xx.xx",
  geom_mean = "xx.xx",
  geom_mean_ci = "(xx.x, xx.x)",
  geom_mean_ci_3d = "xx.x (xx.x - xx.x)",
  geom_mean_sd = "xx.xx (xx.xx)",
  geom_sd = "xx.xx",
  hr = list(format_extreme_values(2L)),
  hr_ci = "(xx.x, xx.x)",
  hr_ci_3d = "xx.x (xx.x - xx.x)",
  iqr = "xx.xx",
  lsmean = "xx.x",
  lsmean_diff = "xx.x",
  lsmean_diff_ci = "(xx.x, xx.x)",
  mad = "xx.xx",
  max = "xx.xx",
  mean = "xx.xx",
  mean_ci = "(xx.x, xx.x)",
  mean_ci_3d = "xx.x (xx.x - xx.x)",
  mean_pval = "x.xxx | (<0.0001)",
  mean_sd = "xx.xx (xx.xx)",
  mean_sdi = "(xx.x, xx.x)",
  mean_se = "xx.xx (xx.xx)",
  mean_sei = "(xx.x, xx.x)",
  median = "xx.xx",
  median_ci = "(xx.x, xx.x)",
  median_ci_3d = "xx.x (xx.x - xx.x)",
  median_range = "xx.xx (xx.xx - xx.xx)",
  min = "xx.xx",
  n = "xx.x",
  n_blq = "xx.x",
  n_events = "xx.x",
  n_patients = "xx.x (xx.xx%)",
  n_prop = "xx (xx.xx%)",
  n_rate = "xx (xx.xx)",
  n_rsp = "xx.x",
  n_tot = "xx.x",
  n_tot_events = "xx.x",
  n_unique = "xx.x",
  nonunique = "xx.x",
  or = list(format_extreme_values(2L)),
  or_ci = "xx.x (xx.x - xx.x)",
  person_years = "xx.xx",
  prop = "xx.xx%",
  prop_ci = "(xx.xx, xx.xx)",
  pt_at_risk = "xx.x",
  pval = "x.xxx | (<0.0001)",
  pvalue = "x.xxx | (<0.0001)",
  pval_counts = "x.xxx | (<0.0001)",
  quantiles = "xx.xx - xx.xx",
  quantiles_lower = "xx.x (xx.x - xx.x)",
  quantiles_upper = "xx.x (xx.x - xx.x)",
  range = "xx.xx - xx.xx",
  range_censor = "xx.xx to xx.xx",
  range_event = "xx.xx to xx.xx",
  rate = "xx.xxx",
  rate_ci = "(xx.xxx, xx.xxx)",
  rate_diff = "xx.x",
  rate_diff_ci = "(xx.x, xx.x)",
  rate_diff_ci_3d = format_xx("xx.x (xx.x, xx.x)"),
  rate_ratio = "xx.xxx",
  rate_ratio_ci = "(xx.xxx, xx.xxx)",
  rate_se = "xx.x",
  riskdiff = "xx.xx (xx.xx - xx.xx)",
  sd = "xx.xx",
  se = "xx.xx",
  sum = "xx.xx",
  sum_exposure = "xx.x",
  unique = format_count_fraction_fixed_dp,
  unique_count = "xx.x",
  ztest_pval = "x.xxx | (<0.0001)"
)

# tern_default_labels ----------------------------------------------------------
#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`.
#'
#' @format
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_labels <- c(
  cv = "CV (%)",
  iqr = "IQR",
  geom_cv = "CV % Geometric Mean",
  geom_mean = "Geometric Mean",
  geom_mean_sd = "Geometric Mean (SD)",
  geom_mean_ci = "Geometric Mean 95% CI",
  geom_mean_ci_3d = "Geometric Mean (95% CI)",
  geom_sd = "Geometric SD",
  mad = "Median Absolute Deviation",
  max = "Maximum",
  mean = "Mean",
  mean_ci = "Mean 95% CI",
  mean_ci_3d = "Mean (95% CI)",
  mean_pval = "Mean p-value (H0: mean = 0)",
  mean_sd = "Mean (SD)",
  mean_sdi = "Mean -/+ 1xSD",
  mean_se = "Mean (SE)",
  mean_sei = "Mean -/+ 1xSE",
  median = "Median",
  median_ci = "Median 95% CI",
  median_ci_3d = "Median (95% CI)",
  median_range = "Median (Min - Max)",
  min = "Minimum",
  n = "n",
  n_blq = "n_blq",
  nonunique = "Number of events",
  pval = "p-value (t-test)", # Default for numeric
  pval_counts = "p-value (chi-squared test)", # Default for counts
  quantiles = "25% and 75%-ile",
  quantiles_lower = "25%-ile (95% CI)",
  quantiles_upper = "75%-ile (95% CI)",
  range = "Min - Max",
  range_censor = "Range (censored)",
  range_event = "Range (event)",
  rate = "Adjusted Rate",
  rate_ratio = "Adjusted Rate Ratio",
  sd = "SD",
  se = "SE",
  sum = "Sum",
  unique = "Number of patients with at least one event"
)

#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics:
#'   [analyze_vars()] and [analyze_vars_in_cols()] principally.
#'
#' @param type (`string`)\cr `"numeric"` or `"counts"`.
#'
#' @return
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type.
#'
#' @examples
#' summary_formats()
#' summary_formats(type = "counts", include_pval = TRUE)
#'
#' @export
summary_formats <- function(type = "numeric", include_pval = FALSE) {
  met_grp <- paste0(c("analyze_vars", type), collapse = "_")
  get_formats_from_stats(get_stats(met_grp, add_pval = include_pval))
}

#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics.
#'   Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats`.
#'
#' @param include_pval (`flag`)\cr same as the `add_pval` argument in [get_stats()].
#'
#' @details
#' `summary_*` quick get functions for labels or formats uses `get_stats` and `get_labels_from_stats` or
#' `get_formats_from_stats` respectively to retrieve relevant information.
#'
#' @return
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
#'
#' @examples
#' summary_labels()
#' summary_labels(type = "counts", include_pval = TRUE)
#'
#' @export
summary_labels <- function(type = "numeric", include_pval = FALSE) {
  met_grp <- paste0(c("analyze_vars", type), collapse = "_")
  get_labels_from_stats(get_stats(met_grp, add_pval = include_pval))
}