Skip to content

Commit c386b11

Browse files
committed
Merge remote-tracking branch 'origin/dev-current'
2 parents 68b03c9 + 71d5a87 commit c386b11

18 files changed

+541
-288
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,14 @@ Imports:
3434
Suggests:
3535
forcats,
3636
formatR,
37+
haven,
3738
kableExtra,
3839
knitr,
3940
magrittr,
4041
rmarkdown,
4142
rstudioapi,
4243
backports
43-
Depends: R (>= 2.10)
44+
Depends: R (>= 3.5)
4445
VignetteBuilder: knitr
4546
LazyData: true
4647
License: GPL-2

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export(unlabel)
2828
export(use_custom_lang)
2929
export(view)
3030
export(what.is)
31+
export(zap_attr)
3132
import(htmltools)
3233
importFrom(base64enc,base64encode)
3334
importFrom(checkmate,anyNaN)

NEWS.md

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
1-
# summarytools 1.1.0
2-
- Optimized metadata extraction
3-
- Improved support for dplyr::group_by()
4-
- `llabel()` wrapper added for `label(x, all = TRUE)`
1+
# summarytools 1.1.0 (2025-02-12)
52
- In `stby()`
63
+ New parameter `useNA` adds a group for missing values in
74
grouping variable(s); set to `FALSE` to avoid the message displayed
85
when `NA`s are detected.
96
- In `tb()`
107
+ Fix for broken proportions in freq tables
11-
+ New parameters `fct.to.chr` and `recalculate` for freq tables
8+
+ New parameters `fct.to.chr` and `recalculate` for `freq()` tables
129
+ Parameter `na.rm` deprecated
1310
- In `dfSummary()`:
1411
+ New parameter `class` allows switching off class reporting in *Variable*
1512
column.
16-
- In `freq()`:
17-
+ New parameter `na.val` allows specifying a value (factor level) that
13+
- In `freq()`, `ctable()` and `dfSummary()`:
14+
+ New parameter `na.val` allows specifying a value / factor level that
1815
is to be considered `NA`. In turn, the value "(Missing)" is no longer
19-
considered missing by default; using `na.val = "(Missing)"`
20-
will yield the same results.
16+
considered missing by default (using `na.val = "(Missing)"`
17+
will yield the same results).
2118
+ Fix for weights not being applied correctly in by-group processing.
19+
+ **Labelled vectors** ("labelled" / "haven_labelled") are treated like
20+
factors in `freq()`, and in `dfSummary()` when all values have a label.
21+
Future versions will extend support to `ctable()`.
2222
- In `descr()`:
2323
+ "n" (total number of observations, also displayed in heading) added to
2424
available statistics.
@@ -28,6 +28,8 @@
2828
excludes *Pct. Valid* from, *common* statistics.
2929
+ Fix for *N* in header showing 1st group's size rather than global size.
3030
+ Fix for weights not being applied correctly in by-group processing.
31+
- `define_keywords()` now uses RStudio's api for dialogs.
32+
- `llabel()` wrapper added for `label(x, all = TRUE)`
3133

3234
# summarytools 1.0.2 (2022-07-10)
3335
- Github-only release

R/args_validation.R

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,15 @@ check_args <- function(mc, dotArgs, caller) {
168168
if (!isTRUE(test_character(pf$na.val, any.missing = FALSE, len = 1))) {
169169
errmsg %+=% "invalid na.val value; must be character of length 1"
170170
}
171-
if (anyNA(pf$x) ||
172-
(is.factor(pf$x) && !pf$na.val %in% levels(pf$x)) ||
173-
(is.character(pf$x) && !pf$na.val %in% pf$x)) {
171+
172+
# If na.val is specified and one of two conditions is TRUE, we set it
173+
# back to its default (NULL) value:
174+
# - There are <NA> values in the vector/factor or
175+
# - na.val is not in a factor's levels
176+
if ((nas_found <- anyNA(pf$x)) ||
177+
(is.factor(pf$x) && !pf$na.val %in% levels(pf$x))) {
178+
if (nas_found && !isTRUE(st_options("freq.silent")))
179+
message("NA values detected - na.val will be ignored")
174180
pf$na.val <- NULL
175181
}
176182
}
@@ -265,13 +271,11 @@ check_args <- function(mc, dotArgs, caller) {
265271
errmsg %+=% "invalid na.val value; must be character of length 1"
266272
}
267273
if (anyNA(pf$x) ||
268-
(is.factor(pf$x) && !pf$na.val %in% levels(pf$x)) ||
269-
(is.character(pf$x) && !pf$na.val %in% pf$x)) {
274+
(is.factor(pf$x) && !pf$na.val %in% levels(pf$x))) {
270275
pf$na.val.x <- NULL
271276
}
272277
if (anyNA(pf$y) ||
273-
(is.factor(pf$y) && !pf$na.val %in% levels(pf$y)) ||
274-
(is.character(pf$y) && !pf$na.val %in% pf$y)) {
278+
(is.factor(pf$y) && !pf$na.val %in% levels(pf$y))) {
275279
pf$na.val.y <- NULL
276280
}
277281
}

R/ctable.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,18 +261,18 @@ ctable <- function(x,
261261
# Replace values == na.val by NA in factors & char vars
262262
if (!is.null(na.val.x)) {
263263
if (is.factor(x)) {
264-
x[which(x == na.val.x)] <- NA
265-
levels(x)[which(levels(x) == na.val.x)] <- NA
264+
x[x == na.val.x] <- NA
265+
levels(x)[levels(x) == na.val.x] <- NA
266266
} else if (is.character(x)) {
267-
x[which(x == na.val.x)] <- NA
267+
x[x == na.val.x] <- NA
268268
}
269269
}
270270
if (!is.null(na.val.y)) {
271271
if (is.factor(y)) {
272-
y[which(x == na.val.y)] <- NA
273-
levels(x)[which(levels(y) == na.val.y)] <- NA
272+
y[y == na.val.y] <- NA
273+
levels(y)[levels(y) == na.val.y] <- NA
274274
} else if (is.character(y)) {
275-
y[which(y == na.val.y)] <- NA
275+
y[y == na.val.y] <- NA
276276
}
277277
}
278278

@@ -366,9 +366,9 @@ ctable <- function(x,
366366
colnames(prop_table)[is.na(colnames(prop_table))] <- "<NA>"
367367
}
368368
} else {
369-
colnames(freq_table)[is.na(colnames(freq_table))] <- "<NA>"
369+
colnames(freq_table)[is.na(colnames(freq_table))] <- na.val.y
370370
if (prop != "n") {
371-
colnames(prop_table)[is.na(colnames(prop_table))] <- "<NA>"
371+
colnames(prop_table)[is.na(colnames(prop_table))] <- na.val.y
372372
}
373373
}
374374
}

R/define_keywords.R

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -217,53 +217,52 @@ define_keywords <- function(..., ask = TRUE, file = NA) {
217217
dialog_error <- FALSE
218218
if (isTRUE(resp)) {
219219
while (!filename_ok) {
220-
dir_name <- rstudioapi::selectDirectory(
221-
caption = "Choose directory for custom language file"
220+
filename <- rstudioapi::selectFile(
221+
caption = "Save custom language file",
222+
label = "Save",#Enter file name, including extension (.csv)",
223+
path = getwd(),
224+
filter = "CSV file (*.csv)",
225+
existing = FALSE
222226
)
223-
if (is.null(dir_name)) {
224-
# dialog cancelled
227+
if (is.null(filename)) {
228+
# dialog cancelled -- validate cancel action
225229
filename <- ""
226230
filename_ok <- TRUE
227-
message("Custom language file not saved")
228231
} else {
229-
filename <- rstudioapi::showPrompt(
230-
title = "Custom language file",
231-
message = "Enter file name, including extension (.csv)",
232-
default = "custom_lang.csv"
233-
)
234-
if (is.null(filename)) {
235-
# dialog cancelled -- validate cancel action
236-
filename <- ""
237-
filename_ok <- !rstudioapi::showQuestion(
238-
title = "Custom language file",
239-
message = "Invalid file name.",
232+
# make sure csv / txt extension is there
233+
if (!grepl("\\.(csv|txt)$", filename)) {
234+
rv <- rstudioapi::showQuestion(
235+
title = "Save custom language file",
236+
message = "Invalid file extension (must be .csv)",
237+
ok = "Retry"
238+
)
239+
if (rv == "cancel") {
240+
filename <- ""
241+
filename_ok <- TRUE
242+
} else {
243+
filename_ok <- FALSE
244+
next
245+
}
246+
}
247+
filename <- normalizePath(filename, mustWork = FALSE)
248+
if (!isTRUE(check_path_for_output(filename,
249+
overwrite = TRUE))) {
250+
rv <- rstudioapi::showQuestion(
251+
title = "Custom language file",
252+
message = "Invalid file name or location",
240253
ok = "Retry",
241254
cancel = "Cancel"
242255
)
243-
} else {
244-
filename <- normalizePath(
245-
paste(dir_name, filename, sep = .Platform$file.sep),
246-
mustWork = FALSE
247-
)
248-
if (!isTRUE(check_path_for_output(filename,
249-
overwrite = TRUE))) {
250-
rv <- rstudioapi::showQuestion(
251-
title = "Custom language file",
252-
message = "Invalid file name or location",
253-
ok = "Retry",
254-
cancel = "Cancel"
255-
)
256-
if (!rv) {
257-
# cancel confirmed
258-
filename <- ""
259-
filename_ok <- TRUE
260-
}
261-
} else {
262-
# Filename is valid
256+
if (!rv) {
257+
# cancel confirmed
258+
filename <- ""
263259
filename_ok <- TRUE
264260
}
265-
}
266-
}
261+
} else {
262+
# Filename is valid
263+
filename_ok <- TRUE
264+
}
265+
}
267266
} # while
268267
} # save yes
269268
}

R/descr.R

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,12 @@ descr.default <- function(x,
352352
output <- as.data.frame(t(results[ ,-1]))
353353
colnames(output) <- results$stat
354354
} else {
355-
output <- xx %>%
356-
summarise_all(.funs = summar_funs, na.rm = na.rm) %>%
357-
as.data.frame
355+
# Suppress warnings for groups having 0 valid values
356+
suppressWarnings({
357+
output <- xx %>%
358+
summarise_all(.funs = summar_funs, na.rm = na.rm) %>%
359+
as.data.frame
360+
})
358361
rownames(output) <- parse_info$var_name %||% var_names
359362
}
360363

@@ -371,12 +374,14 @@ descr.default <- function(x,
371374
}
372375

373376
if ("pct.valid" %in% stats) {
374-
output$pct.valid <- output$n.valid *100 / nrow(xx)
377+
output$pct.valid <- output$n.valid * 100 / nrow(xx)
375378
}
376379

377380
# Apply corrections where n.valid = 0
378381
zerows <- which(output$n.valid == 0)
379-
output[zerows, setdiff(stats, "n.valid")] <- NA
382+
if (length(zerows)) {
383+
warning("no non-missing arguments to numerical functions")
384+
}
380385

381386
} else {
382387

@@ -464,28 +469,33 @@ descr.default <- function(x,
464469
}
465470

466471
# Calculate and insert stats into output dataframe
467-
output[i, ] <-
468-
c(ifelse("mean" %in% stats, variable.mean, NA),
469-
ifelse("sd" %in% stats, variable.sd, NA),
470-
ifelse("min" %in% stats, min(variable, na.rm = na.rm), NA),
471-
ifelse("med" %in% stats, weightedMedian(variable, weights_tmp,
472-
refine = TRUE,
473-
na.rm = na.rm), NA),
474-
ifelse("max" %in% stats, max(variable, na.rm = na.rm), NA),
475-
ifelse("mad" %in% stats, weightedMad(variable, weights_tmp,
476-
refine = TRUE,
477-
na.rm = na.rm), NA),
478-
ifelse("cv" %in% stats, variable.sd/variable.mean, NA),
479-
ifelse("n.valid" %in% stats, n_valid, NA),
480-
ifelse("n" %in% stats, n, NA),
481-
ifelse("pct.valid" %in% stats, p_valid * 100, NA))
472+
# (suppress repeated warnings when no non-missing data)
473+
suppressWarnings({
474+
output[i, ] <-
475+
c(ifelse("mean" %in% stats, variable.mean, NA),
476+
ifelse("sd" %in% stats, variable.sd, NA),
477+
ifelse("min" %in% stats, min(variable, na.rm = na.rm), NA),
478+
ifelse("med" %in% stats, weightedMedian(variable, weights_tmp,
479+
refine = TRUE,
480+
na.rm = na.rm), NA),
481+
ifelse("max" %in% stats, max(variable, na.rm = na.rm), NA),
482+
ifelse("mad" %in% stats, weightedMad(variable, weights_tmp,
483+
refine = TRUE,
484+
na.rm = na.rm), NA),
485+
ifelse("cv" %in% stats, variable.sd/variable.mean, NA),
486+
ifelse("n.valid" %in% stats, n_valid, NA),
487+
ifelse("n" %in% stats, n, NA),
488+
ifelse("pct.valid" %in% stats, p_valid * 100, NA))
489+
})
482490
}
483491

484492
rownames(output) <- var_names
485493

486494
# Apply corrections where n.valid = 0
487495
zerows <- which(output$n.valid == 0)
488-
output[zerows, setdiff(stats, "n.valid")] <- NA
496+
if (length(zerows)) {
497+
warning("no non-missing arguments to numerical functions")
498+
}
489499
}
490500

491501
# Prepare output data -------------------------------------------------------
@@ -594,8 +604,11 @@ descr.default <- function(x,
594604

595605
attr(output, "lang") <- st_options("lang")
596606

597-
if (!is.null(ignored))
598-
attr(output, "ignored") <- ignored
607+
if (!is.null(ignored)) {
608+
if (length(ignored <- setdiff(ignored, data_info$by_var))) {
609+
attr(output, "ignored") <- ignored
610+
}
611+
}
599612

600613
return(output)
601614
}

0 commit comments

Comments
 (0)