Skip to content

Commit 3fc7650

Browse files
committed
0.9.7 a new fct identical_cols
1 parent c0238c1 commit 3fc7650

File tree

11 files changed

+183
-12
lines changed

11 files changed

+183
-12
lines changed

CRAN-SUBMISSION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Version: 0.9.6
2-
Date: 2024-10-10 09:11:05 UTC
3-
SHA: 36d1276af66be31fa3c87d390016725f810962b8
2+
Date: 2024-10-10 11:37:51 UTC
3+
SHA: c0238c1521c02218a6308200db1b5091b27e13ce

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ Description: The main functionalities of 'wrappedtools' are:
1010
descriptive statistics and p-values; creating specialized plots for
1111
correlation matrices. Functions were mainly written for my own daily work or
1212
teaching, but may be of use to others as well.
13-
Version: 0.9.6
14-
Date: 2024-10-10
13+
Version: 0.9.7
14+
Date: 2025-03-25
1515
Authors@R: c(
1616
person(given = "Andreas",
1717
family = "Busjahn",

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export(flex2rmd)
1919
export(formatP)
2020
export(ggcormat)
2121
export(glmCI)
22+
export(identical_cols)
2223
export(ksnormal)
2324
export(label_outliers)
2425
export(logrange_1)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#wrappedtools 0.9.7
2+
- function identical_cols to find and remove duplicated columns
3+
14
#wrappedtools 0.9.6
25
- function ksnormal now uses Lilliefors test by default
36
- example for compare_n_numvars was corrected

R/basefunctions.R

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ ColSeeker <- function(data=rawdata,
399399
#' @param nrows number of rows (30) before splitting.
400400
#' @param ncols number of columns (100) before splitting.
401401
#' @param caption header.
402-
#' @param ... Further arguments passed to [kable].
402+
#' @param ... Further arguments passed to [knitr::kable].
403403
#' @return No return value, called for side effects.
404404
#'
405405
#' @examples
@@ -448,7 +448,7 @@ print_kable <- function(t, nrows = 30, caption = "",
448448
#' @param innercaption subheader
449449
#' @param caption header
450450
#' @param foot footnote
451-
#' @param escape see kable
451+
#' @param escape see [knitr::kable]
452452
#'
453453
#'@return A character vector of the table source code.
454454
#' @export
@@ -590,3 +590,112 @@ flex2rmd <- function(ft){
590590
return(flextable_to_rmd(ft))
591591
}
592592
}
593+
594+
595+
#' Find and optionally remove identical columns in a data frame.
596+
#'
597+
#' This function identifies columns with identical values in a data frame and
598+
#' provides options to remove them, clean column names, and print the duplicated groups.
599+
#' It also includes an interactive mode where the user can choose to remove all,
600+
#' some, or none of the duplicated columns.
601+
#'
602+
#' @param df A data frame or tibble.
603+
#' @param interactive Logical. If TRUE, the function prompts the user to choose how
604+
#' to handle duplicated columns. Defaults to TRUE.
605+
#' @param remove_duplicates Logical. If TRUE, removes duplicated columns. Defaults to TRUE.
606+
#' @param clean_names Logical. If TRUE, cleans column names by removing trailing
607+
#' "..." followed by digits. Defaults to TRUE.
608+
#' @param print_duplicates Logical. If TRUE, prints the groups of duplicated columns.
609+
#' Defaults to TRUE.
610+
#'
611+
#' @return A data frame with optionally removed and renamed columns.
612+
#'
613+
#' @examples
614+
#' library(tibble)
615+
#'
616+
#' dummy <- tibble(
617+
#' A...1 = rnorm(10),
618+
#' A...2 = A...1,
619+
#' C = sample(letters, 10),
620+
#' A...4 = A...1,
621+
#' E = sample(1:10, 10),
622+
#' `F` = C
623+
#' )
624+
#'
625+
#' # Example usage:
626+
#' identical_cols(dummy) # Interactive removal
627+
#' identical_cols(dummy, remove_duplicates = FALSE) # Find identical columns only
628+
#' identical_cols(dummy, print_duplicates = FALSE) # Interactive removal, no print
629+
#' identical_cols(dummy, clean_names = FALSE) # Interactive removal, no clean names
630+
#' identical_cols(dummy, interactive = FALSE) #Non interactive removal of all duplicates.
631+
#'
632+
#' @export
633+
identical_cols <- function(df,
634+
interactive = TRUE,
635+
remove_duplicates = TRUE,
636+
clean_names = TRUE,
637+
print_duplicates = TRUE) {
638+
col_names <- names(df)
639+
identical.cols <-
640+
purrr::map(col_names, function(current_col) {
641+
col_names[purrr::map_lgl(df, ~ identical(.x, df[[current_col]]))]
642+
})
643+
names(identical.cols) <- col_names
644+
duplicated_groups <- unique(identical.cols[purrr::map_lgl(identical.cols, ~ length(.x) > 1)])
645+
646+
if (print_duplicates) {
647+
print(duplicated_groups)
648+
}
649+
650+
if (remove_duplicates) {
651+
if(interactive &
652+
length(duplicated_groups) > 0) {
653+
user_choice <- readline("Remove (a)ll, (s)ome, or (n)one of the duplicates? (a/s/n): ")
654+
} else{
655+
user_choice = "a"
656+
}
657+
if (user_choice == "a") {
658+
cols_to_remove <- col_names[col_names %in% unlist(
659+
purrr::map(duplicated_groups, ~ .x[-1])
660+
)]
661+
cols_to_keep <-
662+
col_names[!col_names %in% unlist(
663+
purrr::map(duplicated_groups, ~ .x[-1])
664+
)]
665+
df <- df[, cols_to_keep, drop = FALSE]
666+
if (clean_names) {
667+
df <- rename_with(
668+
df,
669+
.cols = all_of(cols_to_keep),
670+
.fn = ~ str_remove(.x, "\\.{3}\\d+$")
671+
)
672+
}
673+
} else if (user_choice == "s") {
674+
cn2rename <- ""
675+
cols_to_remove <- character(0)
676+
for (group in duplicated_groups) {
677+
cat(paste0("Duplicate group:\n", paste0("- ",
678+
group,
679+
collapse = "\n")))
680+
remove_group <- readline("Remove this duplication? (y/n): ")
681+
682+
if (remove_group == "y") {
683+
cols_to_remove <- c(cols_to_remove, group[-1])
684+
cn2rename <- c(cn2rename,group[1])
685+
}
686+
}
687+
cols_to_keep <- col_names[!col_names %in% cols_to_remove]
688+
df <- df[, cols_to_keep, drop = FALSE]
689+
if (clean_names) {
690+
df <- rename_with(
691+
df,
692+
.cols = any_of(cn2rename),
693+
.fn = ~ str_remove(.x, "\\.{3}\\d+$")
694+
)
695+
}
696+
} else {
697+
cols_to_keep <- col_names
698+
}
699+
return(df)
700+
}
701+
}

R/tests.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -870,13 +870,13 @@ compare_n_qualvars <- function(data, dep_vars, indep_var,
870870
#'
871871
#' \code{pairwise_wilcox_test} calculates pairwise comparisons on ordinal data
872872
#' between all group levels with corrections for multiple testing based on
873-
#' \link{wilcox_test} from package 'coin'.
873+
#' [coin::wilcox_test] from package 'coin'.
874874
#'
875875
#' @param dep_var dependent variable, containing the data.
876876
#' @param indep_var independent variable, should be factor.
877877
#' @param strat_var optional factor for stratification.
878878
#' @param adjmethod method for adjusting p values (see [p.adjust])
879-
#' @param distr Computation of p-values, see \link{wilcox_test}.
879+
#' @param distr Computation of p-values, see [coin::wilcox_test].
880880
#' @param plevel threshold for significance.
881881
#' @param symbols predefined as b,c, d...; provides footnotes to mark group
882882
#' differences, e.g. b means different from group 2.

man/identical_cols.Rd

Lines changed: 57 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/pairwise_wilcox_test.Rd

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/pdf_kable.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/print_kable.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)