@@ -399,7 +399,7 @@ ColSeeker <- function(data=rawdata,
399399# ' @param nrows number of rows (30) before splitting.
400400# ' @param ncols number of columns (100) before splitting.
401401# ' @param caption header.
402- # ' @param ... Further arguments passed to [kable].
402+ # ' @param ... Further arguments passed to [knitr:: kable].
403403# ' @return No return value, called for side effects.
404404# '
405405# ' @examples
@@ -448,7 +448,7 @@ print_kable <- function(t, nrows = 30, caption = "",
448448# ' @param innercaption subheader
449449# ' @param caption header
450450# ' @param foot footnote
451- # ' @param escape see kable
451+ # ' @param escape see [knitr:: kable]
452452# '
453453# '@return A character vector of the table source code.
454454# ' @export
@@ -590,3 +590,112 @@ flex2rmd <- function(ft){
590590 return (flextable_to_rmd(ft ))
591591 }
592592}
593+
594+
595+ # ' Find and optionally remove identical columns in a data frame.
596+ # '
597+ # ' This function identifies columns with identical values in a data frame and
598+ # ' provides options to remove them, clean column names, and print the duplicated groups.
599+ # ' It also includes an interactive mode where the user can choose to remove all,
600+ # ' some, or none of the duplicated columns.
601+ # '
602+ # ' @param df A data frame or tibble.
603+ # ' @param interactive Logical. If TRUE, the function prompts the user to choose how
604+ # ' to handle duplicated columns. Defaults to TRUE.
605+ # ' @param remove_duplicates Logical. If TRUE, removes duplicated columns. Defaults to TRUE.
606+ # ' @param clean_names Logical. If TRUE, cleans column names by removing trailing
607+ # ' "..." followed by digits. Defaults to TRUE.
608+ # ' @param print_duplicates Logical. If TRUE, prints the groups of duplicated columns.
609+ # ' Defaults to TRUE.
610+ # '
611+ # ' @return A data frame with optionally removed and renamed columns.
612+ # '
613+ # ' @examples
614+ # ' library(tibble)
615+ # '
616+ # ' dummy <- tibble(
617+ # ' A...1 = rnorm(10),
618+ # ' A...2 = A...1,
619+ # ' C = sample(letters, 10),
620+ # ' A...4 = A...1,
621+ # ' E = sample(1:10, 10),
622+ # ' `F` = C
623+ # ' )
624+ # '
625+ # ' # Example usage:
626+ # ' identical_cols(dummy) # Interactive removal
627+ # ' identical_cols(dummy, remove_duplicates = FALSE) # Find identical columns only
628+ # ' identical_cols(dummy, print_duplicates = FALSE) # Interactive removal, no print
629+ # ' identical_cols(dummy, clean_names = FALSE) # Interactive removal, no clean names
630+ # ' identical_cols(dummy, interactive = FALSE) #Non interactive removal of all duplicates.
631+ # '
632+ # ' @export
633+ identical_cols <- function (df ,
634+ interactive = TRUE ,
635+ remove_duplicates = TRUE ,
636+ clean_names = TRUE ,
637+ print_duplicates = TRUE ) {
638+ col_names <- names(df )
639+ identical.cols <-
640+ purrr :: map(col_names , function (current_col ) {
641+ col_names [purrr :: map_lgl(df , ~ identical(.x , df [[current_col ]]))]
642+ })
643+ names(identical.cols ) <- col_names
644+ duplicated_groups <- unique(identical.cols [purrr :: map_lgl(identical.cols , ~ length(.x ) > 1 )])
645+
646+ if (print_duplicates ) {
647+ print(duplicated_groups )
648+ }
649+
650+ if (remove_duplicates ) {
651+ if (interactive &
652+ length(duplicated_groups ) > 0 ) {
653+ user_choice <- readline(" Remove (a)ll, (s)ome, or (n)one of the duplicates? (a/s/n): " )
654+ } else {
655+ user_choice = " a"
656+ }
657+ if (user_choice == " a" ) {
658+ cols_to_remove <- col_names [col_names %in% unlist(
659+ purrr :: map(duplicated_groups , ~ .x [- 1 ])
660+ )]
661+ cols_to_keep <-
662+ col_names [! col_names %in% unlist(
663+ purrr :: map(duplicated_groups , ~ .x [- 1 ])
664+ )]
665+ df <- df [, cols_to_keep , drop = FALSE ]
666+ if (clean_names ) {
667+ df <- rename_with(
668+ df ,
669+ .cols = all_of(cols_to_keep ),
670+ .fn = ~ str_remove(.x , " \\ .{3}\\ d+$" )
671+ )
672+ }
673+ } else if (user_choice == " s" ) {
674+ cn2rename <- " "
675+ cols_to_remove <- character (0 )
676+ for (group in duplicated_groups ) {
677+ cat(paste0(" Duplicate group:\n " , paste0(" - " ,
678+ group ,
679+ collapse = " \n " )))
680+ remove_group <- readline(" Remove this duplication? (y/n): " )
681+
682+ if (remove_group == " y" ) {
683+ cols_to_remove <- c(cols_to_remove , group [- 1 ])
684+ cn2rename <- c(cn2rename ,group [1 ])
685+ }
686+ }
687+ cols_to_keep <- col_names [! col_names %in% cols_to_remove ]
688+ df <- df [, cols_to_keep , drop = FALSE ]
689+ if (clean_names ) {
690+ df <- rename_with(
691+ df ,
692+ .cols = any_of(cn2rename ),
693+ .fn = ~ str_remove(.x , " \\ .{3}\\ d+$" )
694+ )
695+ }
696+ } else {
697+ cols_to_keep <- col_names
698+ }
699+ return (df )
700+ }
701+ }
0 commit comments