@@ -164,9 +164,10 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
164164 prot <- prot %> %
165165 as_tibble() %> %
166166 # filter(grepl("\\*", {{by_column}})) %>% # Keep only rows with Query (*) for GenContext
167- filter(! grepl(" ^-$" , {{ by_column }})) %> % # remove "-"
168- filter(! grepl(" ^NA$" , {{ by_column }})) %> % # remove "NA"
169- filter(! grepl(" ^$" , {{ by_column }})) # remove empty rows
167+ filter(! grepl(" ^-$" , . [[by_column ]])) %> % # remove "-"
168+ filter(! grepl(" ^NA$" , . [[by_column ]])) %> % # remove "NA"
169+ filter(! grepl(" ^$" , . [[by_column ]])) %> % # remove empty rows
170+ filter(! grepl(" ^\\ s*$" , . [[by_column ]])) # remove rows with only spaces
170171
171172 return (prot )
172173}
@@ -191,7 +192,7 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
191192# ' @export
192193# '
193194# ' @importFrom dplyr pull mutate
194- # ' @importFrom stringr str_replace_all
195+ # ' @importFrom stringr str_replace_all regex
195196# ' @importFrom rlang .data :=
196197# '
197198# ' @examples
@@ -201,29 +202,19 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
  # Collapse runs of a repeated domain name in column `by_column` into a
  # single "<name>(s)" token, e.g. "A+A+B" becomes "A(s)+B".
  #
  # prot:           data frame / tibble containing the column to rewrite.
  # by_column:      name (string) of the column to rewrite in place.
  # excluded_prots: domain names whose repeats must be left untouched. The
  #                 entries are inserted into the pattern as-is, so regex
  #                 metacharacters in them keep their regex meaning.
  #
  # Returns `prot` with `by_column` replaced by its condensed form.

  # Drop NA and empty entries: an empty alternative would make the negative
  # lookahead succeed at every position and silently disable all condensing.
  excluded_prots <- as.character(excluded_prots)
  excluded_prots <- excluded_prots[!is.na(excluded_prots) & nzchar(excluded_prots)]

  # Negative lookahead that rejects a match starting at an excluded name.
  # The alternation is wrapped in a non-capturing group so that the trailing
  # word boundary applies to every alternative, not only to the last one
  # (otherwise excluding "A" would also exclude "AB").
  regex_exclude <- ""
  if (length(excluded_prots) > 0) {
    collapsed_prots <- paste0(excluded_prots, collapse = "|")
    regex_exclude <- paste0("(?!(?:", collapsed_prots, ")\\b)")
  }

  # A token followed by one or more copies of itself (case-insensitive),
  # separated by "+" and/or whitespace in any combination.
  regex_identify_repeats <- paste0(
    "(?i)",
    regex_exclude,
    "\\b([A-Za-z0-9_-]+)\\b(?:[+\\s]+\\1\\b)+"
  )

  # `!!by_column :=` writes the result back to the column named by the string.
  prot <- dplyr::mutate(
    prot,
    !!by_column := stringr::str_replace_all(
      .data[[by_column]],
      stringr::regex(regex_identify_repeats),
      "\\1(s)"
    )
  )

  return(prot)
}
228219
229220
@@ -731,7 +722,7 @@ selectLongestDuplicate <- function(prot, column) {
731722 # grab all the longest rows
732723 unique_dups <- prot %> %
733724 filter(! .data $ row.orig %in% remove_rows ) %> %
734- select(- .data $ row.orig )
725+ select(- " row.orig" )
735726
736727 return (unique_dups )
737728}
0 commit comments