@@ -192,7 +192,7 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
192192# ' @export
193193# '
194194# ' @importFrom dplyr pull mutate
195- # ' @importFrom stringr str_replace_all
195+ # ' @importFrom stringr str_replace_all regex
196196# ' @importFrom rlang .data :=
197197# '
198198# ' @examples
@@ -202,29 +202,19 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
  # Collapse runs of the same domain inside the `by_column` column of `prot`
  # into a single "<domain>(s)" token, e.g. "A+A+B" becomes "A(s)+B".
  # Repeats may be separated by "+", whitespace, or any mix of the two, and
  # are matched case-insensitively. Domains named in `excluded_prots` are
  # left untouched. Returns `prot` with the rewritten column.

  # Drop NA / empty names: an empty alternative would make the negative
  # lookahead succeed at every word boundary and suppress all condensing.
  excluded_prots <- as.character(excluded_prots)
  excluded_prots <- unique(
    excluded_prots[!is.na(excluded_prots) & nzchar(excluded_prots)]
  )

  regex_exclude <- ""
  if (length(excluded_prots) > 0) {
    # Escape regex metacharacters so names are matched literally.
    escaped_prots <- gsub(
      "([.\\\\|()\\[\\]{}^$*+?])", "\\\\\\1", excluded_prots,
      perl = TRUE
    )
    # Group the alternation so the trailing \b applies to every excluded
    # name, not only the last one ("(?!A|B\b)" would treat "A" as a prefix).
    regex_exclude <- paste0(
      "(?!(?:", paste0(escaped_prots, collapse = "|"), ")\\b)"
    )
  }

  # A token followed by one or more delimiter-separated copies of itself.
  regex_identify_repeats <- paste0(
    "(?i)", regex_exclude, "\\b([A-Za-z0-9_-]+)\\b(?:[+\\s]+\\1\\b)+"
  )

  mutate(
    prot,
    !!by_column := str_replace_all(
      .data[[by_column]],
      regex(regex_identify_repeats),
      "\\1(s)"
    )
  )
}
229219
230220
0 commit comments