Skip to content

Commit 3e58486

Browse files
committed
refactor condenseRepeatedDomains()
- regex matches domains separated by '+' OR whitespace
- simplified replacement logic
1 parent 6f148a0 commit 3e58486

File tree

2 files changed

+13
-22
lines changed

2 files changed

+13
-22
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ importFrom(stats,na.omit)
259259
importFrom(stringi,stri_extract_all_regex)
260260
importFrom(stringi,stri_replace_all_regex)
261261
importFrom(stringr,coll)
262+
importFrom(stringr,regex)
262263
importFrom(stringr,str_count)
263264
importFrom(stringr,str_detect)
264265
importFrom(stringr,str_glue)

R/cleanup.R

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
192192
#' @export
193193
#'
194194
#' @importFrom dplyr pull mutate
195-
#' @importFrom stringr str_replace_all
195+
#' @importFrom stringr str_replace_all regex
196196
#' @importFrom rlang .data :=
197197
#'
198198
#' @examples
@@ -202,29 +202,19 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
202202
condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
203203
# Strings listed in excluded_prots must be left untouched by condenseRepeatedDomains, so the
204204
# search pattern is prefixed with a negative lookahead that skips them
205-
206-
collapsed_prots <- paste0(excluded_prots, collapse = "\\s|")
207-
regex_exclude <- paste0("(?!", collapsed_prots, "\\s)")
208-
regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([a-z0-9_-]+)\\b(?:\\s+\\1\\b)+")
209-
210-
# !! FUNS is soft-deprecated. FIX!!!
211-
prot <- prot %>%
212-
dplyr::mutate(!!by_column := stringr::str_replace_all(
213-
.data[[by_column]],
214-
c(
215-
"\\." = "_d_",
216-
" " = "_",
217-
"\\+" = " ",
218-
"-" = "__",
219-
regex_identify_repeats = "\\1(s)",
220-
"__" = "-",
221-
" " = "+",
222-
"_d_" = "."
223-
)
224-
))
205+
collapsed_prots <- paste0(excluded_prots, collapse = "|")
206+
regex_exclude <- if (length(excluded_prots)) paste0("(?!", collapsed_prots, "\\b)") else ""
207+
208+
# Allow '+', whitespace, or any mix of the two as the delimiter between repeats
209+
regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([A-Za-z0-9_-]+)\\b(?:[+\\s]+\\1\\b)+")
210+
211+
prot <-
212+
prot %>%
213+
mutate(
214+
!!by_column := str_replace_all(.data[[by_column]], regex(regex_identify_repeats), "\\1(s)")
215+
)
225216

226217
return(prot)
227-
228218
}
229219

230220

0 commit comments

Comments
 (0)