Merged
1 change: 1 addition & 0 deletions NAMESPACE
@@ -259,6 +259,7 @@ importFrom(stats,na.omit)
importFrom(stringi,stri_extract_all_regex)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringr,coll)
+ importFrom(stringr,regex)
importFrom(stringr,str_count)
importFrom(stringr,str_detect)
importFrom(stringr,str_glue)
43 changes: 17 additions & 26 deletions R/cleanup.R
@@ -164,9 +164,10 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
prot <- prot %>%
as_tibble() %>%
# filter(grepl("\\*", {{by_column}})) %>% # Keep only rows with Query (*) for GenContext
filter(!grepl("^-$", {{ by_column }})) %>% # remove "-"
filter(!grepl("^NA$", {{ by_column }})) %>% # remove "NA"
filter(!grepl("^$", {{ by_column }})) # remove empty rows
filter(!grepl("^-$", .[[by_column]])) %>% # remove "-"
filter(!grepl("^NA$", .[[by_column]])) %>% # remove "NA"
filter(!grepl("^$", .[[by_column]])) %>% # remove empty rows
filter(!grepl("^\\s*$", .[[by_column]])) # remove rows with only spaces

return(prot)
}
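
Note on the hunk above: because `by_column` is a character argument, the embraced `{{ by_column }}` inside `grepl()` appears to compare against the literal column name rather than its values, so the old filters were effectively no-ops; indexing the piped data with `.[[by_column]]` tests the actual column, and a fourth filter now drops whitespace-only entries. A minimal sketch of the new behaviour, with hypothetical `DomArch` values and the magrittr pipe assumed:

```r
library(dplyr)

# Hypothetical input; only the first and last rows should survive
prot <- tibble(DomArch = c("PspA", "-", "NA", "", "   ", "Toast_rack"))

prot %>%
    filter(!grepl("^-$", .[["DomArch"]])) %>%    # drop "-" placeholders
    filter(!grepl("^NA$", .[["DomArch"]])) %>%   # drop literal "NA"
    filter(!grepl("^$", .[["DomArch"]])) %>%     # drop empty strings
    filter(!grepl("^\\s*$", .[["DomArch"]]))     # drop whitespace-only strings
#> a 2-row tibble: DomArch = "PspA", "Toast_rack"
```

The `.` placeholder only exists with `%>%`; the same sketch would not work as written with the native `|>` pipe.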
@@ -191,7 +192,7 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
#' @export
#'
#' @importFrom dplyr pull mutate
- #' @importFrom stringr str_replace_all
+ #' @importFrom stringr str_replace_all regex
#' @importFrom rlang .data :=
#'
#' @examples
@@ -201,29 +202,19 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots = c()) {
# If there are strings that condenseRepeatedDomains should not affect, the pattern to search
# for must be changed to exclude a search for those desired strings

- collapsed_prots <- paste0(excluded_prots, collapse = "\\s|")
- regex_exclude <- paste0("(?!", collapsed_prots, "\\s)")
- regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([a-z0-9_-]+)\\b(?:\\s+\\1\\b)+")
-
- # !! FUNS is soft-deprecated. FIX!!!
- prot <- prot %>%
- dplyr::mutate(!!by_column := stringr::str_replace_all(
- .data[[by_column]],
- c(
- "\\." = "_d_",
- " " = "_",
- "\\+" = " ",
- "-" = "__",
- regex_identify_repeats = "\\1(s)",
- "__" = "-",
- " " = "+",
- "_d_" = "."
- )
- ))
+ collapsed_prots <- paste0(excluded_prots, collapse = "|")
+ regex_exclude <- if (length(excluded_prots)) paste0("(?!", collapsed_prots, "\\b)") else ""
+
+ # Allow + or space (or combinations) as delimiters
+ regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([A-Za-z0-9_-]+)\\b(?:[+\\s]+\\1\\b)+")
+
+ prot <-
+ prot %>%
+ mutate(
+ !!by_column := str_replace_all(.data[[by_column]], regex(regex_identify_repeats), "\\1(s)")
+ )

return(prot)

}
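
A quick sketch of how the rebuilt pattern behaves (the domain names below are hypothetical). The exclusion list is turned into a negative lookahead checked at each candidate match, `[+\s]+` accepts `+`, spaces, or a mix as delimiters, and `(?i)` with the `\1` backreference collapses case-insensitive runs of the same domain into `name(s)`:

```r
library(stringr)

excluded_prots <- c("DUF123")  # hypothetical domain to leave untouched
collapsed_prots <- paste0(excluded_prots, collapse = "|")
regex_exclude <- if (length(excluded_prots)) paste0("(?!", collapsed_prots, "\\b)") else ""
regex_identify_repeats <- paste0(
    "(?i)", regex_exclude,
    "\\b([A-Za-z0-9_-]+)\\b(?:[+\\s]+\\1\\b)+"
)

str_replace_all("PspA+PspA+PspA+DUF123+DUF123",
                regex(regex_identify_repeats), "\\1(s)")
#> "PspA(s)+DUF123+DUF123"   repeats condensed, excluded domain left alone

str_replace_all("Toast_rack toast_rack SpoVG",
                regex(regex_identify_repeats), "\\1(s)")
#> "Toast_rack(s) SpoVG"     space-delimited, case-insensitive repeat
```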


@@ -731,7 +722,7 @@ selectLongestDuplicate <- function(prot, column) {
# grab all the longest rows
unique_dups <- prot %>%
filter(!.data$row.orig %in% remove_rows) %>%
- select(-.data$row.orig)
+ select(-"row.orig")

return(unique_dups)
}
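
The one-line `select()` change above tracks tidyselect's deprecation of the `.data` pronoun inside selection verbs: quote the column name (or wrap it in `all_of()`) instead, while keeping `.data[[...]]` for data-masking verbs such as `filter()` and `mutate()`. A small sketch with hypothetical columns:

```r
library(dplyr)

df <- tibble(row.orig = 1:3, AccNum = c("P1", "P2", "P3"))  # hypothetical data

df %>% select(-"row.orig")           # drop by quoted name, replacing -.data$row.orig
df %>% select(-all_of("row.orig"))   # equivalent, explicit character-vector selection
```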
2 changes: 1 addition & 1 deletion R/fa2domain.R
@@ -185,7 +185,7 @@ createIPRScanDomainTable <- function(
id_domain = stringr::str_glue("{AccNum}-{DB.ID}-{StartLoc}_{StopLoc}")
) |>
dplyr::ungroup() |>
- dplyr::relocate(.data$id_domain, .before = 1)
+ dplyr::relocate('id_domain', .before = 1)
return(df_iprscan_domains)
}
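
The `relocate()` edit is the same tidyselect fix in a different verb: a quoted column name (or `all_of()`) replaces the deprecated `.data$id_domain`. A tiny sketch with a made-up row:

```r
library(dplyr)

df <- tibble(AccNum = "P1", id_domain = "P1-PF00001-1_50")  # hypothetical values

df |> relocate("id_domain", .before = 1)          # string selection, no .data pronoun
df |> relocate(all_of("id_domain"), .before = 1)  # equivalent alternative
```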
