Skip to content

Commit 8864ee2

Browse files
committed
Adjust getIPRScanColNames() and tests
- document package data
- use package data in function
- adjust test to validate ingested sample colnames

References: JRaviLab#75 (comment)

Co-authored-by: Jananiravi <jananiravi@users.noreply.github.com>
1 parent 7ca0c57 commit 8864ee2

File tree

5 files changed

+73
-15
lines changed

5 files changed

+73
-15
lines changed

R/fa2domain.R

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,10 @@ runIPRScan <- function(
6565
#' (based upon the global variable written in
6666
#' molevol_scripts/R/colnames_molevol.R)
6767
#'
68-
#' @return [chr] interproscan column names used throughout molevolvr
68+
#' @return [chr] interproscan column names used throughout MolEvolvR
6969
getIPRScanColNames <- function() {
70-
column_names <- c(
71-
"AccNum", "SeqMD5Digest", "SLength", "Analysis",
72-
"DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score",
73-
"Status", "RunDate", "IPRAcc", "IPRDesc"
74-
)
75-
return(column_names)
70+
data("ipr_colnames", package = "MolEvolvR", envir = environment())
71+
ipr_colnames
7672
}
7773

7874
#' construct column types for reading interproscan output TSVs

R/ipr_colnames.R

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#' InterProScan Column Names
2+
#'
3+
#' A character vector containing the expected column names from an
4+
#' InterProScan output table. This dataset is useful for validating,
5+
#' parsing, or reconstructing data frames produced by InterProScan.
6+
#'
7+
#' @format A character vector with 13 elements:
8+
#' \describe{
9+
#' \item{AccNum}{Accession number of the sequence.}
10+
#' \item{SeqMD5Digest}{MD5 digest of the sequence.}
11+
#' \item{SLength}{Length of the sequence.}
12+
#' \item{Analysis}{Type of analysis or database used (e.g., Pfam, SMART).}
13+
#' \item{DB.ID}{Database-specific identifier.}
14+
#' \item{SignDesc}{Description of the signature or domain.}
15+
#' \item{StartLoc}{Start position of the match on the sequence.}
16+
#' \item{StopLoc}{Stop position of the match on the sequence.}
17+
#' \item{Score}{Score assigned to the match (if applicable).}
18+
#' \item{Status}{Status of the analysis (e.g., OK, WARNING).}
19+
#' \item{RunDate}{Date the InterProScan analysis was run.}
20+
#' \item{IPRAcc}{InterPro accession number.}
21+
#' \item{IPRDesc}{InterPro entry description.}
22+
#' }
23+
#'
24+
#' @source Generated internally to represent standard InterProScan output fields.
25+
#' @examples
26+
#' data(ipr_colnames)
27+
#' ipr_colnames
28+
"ipr_colnames"

man/getIPRScanColNames.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ipr_colnames.Rd

Lines changed: 40 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-fa2domain.R

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,9 @@ test_that("fa2domain", {
7979
# Check that the result is a character vector
8080
expect_type(col_names, "character")
8181

82-
# Define the expected column names
83-
expected_col_names <- c(
84-
"AccNum", "SeqMD5Digest", "SLength", "Analysis",
85-
"DB.ID", "SignDesc", "StartLoc", "StopLoc", "Score",
86-
"Status", "RunDate", "IPRAcc", "IPRDesc"
87-
)
8882

8983
# Check that the column names match exactly
90-
expect_equal(col_names, expected_col_names)
84+
expect_equal(colnames(df_ipr), col_names)
9185
expect_type(col_names, "character")
9286

9387
# Ensure there are exactly 13 columns

0 commit comments

Comments
 (0)