diff --git a/DESCRIPTION b/DESCRIPTION
index 031b194..c55e19e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,9 +5,10 @@ Authors@R: person("Jonathan", "Manning", email = "jonathan.manning@seqera.io", r
Description: Provides Shiny applications for various array and NGS applications.
Currently very RNA-seq centric, with plans for expansion.
Depends:
- R (>= 3.2.2),
+ R (>= 3.4.0),
SummarizedExperiment
License: AGPL (>= 3)
+Encoding: UTF-8
LazyData: true
Imports:
cluster,
@@ -53,6 +54,6 @@ Remotes:
cran/d3heatmap,
pinin4fjords/zhangneurons
biocViews: Software
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
VignetteBuilder: knitr
Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 389cf11..ee3fe7f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(annotatedHeatmap)
export(anova_pca_metadata)
export(barcode_plot)
export(bootstrapMedian)
+export(build_enrichment_path)
export(calculateDendrogram)
export(calculateDist)
export(checkListIsSubset)
diff --git a/R/ExploratorySummarizedExperiment-class.R b/R/ExploratorySummarizedExperiment-class.R
index a171838..5a1c73c 100644
--- a/R/ExploratorySummarizedExperiment-class.R
+++ b/R/ExploratorySummarizedExperiment-class.R
@@ -1,8 +1,6 @@
#' The ExploratorySummarizedExperiment class
#'
-#' Subclass of SummarizedExperiment if present in the SummarizedExperiment
-#' package (newer versions of Bioconductor have moved this from GenomicRanges),
-#' otherwise of SummarizedExperiment0.
+#' Subclass of SummarizedExperiment.
#'
#' @slot idfield character.
#' @slot entrezgenefield character.
@@ -12,21 +10,17 @@
#' @slot gene_set_analyses list.
#' @slot dexseq_results list.
#' @slot read_reports list.
+#' @slot gene_set_analyses_tool list.
#'
#' @export
-setClass("ExploratorySummarizedExperiment", contains = ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment",
- "SummarizedExperiment0"
-), representation = representation(
+setClass("ExploratorySummarizedExperiment", contains = "SummarizedExperiment", slots = c(
idfield = "character", entrezgenefield = "character", labelfield = "character", contrast_stats = "list",
- assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list"
+ assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list", gene_set_analyses_tool = "list"
))
setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function(from) {
- as(
- (as(from, ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment", "SummarizedExperiment0"))),
- "ExploratorySummarizedExperiment"
- )
+ as(as(from, "SummarizedExperiment"), "ExploratorySummarizedExperiment")
})
#' ExploratorySummarizedExperiments
@@ -63,21 +57,23 @@ setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function(
#' correspond to 'contrasts' set in the containing SummarizedExperimentList.
#' @param assay_measures Optional List of measures to display related to each
#' assay.
-#' @param gene_set_analyses List of lists of gene set tables keyed first by
-#' gene set
-#' type and secondly by contrast
+#' @param gene_set_analyses Three-level nested lists of gene set tables keyed first by
+#' assay, then by gene set type and then by contrast.
#' @param read_reports A named list of matrices with read counts in columns
#' and sample names in rows. Useful for providing mapped read counts,
#' counts per gene type etc
#' @param dexseq_results An optional list of \code{DEXSeqResults} objects
#' corresponding to the contrasts listed in the \code{contrasts} slot..
+#' @param gene_set_analyses_tool Three-level nested lists of a string, nested as \code{gene_set_analyses}.
+#' Each string may be \code{"auto"} (the default), \code{"gsea"} or \code{"roast"}. It defines the format of the
+#' corresponding \code{gene_set_analyses} table.
#'
#' @return output An ExploratoryRangedSummarizedExperient object
#' @rawNamespace import(SummarizedExperiment, except = 'shift')
#' @export
ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield, labelfield = character(), entrezgenefield = character(), contrast_stats = list(),
- assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list()) {
+ assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list(), gene_set_analyses_tool = list()) {
# Reset NULLs to empty
if (is.null(entrezgenefield)) {
@@ -116,6 +112,9 @@ ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield
annotation <- data.frame(lapply(annotation, as.character), stringsAsFactors = FALSE, check.names = FALSE, row.names = rownames(annotation))[all_rows, ]
+ # Ensure consistency between gene_set_analyses with gene_set_analyses_tool
+ gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(gene_set_analyses, gene_set_analyses_tool)
+
# Build the object
sumexp <- SummarizedExperiment(assays = assays, colData = DataFrame(colData, check.names = FALSE))
@@ -123,6 +122,75 @@ ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield
new("ExploratorySummarizedExperiment", sumexp,
idfield = idfield, labelfield = labelfield, entrezgenefield = entrezgenefield, assay_measures = assay_measures,
- contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports
+ contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports,
+ gene_set_analyses_tool = gene_set_analyses_tool
+ )
+}
+
+#' Ensure consistency between gene_set_analyses and gene_set_analyses_tool structures
+#' @noRd
+#'
+#' @description
+#' Ensures that the structure of \code{gene_set_analyses_tool} matches that of \code{gene_set_analyses},
+#' filling in missing elements as needed. Each entry in \code{gene_set_analyses_tool} should be a string
+#' (e.g., "auto", "gsea", or "roast") corresponding to the format of the associated gene set analysis table.
+#'
+#' @param gene_set_analyses A three-level nested list of gene set tables, keyed by assay, gene set type, and contrast.
+#' @param gene_set_analyses_tool A three-level nested list of strings, structured as \code{gene_set_analyses}, indicating the tool used for each gene set analysis.
+#'
+#' @return A three-level nested list of strings, matching the structure of \code{gene_set_analyses}, with missing elements filled as needed.
+check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
+ # gene_set_analyses and gene_set_analyses_tool should have the same list of lists
+ # structure. gene_set_analyses_tool should have a single string
+
+ if (is.null(gene_set_analyses_tool)) {
+ gene_set_analyses_tool <- list()
+ }
+
+ valid_tools <- c("auto", "gsea", "roast")
+
+ safe_get <- function(x, path, default="auto") {
+ # similar to purrr::pluck()
+ if (is.null(x)) {
+ return(default)
+ }
+ for (p in path) {
+ if (!p %in% names(x)) {
+ return(default)
+ }
+ x <- x[[p]]
+ if (is.null(x)) {
+ return(default)
+ }
+ }
+ x
+ }
+
+ lapply(
+ setNames(nm=names(gene_set_analyses)),
+ function(assay_name) {
+ gene_sets <- gene_set_analyses[[assay_name]]
+ lapply(
+ setNames(nm=names(gene_sets)),
+ function(gene_set_name) {
+ contrasts <- gene_sets[[gene_set_name]]
+ lapply(
+ setNames(nm=names(contrasts)),
+ function(contrast_name) {
+ tool_name <- safe_get(gene_set_analyses_tool, c(assay_name, gene_set_name, contrast_name), "auto")
+ if (!is.character(tool_name) || length(tool_name) > 1 || !tool_name %in% valid_tools) {
+ stop(
+ "Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
+ assay_name, ",", gene_set_name, ",", contrast_name,
+ ". It should be one of 'auto', 'gsea' or 'roast'. Found ",
+ tool_name
+ )
+ }
+ tool_name
+ }
+ )
+ }
+ )
+ }
)
}
diff --git a/R/ExploratorySummarizedExperimentList-class.R b/R/ExploratorySummarizedExperimentList-class.R
index 81b0ee4..c9e0ed4 100644
--- a/R/ExploratorySummarizedExperimentList-class.R
+++ b/R/ExploratorySummarizedExperimentList-class.R
@@ -15,7 +15,7 @@
#'
#' @export
-setClass("ExploratorySummarizedExperimentList", contains = "list", representation = representation(
+setClass("ExploratorySummarizedExperimentList", contains = "list", slots = c(
title = "character", author = "character", description = "character", static_pdf = "character",
group_vars = "character", default_groupvar = "character", contrasts = "list", url_roots = "list", gene_sets = "list", gene_set_id_type = "character", ensembl_species = "character"
))
@@ -149,7 +149,7 @@ ExploratorySummarizedExperimentList <- function(eses, title = "", author = "", d
} else {
# Numeric IDs (like entrez will be cast to integers)
- is_numeric <- all(!is.na(as.numeric(annotation[[gene_set_id_type]])))
+ is_numeric <- all(!is.na(suppressWarnings(as.numeric(annotation[[gene_set_id_type]]))))
}
print("Processing gene sets")
diff --git a/R/accessory.R b/R/accessory.R
index 0ffc434..dc44055 100644
--- a/R/accessory.R
+++ b/R/accessory.R
@@ -555,6 +555,51 @@ eselistFromYAML <- function(configfile) {
eselistFromList(config)
}
+#' Reads gene enrichment files
+#' @noRd
+#' @param contrast_spec One of:
+#' - \code{NULL} (meaning no enrichment was analyzed for that contrast)
+#' - a path to a file (e.g. the table output from roast)
+#' - a named list with elements "up" and "down" with paths to files (e.g.
+#' corresponding to gsea up-regulated and down-regulated output tables).
+#'
+#' The two tables from GSEA output will be combined into a single data frame. A column "Direction" with
+#' values "Up" and "Down" will be added.
+#'
+#' @returns A data frame with the file contents (or \code{NULL})
+#'
+read_enrichment_file <- function(contrast_spec) {
+ # contrast_spec may be one file name or two file names (up and down), or NULL
+ if (is.null(contrast_spec) || length(contrast_spec) == 0) {
+ return(NULL)
+ }
+
+ read_one <- function(path) {
+ read.csv(path, sep = getSeparator(path), check.names = FALSE,
+ stringsAsFactors = FALSE, row.names = 1)
+ }
+
+ if (length(contrast_spec) == 1) {
+ return(read_one(contrast_spec))
+ }
+
+ if (length(contrast_spec) == 2) {
+ # This is useful for GSEA output, that splits up and down in two tsv files.
+ # We read both files and set the direction
+ up <- read_one(contrast_spec[["up"]])
+ if (nrow(up) > 0) {
+ up$Direction <- "Up"
+ }
+ down <- read_one(contrast_spec[["down"]])
+ if (nrow(down) > 0) {
+ down$Direction <- "Down"
+ }
+ return(rbind(up, down))
+ }
+
+ stop("gene_set_analyses should have zero, one or two contrast files per gene_set_type")
+}
+
#' Build an ExploratorySummarisedExperimentList from a description provided in a list
#'
#' @param config Hierachical named list with input components. See \code{eselistFromYAML} for detail.
@@ -682,13 +727,17 @@ eselistfromConfig <-
}
if ("gene_set_analyses" %in% names(exp)) {
+ # Basic list to pass to object creation
+ exp$gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(exp$gene_set_analyses, exp$gene_set_analyses_tool)
+
ese_list$gene_set_analyses <- lapply(exp$gene_set_analyses, function(assay) {
lapply(assay, function(gene_set_type) {
- lapply(gene_set_type, function(contrast) {
- read.csv(contrast, check.names = FALSE, stringsAsFactors = FALSE, row.names = 1)
- })
+ lapply(gene_set_type, read_enrichment_file)
})
})
+
+ ese_list$gene_set_analyses <- remove_nulls(ese_list$gene_set_analyses)
+ ese_list$gene_set_analyses_tool <- exp$gene_set_analyses_tool
}
do.call(ExploratorySummarizedExperiment, ese_list)
@@ -751,6 +800,19 @@ eselistfromConfig <-
eselist
}
+#' Recursively remove NULL entries from a nested list
+#' @noRd
+#' @param x A list (possibly nested) from which NULL entries should be removed.
+#'
+#' @return The input list with all NULL entries recursively removed.
+remove_nulls <- function(x) {
+ if (is.list(x) && !is.data.frame(x)) {
+ x <- lapply(x, remove_nulls)
+ x <- Filter(Negate(is.null), x)
+ }
+ return(x)
+}
+
#' Read an expression matrix file and match to specified samples and features
#'
#' @param matrix_file Matrix file
@@ -1470,3 +1532,106 @@ cond_log2_transform_assays <- function(assay_data, log2_assays, threshold = 30,
return(assay_data)
}
+
+#' Build path to the enrichment results
+#'
+#' @details
+#'
+#' The template accepts the following:
+#'
+#' \describe{
+#' \item{\code{\{contrast_name\}}}{Will be replaced by \code{contrast_info$id} argument}
+#' \item{\code{\{geneset_type\}}}{Will be replaced by the \code{geneset_type} argument}
+#' \item{\code{\{target|reference\}}}{If the \code{direction} argument is \code{"up"}, will be replaced
+#' with \code{contrast_info$target}, if it is \code{"down"}, \code{contrast_info$reference} will be used instead.}
+#' }
+#'
+#' @param template A string, such as \code{"/path/to/folder/{contrast_name}-{geneset_type}.csv"} or
+#' \code{"./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv"}
+#' @param contrast_info A list with contrast details: `id`, `reference`, and `target`,
+#' to be replaced in template.
+#' @param geneset_type The name of the geneset type, to be replaced in the template
+#' @param direction Either `"up"`, `"down"` or `NULL`, used to determine how the replacement will happen.
+#'
+#' @returns A string similar to template, but with the templates replaced
+#' @export
+#' @examples
+#' build_enrichment_path(
+#' template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv",
+#' contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"),
+#' geneset_type = "m2.cp.v2024.1.Mm.entrez",
+#' direction = "up"
+#' )
+#'
+build_enrichment_path <- function(template, contrast_info, geneset_type, direction = NULL) {
+ path <- template
+ path <- gsub("{contrast_name}", contrast_info$id, path, fixed = TRUE)
+ path <- gsub("{geneset_type}", geneset_type, path, fixed = TRUE)
+ if (!is.null(direction)) {
+ target_val <- if (direction == "up") contrast_info$target else contrast_info$reference
+ path <- gsub("{target|reference}", target_val, path, fixed = TRUE)
+ }
+ path
+}
+
+#' Detects the enrichment tool used
+#'
+#' @noRd
+#' @param gst The enrichment table
+#'
+#' @returns The enrichment tool as a string, based on whether "NOM p-val" is a column ("gsea") or
+#' either "p value" or "PValue" are found ("roast")
+detect_enrichment_tool <- function(gst) {
+ if ("NOM p-val" %in% colnames(gst)) return("gsea")
+ if (any(c("p value", "PValue") %in% colnames(gst))) return("roast")
+ stop("Could not detect enrichment tool from column names")
+}
+
+
+
+#' Get the expected column names for the gene set enrichment tool
+#'
+#' @noRd
+#' @param gst The enrichment table.
+#' @param gs_tool Either `"roast"` or `"gsea"`.
+#'
+#' @returns A list with three elements: `"pvalue"`, `"fdr"` and `"direction"`. Each element is
+#' a string pointing to the expected column name for that tool.
+get_enrichment_mapping <- function(gst, gs_tool) {
+ mappings <- list(
+ roast = list(pvalue = "p value", fdr = "FDR", direction = "Direction"),
+ gsea = list(pvalue = "NOM p-val", fdr = "FDR q-val", direction = "Direction")
+ )
+ if (gs_tool == "roast") {
+ if ("PValue" %in% colnames(gst)) {
+ mappings[["roast"]][["pvalue"]] <- "PValue"
+ }
+ }
+ mappings[[gs_tool]]
+}
+
+# Returns an error if the table has missing expected columns.
+validate_enrichment_table <- function(gst, gs_tool) {
+ col_map <- get_enrichment_mapping(gst, gs_tool)
+ # sanity checks
+ if (!col_map$pvalue %in% colnames(gst)) {
+ stop(paste0(col_map$pvalue, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", ")))
+ }
+
+ if (!col_map$fdr %in% colnames(gst)) {
+ stop(paste0(col_map$fdr, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", ")))
+ }
+
+ if (!col_map$direction %in% colnames(gst)) {
+ stop(paste0(col_map$direction, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", ")))
+ }
+}
+
+clean_enrichment_table <- function(gst, gs_tool) {
+ if (gs_tool == "gsea") {
+ # gsea tsv files have two useless columns that can be removed:
+ cols_to_remove <- c("GS
follow link to MSigDB", "GS DETAILS")
+ gst <- gst[ , !(colnames(gst) %in% cols_to_remove), drop=FALSE]
+ }
+ gst
+}
\ No newline at end of file
diff --git a/R/genesetanalysistable.R b/R/genesetanalysistable.R
index 02f7a00..f1c9002 100644
--- a/R/genesetanalysistable.R
+++ b/R/genesetanalysistable.R
@@ -237,10 +237,19 @@ genesetanalysistable <- function(input, output, session, eselist) {
selected_contrasts <- getSelectedContrastNumbers()[[1]]
gst <- ese@gene_set_analyses[[assay]][[gene_set_types]][[as.numeric(selected_contrasts)]]
-
- # Rename p value if we have PValue from mroast etc()
-
- colnames(gst) <- sub("PValue", "p value", colnames(gst))
+
+ # Get the tool used for enrichment, or auto-detect it:
+ if ("gene_set_analyses_tool" %in% slotNames(ese)) {
+ gs_tool <- ese@gene_set_analyses_tool[[assay]][[gene_set_types]][[as.numeric(selected_contrasts)]]
+ } else {
+ gs_tool <- "auto"
+ }
+ if (gs_tool == "auto") {
+ gs_tool <- detect_enrichment_tool(gst)
+ }
+ validate_enrichment_table(gst, gs_tool)
+ col_map <- get_enrichment_mapping(gst, gs_tool)
+ gst <- clean_enrichment_table(gst, gs_tool)
# Select out specific gene sets if they've been provided
@@ -258,7 +267,7 @@ genesetanalysistable <- function(input, output, session, eselist) {
# Apply the user's filters
- gst <- gst[gst[["p value"]] < input$pval & gst[["FDR"]] < input$fdr, , drop = FALSE]
+ gst <- gst[gst[[col_map$pvalue]] < input$pval & gst[[col_map$fdr]] < input$fdr, , drop = FALSE]
validate(need(nrow(gst) > 0, "No results matching specified filters"))
@@ -272,7 +281,7 @@ genesetanalysistable <- function(input, output, session, eselist) {
gene_sets <- getGeneSets()
gst$significant_genes <- apply(gst, 1, function(row) {
- if (row["Direction"] == "Up") {
+ if (row[col_map$direction] == "Up") {
siggenes <- intersect(gene_sets[[getGeneSetTypes()]][[row["gene_set_id"]]], up)
} else {
siggenes <- intersect(gene_sets[[getGeneSetTypes()]][[row["gene_set_id"]]], down)
@@ -289,7 +298,7 @@ genesetanalysistable <- function(input, output, session, eselist) {
getDisplayGeneSetAnalysis <- reactive({
gst <- getGeneSetAnalysis()
- # Add links, but use a prettiefied version of the gene set name that re-flows to take up less space
+ # Add links, but use a prettified version of the gene set name that re-flows to take up less space
gst <- linkMatrix(gst, eselist@url_roots, data.frame(gene_set_id = prettifyGeneSetName(gst$gene_set_id), stringsAsFactors = FALSE))
colnames(gst) <- prettifyVariablename(colnames(gst))
@@ -297,7 +306,7 @@ genesetanalysistable <- function(input, output, session, eselist) {
gst
})
- # Make an explantory file name
+ # Make an explanatory file name
makeFileName <- reactive({
gsub("[^a-zA-Z0-9_]", "_", paste("gsa", getSelectedContrastNames(), getGeneSetTypes()))
diff --git a/R/genesetbarcodeplot.R b/R/genesetbarcodeplot.R
index c33df3d..2c5911a 100644
--- a/R/genesetbarcodeplot.R
+++ b/R/genesetbarcodeplot.R
@@ -166,8 +166,23 @@ genesetbarcodeplot <- function(input, output, session, eselist) {
contrast_numbers <- as.numeric(getSelectedContrastNumbers()[[1]][[1]])
if (gene_set_types %in% names(ese@gene_set_analyses[[assay]]) && gene_set_names %in% rownames(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]])) {
- fdr <- paste(signif(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]][gene_set_names, "FDR"], 3), collapse = ",")
- direction <- paste(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]][gene_set_names, "Direction"], collapse = ",")
+ gst <- ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]]
+
+ # Get the tool used for enrichment, or auto-detect it:
+ if ("gene_set_analyses_tool" %in% slotNames(ese)) {
+ gs_tool <- ese@gene_set_analyses_tool[[assay]][[gene_set_types]][[contrast_numbers]]
+ } else {
+ gs_tool <- "auto"
+ }
+ if (gs_tool == "auto") {
+ gs_tool <- detect_enrichment_tool(gst)
+ }
+ validate_enrichment_table(gst, gs_tool)
+ col_map <- get_enrichment_mapping(gst, gs_tool)
+ gst <- clean_enrichment_table(gst, gs_tool)
+
+ fdr <- paste(signif(gst[gene_set_names, col_map$fdr], 3), collapse = ",")
+ direction <- paste(gst[gene_set_names, col_map$direction], collapse = ",")
title_components <- c(title_components, paste(paste("Direction:", direction), paste("FDR:", fdr)))
} else {
title_components <- c(title_components, "(no association)")
diff --git a/R/shinyngs.R b/R/shinyngs.R
index 1e8ded2..1a6fa68 100644
--- a/R/shinyngs.R
+++ b/R/shinyngs.R
@@ -194,7 +194,5 @@
#' button} \item{summarizematrix}{Module which takes a matrix and a factor which
#' groups columns, and summarises by a supplied method, mean by default} }}
#'
-#' @docType package
#' @name shinyngs
-
-NULL
+"_PACKAGE"
diff --git a/exec/make_app_from_files.R b/exec/make_app_from_files.R
index f5960f1..17f284a 100755
--- a/exec/make_app_from_files.R
+++ b/exec/make_app_from_files.R
@@ -132,6 +132,30 @@ option_list <- list(
default = "padj",
help = "Column in differential results files holding q values/ adjusted p values."
),
+ make_option(
+ "--enrichment_gene_sets",
+ type = "character",
+ default = NULL,
+ help = "Comma-separated list of the GMT files used in the enrichment analyses."
+ ),
+ make_option(
+ "--enrichment_filename_template",
+ type = "character",
+ default = NULL,
+ help = "Template of filenames containing enrichment results. For instance '{contrast_name}_enrichment_for_{geneset_type}.tsv' or, if up and down regulated results are provided separately '{contrast_name}.{geneset_type}.gsea_report_for_{target|reference}.tsv'. '{contrast_name}', '{geneset_type}', and optionally '{target|reference}' are substituted dynamically for each contrast and geneset type. If not given, no enrichment results are included"
+ ),
+ make_option(
+ "--enrichment_skip_missing",
+ action = "store_true",
+ default = FALSE,
+ help = "Ignore if any gene set enrichment result for any contrast is missing"
+ ),
+ make_option(
+ "--enrichment_gene_type_id",
+ type = "character",
+ default = "gene_name",
+ help = "Gene identifier in the enrichment gene sets. Use this to specify that the gmt files represent genes with the gene name or an entrez id"
+ ),
make_option(
c("-o", "--output_directory"),
type = "character",
@@ -272,9 +296,86 @@ contrast_stats[[opt$assay_entity_name]] <- lapply(contrast_stats_files, function
)
})
+# Enrichment results:
+# To show enrichment results we need:
+# - The contrasts file opt$contrast_file
+# - The gmt enrichment files used
+# - The actual enrichment results
+#
+
+if (!is.null(opt$enrichment_filename_template)) {
+ # Ensure we have gene sets and contrasts
+ if (is.null(opt$contrast_file)) {
+ stop("When --enrichment_filename_template is given, --contrast_file is required")
+ }
+ if (is.null(opt$enrichment_gene_sets)) {
+ stop("When --enrichment_filename_template is given, --enrichment_gene_sets is required")
+ }
+
+ contrasts_df <- read_metadata(opt$contrast_file)
+ genesets_files <- simpleSplit(opt$enrichment_gene_sets)
+ names(genesets_files) <- tools::file_path_sans_ext(basename(genesets_files))
+
+ gene_set_analyses <- list(
+ lapply(setNames(nm=names(genesets_files)), function(geneset_type) {
+ lapply(setNames(nm=contrasts_df$id), function(contrast_name) {
+ ctrst_info <- as.list(contrasts_df[contrasts_df$id == contrast_name,])
+ # Two types of opt$enrichment_filename_template are supported:
+ # - The template points to a single file for each contrast and geneset_type
+ # - The template points to two files for each contrast and geneset_type. In
+ # this case, one file is for the up-regulated results and another file is
+ # for the down-regulated results.
+ #
+ # The template may look like:
+ # "gsea_for_{contrast_name}_using_{geneset_type}_for_{target|reference}.tsv"
+ # Which for a contrast "disease_vs_healthy" with target="disease" and reference="healthy"
+ # and geneset_type "geo_bp", would result in shinyngs expecting two files named:
+ # up: "gsea_for_disease_vs_healthy_using_geo_bp_for_disease.tsv"
+ # down: "gsea_for_disease_vs_healthy_using_geo_bp_for_healthy.tsv"
+ #
+ # If the template does not include {target|reference} we assume there is
+ # a single file per contrast and geneset type, for instance:
+ # "gsea_for_{contrast_name}_using_{geneset_type}_results.tsv"
+ # becomes:
+ # "gsea_for_disease_vs_healthy_using_geo_bp_results.tsv"
+ if (grepl("target\\|reference", opt$enrichment_filename_template)) {
+ up_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type, "up")
+ down_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type, "down")
+ if (file.exists(up_file) && file.exists(down_file)) {
+ list("up" = up_file, "down" = down_file)
+ } else {
+ if (opt$enrichment_skip_missing) {
+ NULL
+ } else {
+ stop(sprintf("both enrichment files should exist: %s and %s", up_file, down_file))
+ }
+ }
+ } else {
+ enrichment_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type)
+ if (file.exists(enrichment_file)) {
+ enrichment_file
+ } else {
+ if (opt$enrichment_skip_missing) {
+ NULL
+ } else {
+ stop(sprintf("enrichment file: %s does not exist", enrichment_file))
+ }
+ }
+ }
+ })
+ })
+ )
+ names(gene_set_analyses) <- names(assay_files)[contrast_stats_assay]
+} else {
+ gene_set_analyses <- list()
+ genesets_files <- list()
+}
+
+
+
################################################
################################################
-## Build the app ##
+## Build the app ##
################################################
################################################
@@ -294,7 +395,8 @@ experiments[[opt$assay_entity_name]] <- list(
file = x,
measure = "counts"
)
- })
+ }),
+ "gene_set_analyses" = gene_set_analyses
)
shiny_config <- list(
@@ -324,6 +426,12 @@ if (!is.null(opt$description)) {
shiny_config[['report']] <- opt$report_markdown_file
}
+if (length(genesets_files) > 0) {
+ shiny_config[["gene_set_id_type"]] <- opt$enrichment_gene_type_id
+ shiny_config[["gene_sets"]] <- genesets_files
+}
+
+
myesel <- eselistfromConfig(
shiny_config,
log2_assays = opt$log2_assays,
@@ -332,7 +440,7 @@ myesel <- eselistfromConfig(
# Write output
-dir.create(opt$output_directory, showWarnings = FALSE)
+dir.create(opt$output_directory, showWarnings = FALSE, recursive = TRUE)
saveRDS(myesel, file = file.path(opt$output_directory, "data.rds"))
writeLines(
c(
@@ -346,7 +454,7 @@ writeLines(
)
# If deployment has been indicated, try to do that. Needs SHINYAPPS_SECRET AND
-# SHINYAPPS_TOKEN to be set in the evironment
+# SHINYAPPS_TOKEN to be set in the environment
if (opt$deploy_app) {
library(rsconnect)
diff --git a/man/ExploratorySummarizedExperiment-class.Rd b/man/ExploratorySummarizedExperiment-class.Rd
index ddf1fd4..1603c2d 100644
--- a/man/ExploratorySummarizedExperiment-class.Rd
+++ b/man/ExploratorySummarizedExperiment-class.Rd
@@ -5,9 +5,7 @@
\alias{ExploratorySummarizedExperiment-class}
\title{The ExploratorySummarizedExperiment class}
\description{
-Subclass of SummarizedExperiment if present in the SummarizedExperiment
-package (newer versions of Bioconductor have moved this from GenomicRanges),
-otherwise of SummarizedExperiment0.
+Subclass of SummarizedExperiment.
}
\section{Slots}{
@@ -27,5 +25,7 @@ otherwise of SummarizedExperiment0.
\item{\code{dexseq_results}}{list.}
\item{\code{read_reports}}{list.}
+
+\item{\code{gene_set_analyses_tool}}{list.}
}}
diff --git a/man/ExploratorySummarizedExperiment.Rd b/man/ExploratorySummarizedExperiment.Rd
index be91e4d..a3d43eb 100644
--- a/man/ExploratorySummarizedExperiment.Rd
+++ b/man/ExploratorySummarizedExperiment.Rd
@@ -15,7 +15,8 @@ ExploratorySummarizedExperiment(
assay_measures = list(),
gene_set_analyses = list(),
dexseq_results = list(),
- read_reports = list()
+ read_reports = list(),
+ gene_set_analyses_tool = list()
)
}
\arguments{
@@ -46,9 +47,8 @@ correspond to 'contrasts' set in the containing SummarizedExperimentList.}
\item{assay_measures}{Optional List of measures to display related to each
assay.}
-\item{gene_set_analyses}{List of lists of gene set tables keyed first by
-gene set
-type and secondly by contrast}
+\item{gene_set_analyses}{Three-level nested lists of gene set tables keyed first by
+assay, then by gene set type and then by contrast.}
\item{dexseq_results}{An optional list of \code{DEXSeqResults} objects
corresponding to the contrasts listed in the \code{contrasts} slot..}
@@ -56,6 +56,10 @@ corresponding to the contrasts listed in the \code{contrasts} slot..}
\item{read_reports}{A named list of matrices with read counts in columns
and sample names in rows. Useful for providing mapped read counts,
counts per gene type etc}
+
+\item{gene_set_analyses_tool}{Three-level nested lists of a string, nested as \code{gene_set_analyses}.
+Each string may be \code{"auto"} (the default), \code{"gsea"} or \code{"roast"}. It defines the format of the
+corresponding \code{gene_set_analyses} table.}
}
\value{
output An ExploratoryRangedSummarizedExperient object
diff --git a/man/build_enrichment_path.Rd b/man/build_enrichment_path.Rd
new file mode 100644
index 0000000..5298973
--- /dev/null
+++ b/man/build_enrichment_path.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/accessory.R
+\name{build_enrichment_path}
+\alias{build_enrichment_path}
+\title{Build path to the enrichment results}
+\usage{
+build_enrichment_path(template, contrast_info, geneset_type, direction = NULL)
+}
+\arguments{
+\item{template}{A string, such as \code{"/path/to/folder/{contrast_name}-{geneset_type}.csv"} or
+\code{"./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv"}}
+
+\item{contrast_info}{A list with contrast details: `id`, `reference`, and `target`,
+to be replaced in template.}
+
+\item{geneset_type}{The name of the geneset type, to be replaced in the template}
+
+\item{direction}{Either `"up"`, `"down"` or `NULL`, used to determine how the replacement will happen.}
+}
+\value{
+A string similar to template, but with the templates replaced
+}
+\description{
+Build path to the enrichment results
+}
+\details{
+The template accepts the following:
+
+\describe{
+ \item{\code{\{contrast_name\}}}{Will be replaced by \code{contrast_info$id} argument}
+ \item{\code{\{geneset_type\}}}{Will be replaced by the \code{geneset_type} argument}
+ \item{\code{\{target|reference\}}}{If the \code{direction} argument is \code{"up"}, will be replaced
+ with \code{contrast_info$target}, if it is \code{"down"}, \code{contrast_info$reference} will be used instead.}
+}
+}
+\examples{
+build_enrichment_path(
+ template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv",
+ contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"),
+ geneset_type = "m2.cp.v2024.1.Mm.entrez",
+ direction = "up"
+)
+
+}
diff --git a/man/cond_log2_transform_assays.Rd b/man/cond_log2_transform_assays.Rd
index 6db3ea0..7441115 100644
--- a/man/cond_log2_transform_assays.Rd
+++ b/man/cond_log2_transform_assays.Rd
@@ -9,7 +9,8 @@ cond_log2_transform_assays(
log2_assays,
threshold = 30,
reverse = FALSE,
- prettify_names = TRUE
+ prettify_names = TRUE,
+ invert_assays = FALSE
)
}
\arguments{
@@ -26,6 +27,8 @@ This is only checked if should_transform is NULL.}
\item{reverse}{Boolean, should we unlog rather than log?}
\item{prettify_names}{Boolean. Prettify element names? Passed to validate_indices().}
+
+\item{invert_assays}{Boolean, apply transform to assays NOT specified in log2_assays.}
}
\value{
A modified assay_data list.
diff --git a/man/read_contrasts.Rd b/man/read_contrasts.Rd
index c4c0e55..fcc4b0f 100644
--- a/man/read_contrasts.Rd
+++ b/man/read_contrasts.Rd
@@ -38,5 +38,13 @@ shinyngs?}
output Validated contrasts data frame
}
\description{
-Read and validate a contrasts file against sample metadata
+Checks:
+1. No duplicate contrast IDs. Ensure that the required columns (variable, reference, target) are present.
+2. Values in the contrast variable column exist as column names in the sample metadata.
+3. If blocking factors are supplied, checks that they are present in the sample metadata.
+4. Design matrix is full rank.
+5. Warn about continuous covariates (e.g. numeric patient IDs treated as continuous).
+6. Values of specified columns don't contain special characters.
+7. Verify that the specified reference and target values exist in the corresponding sample metadata column.
+8. Issue a warning if the reference and target levels are identical.
}
diff --git a/man/shinyngs.Rd b/man/shinyngs.Rd
index 6a761b3..429f03f 100644
--- a/man/shinyngs.Rd
+++ b/man/shinyngs.Rd
@@ -2,6 +2,7 @@
% Please edit documentation in R/shinyngs.R
\docType{package}
\name{shinyngs}
+\alias{shinyngs-package}
\alias{shinyngs}
\title{Interactive downstream analysis with ShinyNGS.}
\description{
@@ -212,3 +213,7 @@ button} \item{summarizematrix}{Module which takes a matrix and a factor which
groups columns, and summarises by a supplied method, mean by default} }}
}
+\author{
+\strong{Maintainer}: Jonathan Manning \email{jonathan.manning@seqera.io}
+
+}
diff --git a/man/validate_indices.Rd b/man/validate_indices.Rd
index a39970b..423bdfd 100644
--- a/man/validate_indices.Rd
+++ b/man/validate_indices.Rd
@@ -4,13 +4,20 @@
\alias{validate_indices}
\title{Validate assay indices based on a given string.}
\usage{
-validate_indices(assay_data, index_string, prettify_names = TRUE)
+validate_indices(
+ assay_data,
+ index_string,
+ invert_assays = FALSE,
+ prettify_names = TRUE
+)
}
\arguments{
\item{assay_data}{A list containing matrices as assay data.}
\item{index_string}{A string that can be a comma-separated list of integers or assay names.}
+\item{invert_assays}{Boolean, return the indices NOT specified.}
+
\item{prettify_names}{Boolean. Prettify element names?}
}
\value{
diff --git a/tests/testthat/test-accessory.R b/tests/testthat/test-accessory.R
index 06337c3..332d5c7 100644
--- a/tests/testthat/test-accessory.R
+++ b/tests/testthat/test-accessory.R
@@ -125,4 +125,99 @@ contrasts:
expect_equal(contrasts$make_contrasts_str[3], "genotypeWT.treatmentTreated.time")
unlink(yaml_file)
-})
\ No newline at end of file
+})
+
+test_that("read_enrichment_file parses file correctly", {
+ all_text <- "geneset,p value,FDR,Direction\ndummy,0.04,0.01,Up\n"
+ all_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv")
+ all_df <- read.csv(textConnection(all_text), check.names = FALSE, row.names = 1)
+ write(all_text, all_file)
+
+ up_text <- "geneset,p value,FDR\ndummy,0.04,0.01\n"
+ up_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv")
+ up_df <- read.csv(textConnection(up_text), check.names = FALSE, row.names = 1)
+ write(up_text, up_file)
+
+ down_text <- "geneset,p value,FDR\ndummy2,0.04,0.01\n"
+ down_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv")
+ down_df <- read.csv(textConnection(down_text), check.names = FALSE, row.names = 1)
+ write(down_text, down_file)
+
+ up_df2 <- up_df
+ up_df2$Direction <- "Up"
+ down_df2 <- down_df
+ down_df2$Direction <- "Down"
+ combined_df <- rbind(up_df2, down_df2)
+
+ expect_equal(read_enrichment_file(NULL), NULL)
+ expect_equal(read_enrichment_file(all_file), all_df)
+ expect_equal(read_enrichment_file(c("up" = up_file,"down" = down_file)),
+ combined_df)
+})
+
+
+test_that("remove_nulls works", {
+ expect_equal(remove_nulls(list(1, NULL, 2)), list(1, 2))
+})
+
+test_that("build_enrichment_path replaces variables", {
+ expect_equal(
+ build_enrichment_path(
+ template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv",
+ contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"),
+ geneset_type = "m2.cp.v2024.1.Mm.entrez",
+ direction = "up"
+ ),
+ "./disease_vs_ctrl/m2.cp.v2024.1.Mm.entrez/report_for_disease.csv"
+ )
+})
+
+gst_gsea <- data.frame(
+ "NAME" = "dummy",
+ "GS
follow link to MSigDB" = "",
+ "GS DETAILS" = "",
+ "SIZE" = 0,
+ "ES" = 0,
+ "NES" = 0,
+ "NOM p-val" = 0,
+ "FDR q-val" = 0,
+ "FWER p-val" = 0,
+ "RANK AT MAX LEADING EDGE" = "",
+ "Direction" = "Up",
+ check.names = FALSE
+)
+
+gst_roast <- data.frame(
+ "NAME" = "",
+ "p value" = 0.01,
+ "FDR" = 0.01,
+ "Direction" = "Up",
+ check.names = FALSE
+)
+
+test_that("detect_enrichment_tool works", {
+ expect_equal(detect_enrichment_tool(gst_gsea), "gsea")
+ expect_equal(detect_enrichment_tool(gst_roast), "roast")
+})
+
+test_that("get_enrichment_mapping works", {
+ mappings <- list(
+ roast = list(pvalue = "p value", fdr = "FDR", direction = "Direction"),
+ gsea = list(pvalue = "NOM p-val", fdr = "FDR q-val", direction = "Direction")
+ )
+ expect_equal(get_enrichment_mapping(gst_gsea, "gsea"), mappings$gsea)
+ expect_equal(get_enrichment_mapping(gst_roast, "roast"), mappings$roast)
+})
+
+test_that("clean_enrichment_table works", {
+ df <- clean_enrichment_table(gst_gsea, "gsea")
+ expect_false("GS
follow link to MSigDB" %in% colnames(df))
+})
+
+test_that("validate_enrichment_table works", {
+ validate_enrichment_table(gst_gsea, "gsea")
+ validate_enrichment_table(gst_roast, "roast")
+ gst_gsea_wrong <- gst_gsea
+ gst_gsea_wrong[["NOM p-val"]] <- NULL
+ expect_error(validate_enrichment_table(gst_gsea_wrong, "gsea"))
+})
diff --git a/vignettes/shinyngs.Rmd b/vignettes/shinyngs.Rmd
index a502441..4c6d619 100644
--- a/vignettes/shinyngs.Rmd
+++ b/vignettes/shinyngs.Rmd
@@ -259,6 +259,7 @@ head(myannotation)
Now we can put these things together to create an 'ExploratorySummarizedExperiment:
```{r eval=TRUE}
+library(shinyngs)
myese <- ExploratorySummarizedExperiment(
assays = SimpleList(
myassays