diff --git a/DESCRIPTION b/DESCRIPTION index 031b194..c55e19e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,9 +5,10 @@ Authors@R: person("Jonathan", "Manning", email = "jonathan.manning@seqera.io", r Description: Provides Shiny applications for various array and NGS applications. Currently very RNA-seq centric, with plans for expansion. Depends: - R (>= 3.2.2), + R (>= 3.4.0), SummarizedExperiment License: AGPL (>= 3) +Encoding: UTF-8 LazyData: true Imports: cluster, @@ -53,6 +54,6 @@ Remotes: cran/d3heatmap, pinin4fjords/zhangneurons biocViews: Software -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 VignetteBuilder: knitr Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 389cf11..ee3fe7f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(annotatedHeatmap) export(anova_pca_metadata) export(barcode_plot) export(bootstrapMedian) +export(build_enrichment_path) export(calculateDendrogram) export(calculateDist) export(checkListIsSubset) diff --git a/R/ExploratorySummarizedExperiment-class.R b/R/ExploratorySummarizedExperiment-class.R index a171838..5a1c73c 100644 --- a/R/ExploratorySummarizedExperiment-class.R +++ b/R/ExploratorySummarizedExperiment-class.R @@ -1,8 +1,6 @@ #' The ExploratorySummarizedExperiment class #' -#' Subclass of SummarizedExperiment if present in the SummarizedExperiment -#' package (newer versions of Bioconductor have moved this from GenomicRanges), -#' otherwise of SummarizedExperiment0. +#' Subclass of SummarizedExperiment. #' #' @slot idfield character. #' @slot entrezgenefield character. @@ -12,21 +10,17 @@ #' @slot gene_set_analyses list. #' @slot dexseq_results list. #' @slot read_reports list. +#' @slot gene_set_analyses_tool list. #' #' @export -setClass("ExploratorySummarizedExperiment", contains = ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment", - "SummarizedExperiment0" -), representation = representation( +setClass("ExploratorySummarizedExperiment", contains = "SummarizedExperiment", slots = c( idfield = "character", entrezgenefield = "character", labelfield = "character", contrast_stats = "list", - assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list" + assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list", gene_set_analyses_tool = "list" )) setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function(from) { - as( - (as(from, ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment", "SummarizedExperiment0"))), - "ExploratorySummarizedExperiment" - ) + as(as(from, "SummarizedExperiment"), "ExploratorySummarizedExperiment") }) #' ExploratorySummarizedExperiments @@ -63,21 +57,23 @@ setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function( #' correspond to 'contrasts' set in the containing SummarizedExperimentList. #' @param assay_measures Optional List of measures to display related to each #' assay. -#' @param gene_set_analyses List of lists of gene set tables keyed first by -#' gene set -#' type and secondly by contrast +#' @param gene_set_analyses Three-level nested lists of gene set tables keyed first by +#' assay, then by gene set type and then by contrast. #' @param read_reports A named list of matrices with read counts in columns #' and sample names in rows. Useful for providing mapped read counts, #' counts per gene type etc #' @param dexseq_results An optional list of \code{DEXSeqResults} objects #' corresponding to the contrasts listed in the \code{contrasts} slot.. +#' @param gene_set_analyses_tool Three-level nested lists of a string, nested as \code{gene_set_analyses}. +#' Each string may be \code{"auto"} (the default), \code{"gsea"} or \code{"roast"}. It defines the format of the +#' corresponding \code{gene_set_analyses} table. #' #' @return output An ExploratoryRangedSummarizedExperient object #' @rawNamespace import(SummarizedExperiment, except = 'shift') #' @export ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield, labelfield = character(), entrezgenefield = character(), contrast_stats = list(), - assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list()) { + assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list(), gene_set_analyses_tool = list()) { # Reset NULLs to empty if (is.null(entrezgenefield)) { @@ -116,6 +112,9 @@ ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield annotation <- data.frame(lapply(annotation, as.character), stringsAsFactors = FALSE, check.names = FALSE, row.names = rownames(annotation))[all_rows, ] + # Ensure consistency between gene_set_analyses with gene_set_analyses_tool + gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(gene_set_analyses, gene_set_analyses_tool) + # Build the object sumexp <- SummarizedExperiment(assays = assays, colData = DataFrame(colData, check.names = FALSE)) @@ -123,6 +122,75 @@ ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield new("ExploratorySummarizedExperiment", sumexp, idfield = idfield, labelfield = labelfield, entrezgenefield = entrezgenefield, assay_measures = assay_measures, - contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports + contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports, + gene_set_analyses_tool = gene_set_analyses_tool + ) +} + +#' Ensure consistency between gene_set_analyses and gene_set_analyses_tool structures +#' @noRd +#' +#' @description +#' Ensures that the structure of \code{gene_set_analyses_tool} matches that of \code{gene_set_analyses}, +#' filling in missing elements as needed. Each entry in \code{gene_set_analyses_tool} should be a string +#' (e.g., "auto", "gsea", or "roast") corresponding to the format of the associated gene set analysis table. +#' +#' @param gene_set_analyses A three-level nested list of gene set tables, keyed by assay, gene set type, and contrast. +#' @param gene_set_analyses_tool A three-level nested list of strings, structured as \code{gene_set_analyses}, indicating the tool used for each gene set analysis. +#' +#' @return A three-level nested list of strings, matching the structure of \code{gene_set_analyses}, with missing elements filled as needed. +check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) { + # gene_set_analyses and gene_set_analyses_tool should have the same list of lists + # structure. gene_set_analyses_tool should have a single string + + if (is.null(gene_set_analyses_tool)) { + gene_set_analyses_tool <- list() + } + + valid_tools <- c("auto", "gsea", "roast") + + safe_get <- function(x, path, default="auto") { + # similar to purrr::pluck() + if (is.null(x)) { + return(default) + } + for (p in path) { + if (!p %in% names(x)) { + return(default) + } + x <- x[[p]] + if (is.null(x)) { + return(default) + } + } + x + } + + lapply( + setNames(nm=names(gene_set_analyses)), + function(assay_name) { + gene_sets <- gene_set_analyses[[assay_name]] + lapply( + setNames(nm=names(gene_sets)), + function(gene_set_name) { + contrasts <- gene_sets[[gene_set_name]] + lapply( + setNames(nm=names(contrasts)), + function(contrast_name) { + tool_name <- safe_get(gene_set_analyses_tool, c(assay_name, gene_set_name, contrast_name), "auto") + if (!is.character(tool_name) || length(tool_name) > 1 || !tool_name %in% valid_tools) { + stop( + "Invalid gene_set_analyses_tool. gene_set_analyses_tool for ", + assay_name, ",", gene_set_name, ",", contrast_name, + ". It should be one of 'auto', 'gsea' or 'roast'. Found ", + tool_name + ) + } + tool_name + } + ) + } + ) + } ) } diff --git a/R/ExploratorySummarizedExperimentList-class.R b/R/ExploratorySummarizedExperimentList-class.R index 81b0ee4..c9e0ed4 100644 --- a/R/ExploratorySummarizedExperimentList-class.R +++ b/R/ExploratorySummarizedExperimentList-class.R @@ -15,7 +15,7 @@ #' #' @export -setClass("ExploratorySummarizedExperimentList", contains = "list", representation = representation( +setClass("ExploratorySummarizedExperimentList", contains = "list", slots = c( title = "character", author = "character", description = "character", static_pdf = "character", group_vars = "character", default_groupvar = "character", contrasts = "list", url_roots = "list", gene_sets = "list", gene_set_id_type = "character", ensembl_species = "character" )) @@ -149,7 +149,7 @@ ExploratorySummarizedExperimentList <- function(eses, title = "", author = "", d } else { # Numeric IDs (like entrez will be cast to integers) - is_numeric <- all(!is.na(as.numeric(annotation[[gene_set_id_type]]))) + is_numeric <- all(!is.na(suppressWarnings(as.numeric(annotation[[gene_set_id_type]])))) } print("Processing gene sets") diff --git a/R/accessory.R b/R/accessory.R index 0ffc434..dc44055 100644 --- a/R/accessory.R +++ b/R/accessory.R @@ -555,6 +555,51 @@ eselistFromYAML <- function(configfile) { eselistFromList(config) } +#' Reads gene enrichment files +#' @noRd +#' @param contrast_spec One of: +#' - \code{NULL} (meaning no enrichment was analyzed for that contrast) +#' - a path to a file (e.g. the table output from roast) +#' - a named list with elements "up" and "down" with paths to files (e.g. +#' corresponding to gsea up-regulated and down-regulated output tables). +#' +#' The two tables from GSEA output will be combined into a single data frame. A column "Direction" with +#' values "Up" and "Down" will be added. +#' +#' @returns A data frame with the file contents (or \code{NULL}) +#' +read_enrichment_file <- function(contrast_spec) { + # contrast_spec may be one file name or two file names (up and down), or NULL + if (is.null(contrast_spec) || length(contrast_spec) == 0) { + return(NULL) + } + + read_one <- function(path) { + read.csv(path, sep = getSeparator(path), check.names = FALSE, + stringsAsFactors = FALSE, row.names = 1) + } + + if (length(contrast_spec) == 1) { + return(read_one(contrast_spec)) + } + + if (length(contrast_spec) == 2) { + # This is useful for GSEA output, that splits up and down in two tsv files. + # We read both files and set the direction + up <- read_one(contrast_spec[["up"]]) + if (nrow(up) > 0) { + up$Direction <- "Up" + } + down <- read_one(contrast_spec[["down"]]) + if (nrow(down) > 0) { + down$Direction <- "Down" + } + return(rbind(up, down)) + } + + stop("gene_set_analyses should have zero, one or two contrast files per gene_set_type") +} + #' Build an ExploratorySummarisedExperimentList from a description provided in a list #' #' @param config Hierachical named list with input components. See \code{eselistFromYAML} for detail. @@ -682,13 +727,17 @@ eselistfromConfig <- } if ("gene_set_analyses" %in% names(exp)) { + # Basic list to pass to object creation + exp$gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(exp$gene_set_analyses, exp$gene_set_analyses_tool) + ese_list$gene_set_analyses <- lapply(exp$gene_set_analyses, function(assay) { lapply(assay, function(gene_set_type) { - lapply(gene_set_type, function(contrast) { - read.csv(contrast, check.names = FALSE, stringsAsFactors = FALSE, row.names = 1) - }) + lapply(gene_set_type, read_enrichment_file) }) }) + + ese_list$gene_set_analyses <- remove_nulls(ese_list$gene_set_analyses) + ese_list$gene_set_analyses_tool <- exp$gene_set_analyses_tool } do.call(ExploratorySummarizedExperiment, ese_list) @@ -751,6 +800,19 @@ eselistfromConfig <- eselist } +#' Recursively remove NULL entries from a nested list +#' @noRd +#' @param x A list (possibly nested) from which NULL entries should be removed. +#' +#' @return The input list with all NULL entries recursively removed. +remove_nulls <- function(x) { + if (is.list(x) && !is.data.frame(x)) { + x <- lapply(x, remove_nulls) + x <- Filter(Negate(is.null), x) + } + return(x) +} + #' Read an expression matrix file and match to specified samples and features #' #' @param matrix_file Matrix file @@ -1470,3 +1532,106 @@ cond_log2_transform_assays <- function(assay_data, log2_assays, threshold = 30, return(assay_data) } + +#' Build path to the enrichment results +#' +#' @details +#' +#' The template accepts the following: +#' +#' \describe{ +#' \item{\code{\{contrast_name\}}}{Will be replaced by \code{contrast_info$id} argument} +#' \item{\code{\{geneset_type\}}}{Will be replaced by the \code{geneset_type} argument} +#' \item{\code{\{target|reference\}}}{If the \code{direction} argument is \code{"up"}, will be replaced +#' with \code{contrast_info$target}, if it is \code{"down"}, \code{contrast_info$reference} will be used instead.} +#' } +#' +#' @param template A string, such as \code{"/path/to/folder/{contrast_name}-{geneset_type}.csv"} or +#' \code{"./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv"} +#' @param contrast_info A list with contrast details: `id`, `reference`, and `target`, +#' to be replaced in template. +#' @param geneset_type The name of the geneset type, to be replaced in the template +#' @param direction Either `"up"`, `"down"` or `NULL`, used to determine how the replacement will happen. +#' +#' @returns A string similar to template, but with the templates replaced +#' @export +#' @examples +#' build_enrichment_path( +#' template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv", +#' contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"), +#' geneset_type = "m2.cp.v2024.1.Mm.entrez", +#' direction = "up" +#' ) +#' +build_enrichment_path <- function(template, contrast_info, geneset_type, direction = NULL) { + path <- template + path <- gsub("{contrast_name}", contrast_info$id, path, fixed = TRUE) + path <- gsub("{geneset_type}", geneset_type, path, fixed = TRUE) + if (!is.null(direction)) { + target_val <- if (direction == "up") contrast_info$target else contrast_info$reference + path <- gsub("{target|reference}", target_val, path, fixed = TRUE) + } + path +} + +#' Detects the enrichment tool used +#' +#' @noRd +#' @param gst The enrichment table +#' +#' @returns The enrichment tool as a string, based on whether "NOM p-val" is a column ("gsea") or +#' either "p value" or "PValue" are found ("roast") +detect_enrichment_tool <- function(gst) { + if ("NOM p-val" %in% colnames(gst)) return("gsea") + if (any(c("p value", "PValue") %in% colnames(gst))) return("roast") + stop("Could not detect enrichment tool from column names") +} + + + +#' Get the expected column names for the gene set enrichment tool +#' +#' @noRd +#' @param gst The enrichment table. +#' @param gs_tool Either `"roast"` or `"gsea"`. +#' +#' @returns A list with three elements: `"pvalue"`, `"fdr"` and `"direction"`. Each element is +#' a string pointing to the expected column name for that tool. +get_enrichment_mapping <- function(gst, gs_tool) { + mappings <- list( + roast = list(pvalue = "p value", fdr = "FDR", direction = "Direction"), + gsea = list(pvalue = "NOM p-val", fdr = "FDR q-val", direction = "Direction") + ) + if (gs_tool == "roast") { + if ("PValue" %in% colnames(gst)) { + mappings[["roast"]][["pvalue"]] <- "PValue" + } + } + mappings[[gs_tool]] +} + +# Returns an error if the table has missing expected columns. +validate_enrichment_table <- function(gst, gs_tool) { + col_map <- get_enrichment_mapping(gst, gs_tool) + # sanity checks + if (!col_map$pvalue %in% colnames(gst)) { + stop(paste0(col_map$pvalue, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", "))) + } + + if (!col_map$fdr %in% colnames(gst)) { + stop(paste0(col_map$fdr, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", "))) + } + + if (!col_map$direction %in% colnames(gst)) { + stop(paste0(col_map$direction, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", "))) + } +} + +clean_enrichment_table <- function(gst, gs_tool) { + if (gs_tool == "gsea") { + # gsea tsv files have two useless columns that can be removed: + cols_to_remove <- c("GS
follow link to MSigDB", "GS DETAILS") + gst <- gst[ , !(colnames(gst) %in% cols_to_remove), drop=FALSE] + } + gst +} \ No newline at end of file diff --git a/R/genesetanalysistable.R b/R/genesetanalysistable.R index 02f7a00..f1c9002 100644 --- a/R/genesetanalysistable.R +++ b/R/genesetanalysistable.R @@ -237,10 +237,19 @@ genesetanalysistable <- function(input, output, session, eselist) { selected_contrasts <- getSelectedContrastNumbers()[[1]] gst <- ese@gene_set_analyses[[assay]][[gene_set_types]][[as.numeric(selected_contrasts)]] - - # Rename p value if we have PValue from mroast etc() - - colnames(gst) <- sub("PValue", "p value", colnames(gst)) + + # Get the tool used for enrichment, or auto-detect it: + if ("gene_set_analyses_tool" %in% slotNames(ese)) { + gs_tool <- ese@gene_set_analyses_tool[[assay]][[gene_set_types]][[as.numeric(selected_contrasts)]] + } else { + gs_tool <- "auto" + } + if (gs_tool == "auto") { + gs_tool <- detect_enrichment_tool(gst) + } + validate_enrichment_table(gst, gs_tool) + col_map <- get_enrichment_mapping(gst, gs_tool) + gst <- clean_enrichment_table(gst, gs_tool) # Select out specific gene sets if they've been provided @@ -258,7 +267,7 @@ genesetanalysistable <- function(input, output, session, eselist) { # Apply the user's filters - gst <- gst[gst[["p value"]] < input$pval & gst[["FDR"]] < input$fdr, , drop = FALSE] + gst <- gst[gst[[col_map$pvalue]] < input$pval & gst[[col_map$fdr]] < input$fdr, , drop = FALSE] validate(need(nrow(gst) > 0, "No results matching specified filters")) @@ -272,7 +281,7 @@ genesetanalysistable <- function(input, output, session, eselist) { gene_sets <- getGeneSets() gst$significant_genes <- apply(gst, 1, function(row) { - if (row["Direction"] == "Up") { + if (row[col_map$direction] == "Up") { siggenes <- intersect(gene_sets[[getGeneSetTypes()]][[row["gene_set_id"]]], up) } else { siggenes <- intersect(gene_sets[[getGeneSetTypes()]][[row["gene_set_id"]]], down) @@ -289,7 +298,7 @@ genesetanalysistable <- function(input, output, session, eselist) { getDisplayGeneSetAnalysis <- reactive({ gst <- getGeneSetAnalysis() - # Add links, but use a prettiefied version of the gene set name that re-flows to take up less space + # Add links, but use a prettified version of the gene set name that re-flows to take up less space gst <- linkMatrix(gst, eselist@url_roots, data.frame(gene_set_id = prettifyGeneSetName(gst$gene_set_id), stringsAsFactors = FALSE)) colnames(gst) <- prettifyVariablename(colnames(gst)) @@ -297,7 +306,7 @@ genesetanalysistable <- function(input, output, session, eselist) { gst }) - # Make an explantory file name + # Make an explanatory file name makeFileName <- reactive({ gsub("[^a-zA-Z0-9_]", "_", paste("gsa", getSelectedContrastNames(), getGeneSetTypes())) diff --git a/R/genesetbarcodeplot.R b/R/genesetbarcodeplot.R index c33df3d..2c5911a 100644 --- a/R/genesetbarcodeplot.R +++ b/R/genesetbarcodeplot.R @@ -166,8 +166,23 @@ genesetbarcodeplot <- function(input, output, session, eselist) { contrast_numbers <- as.numeric(getSelectedContrastNumbers()[[1]][[1]]) if (gene_set_types %in% names(ese@gene_set_analyses[[assay]]) && gene_set_names %in% rownames(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]])) { - fdr <- paste(signif(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]][gene_set_names, "FDR"], 3), collapse = ",") - direction <- paste(ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]][gene_set_names, "Direction"], collapse = ",") + gst <- ese@gene_set_analyses[[assay]][[gene_set_types]][[contrast_numbers]] + + # Get the tool used for enrichment, or auto-detect it: + if ("gene_set_analyses_tool" %in% slotNames(ese)) { + gs_tool <- ese@gene_set_analyses_tool[[assay]][[gene_set_types]][[contrast_numbers]] + } else { + gs_tool <- "auto" + } + if (gs_tool == "auto") { + gs_tool <- detect_enrichment_tool(gst) + } + validate_enrichment_table(gst, gs_tool) + col_map <- get_enrichment_mapping(gst, gs_tool) + gst <- clean_enrichment_table(gst, gs_tool) + + fdr <- paste(signif(gst[gene_set_names, col_map$fdr], 3), collapse = ",") + direction <- paste(gst[gene_set_names, col_map$direction], collapse = ",") title_components <- c(title_components, paste(paste("Direction:", direction), paste("FDR:", fdr))) } else { title_components <- c(title_components, "(no association)") diff --git a/R/shinyngs.R b/R/shinyngs.R index 1e8ded2..1a6fa68 100644 --- a/R/shinyngs.R +++ b/R/shinyngs.R @@ -194,7 +194,5 @@ #' button} \item{summarizematrix}{Module which takes a matrix and a factor which #' groups columns, and summarises by a supplied method, mean by default} }} #' -#' @docType package #' @name shinyngs - -NULL +"_PACKAGE" diff --git a/exec/make_app_from_files.R b/exec/make_app_from_files.R index f5960f1..17f284a 100755 --- a/exec/make_app_from_files.R +++ b/exec/make_app_from_files.R @@ -132,6 +132,30 @@ option_list <- list( default = "padj", help = "Column in differential results files holding q values/ adjusted p values." ), + make_option( + "--enrichment_gene_sets", + type = "character", + default = NULL, + help = "Comma-separated list of the GMT files used in the enrichment analyses." + ), + make_option( + "--enrichment_filename_template", + type = "character", + default = NULL, + help = "Template of filenames containing enrichment results. For instance '{contrast_name}_enrichment_for_{geneset_type}.tsv' or, if up and down regulated results are provided separately '{contrast_name}.{geneset_type}.gsea_report_for_{target|reference}.tsv'. '{contrast_name}', '{geneset_type}', and optionally '{target|reference}' are substituted dynamically for each contrast and geneset type. If not given, no enrichment results are included" + ), + make_option( + "--enrichment_skip_missing", + action = "store_true", + default = FALSE, + help = "Ignore if any gene set enrichment result for any contrast is missing" + ), + make_option( + "--enrichment_gene_type_id", + type = "character", + default = "gene_name", + help = "Gene identifier in the enrichment gene sets. Use this to specify that the gmt files represent genes with the gene name or an entrez id" + ), make_option( c("-o", "--output_directory"), type = "character", @@ -272,9 +296,86 @@ contrast_stats[[opt$assay_entity_name]] <- lapply(contrast_stats_files, function ) }) +# Enrichment results: +# To show enrichment results we need: +# - The contrasts file opt$contrast_file +# - The gmt enrichment files used +# - The actual enrichment results +# + +if (!is.null(opt$enrichment_filename_template)) { + # Ensure we have gene sets and contrasts + if (is.null(opt$contrast_file)) { + stop("When --enrichment_filename_template is given, --contrast_file is required") + } + if (is.null(opt$enrichment_gene_sets)) { + stop("When --enrichment_filename_template is given, --enrichment_gene_sets is required") + } + + contrasts_df <- read_metadata(opt$contrast_file) + genesets_files <- simpleSplit(opt$enrichment_gene_sets) + names(genesets_files) <- tools::file_path_sans_ext(basename(genesets_files)) + + gene_set_analyses <- list( + lapply(setNames(nm=names(genesets_files)), function(geneset_type) { + lapply(setNames(nm=contrasts_df$id), function(contrast_name) { + ctrst_info <- as.list(contrasts_df[contrasts_df$id == contrast_name,]) + # Two types of opt$enrichment_filename_template are supported: + # - The template points to a single file for each contrast and geneset_type + # - The template points to two files for each contrast and geneset_type. In + # this case, one file is for the up-regulated results and another file is + # for the down-regulated results. + # + # The template may look like: + # "gsea_for_{contrast_name}_using_{geneset_type}_for_{target|reference}.tsv" + # Which for a contrast "disease_vs_healthy" with target="disease" and reference="healthy" + # and geneset_type "geo_bp", would result in shinyngs expecting two files named: + # up: "gsea_for_disease_vs_healthy_using_geo_bp_for_disease.tsv" + # down: "gsea_for_disease_vs_healthy_using_geo_bp_for_healthy.tsv" + # + # If the template does not include {target|reference} we assume there is + # a single file per contrast and geneset type, for instance: + # "gsea_for_{contrast_name}_using_{geneset_type}_results.tsv" + # becomes: + # "gsea_for_disease_vs_healthy_using_geo_bp_results.tsv" + if (grepl("target\\|reference", opt$enrichment_filename_template)) { + up_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type, "up") + down_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type, "down") + if (file.exists(up_file) && file.exists(down_file)) { + list("up" = up_file, "down" = down_file) + } else { + if (opt$enrichment_skip_missing) { + NULL + } else { + stop(sprintf("both enrichment files should exist: %s and %s", up_file, down_file)) + } + } + } else { + enrichment_file <- build_enrichment_path(opt$enrichment_filename_template, ctrst_info, geneset_type) + if (file.exists(enrichment_file)) { + enrichment_file + } else { + if (opt$enrichment_skip_missing) { + NULL + } else { + stop(sprintf("enrichment file: %s does not exist", enrichment_file)) + } + } + } + }) + }) + ) + names(gene_set_analyses) <- names(assay_files)[contrast_stats_assay] +} else { + gene_set_analyses <- list() + genesets_files <- list() +} + + + ################################################ ################################################ -## Build the app ## +## Build the app ## ################################################ ################################################ @@ -294,7 +395,8 @@ experiments[[opt$assay_entity_name]] <- list( file = x, measure = "counts" ) - }) + }), + "gene_set_analyses" = gene_set_analyses ) shiny_config <- list( @@ -324,6 +426,12 @@ if (!is.null(opt$description)) { shiny_config[['report']] <- opt$report_markdown_file } +if (length(genesets_files) > 0) { + shiny_config[["gene_set_id_type"]] <- opt$enrichment_gene_type_id + shiny_config[["gene_sets"]] <- genesets_files +} + + myesel <- eselistfromConfig( shiny_config, log2_assays = opt$log2_assays, @@ -332,7 +440,7 @@ myesel <- eselistfromConfig( # Write output -dir.create(opt$output_directory, showWarnings = FALSE) +dir.create(opt$output_directory, showWarnings = FALSE, recursive = TRUE) saveRDS(myesel, file = file.path(opt$output_directory, "data.rds")) writeLines( c( @@ -346,7 +454,7 @@ writeLines( ) # If deployment has been indicated, try to do that. Needs SHINYAPPS_SECRET AND -# SHINYAPPS_TOKEN to be set in the evironment +# SHINYAPPS_TOKEN to be set in the environment if (opt$deploy_app) { library(rsconnect) diff --git a/man/ExploratorySummarizedExperiment-class.Rd b/man/ExploratorySummarizedExperiment-class.Rd index ddf1fd4..1603c2d 100644 --- a/man/ExploratorySummarizedExperiment-class.Rd +++ b/man/ExploratorySummarizedExperiment-class.Rd @@ -5,9 +5,7 @@ \alias{ExploratorySummarizedExperiment-class} \title{The ExploratorySummarizedExperiment class} \description{ -Subclass of SummarizedExperiment if present in the SummarizedExperiment -package (newer versions of Bioconductor have moved this from GenomicRanges), -otherwise of SummarizedExperiment0. +Subclass of SummarizedExperiment. } \section{Slots}{ @@ -27,5 +25,7 @@ otherwise of SummarizedExperiment0. \item{\code{dexseq_results}}{list.} \item{\code{read_reports}}{list.} + +\item{\code{gene_set_analyses_tool}}{list.} }} diff --git a/man/ExploratorySummarizedExperiment.Rd b/man/ExploratorySummarizedExperiment.Rd index be91e4d..a3d43eb 100644 --- a/man/ExploratorySummarizedExperiment.Rd +++ b/man/ExploratorySummarizedExperiment.Rd @@ -15,7 +15,8 @@ ExploratorySummarizedExperiment( assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), - read_reports = list() + read_reports = list(), + gene_set_analyses_tool = list() ) } \arguments{ @@ -46,9 +47,8 @@ correspond to 'contrasts' set in the containing SummarizedExperimentList.} \item{assay_measures}{Optional List of measures to display related to each assay.} -\item{gene_set_analyses}{List of lists of gene set tables keyed first by -gene set -type and secondly by contrast} +\item{gene_set_analyses}{Three-level nested lists of gene set tables keyed first by +assay, then by gene set type and then by contrast.} \item{dexseq_results}{An optional list of \code{DEXSeqResults} objects corresponding to the contrasts listed in the \code{contrasts} slot..} @@ -56,6 +56,10 @@ corresponding to the contrasts listed in the \code{contrasts} slot..} \item{read_reports}{A named list of matrices with read counts in columns and sample names in rows. Useful for providing mapped read counts, counts per gene type etc} + +\item{gene_set_analyses_tool}{Three-level nested lists of a string, nested as \code{gene_set_analyses}. +Each string may be \code{"auto"} (the default), \code{"gsea"} or \code{"roast"}. It defines the format of the +corresponding \code{gene_set_analyses} table.} } \value{ output An ExploratoryRangedSummarizedExperient object diff --git a/man/build_enrichment_path.Rd b/man/build_enrichment_path.Rd new file mode 100644 index 0000000..5298973 --- /dev/null +++ b/man/build_enrichment_path.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/accessory.R +\name{build_enrichment_path} +\alias{build_enrichment_path} +\title{Build path to the enrichment results} +\usage{ +build_enrichment_path(template, contrast_info, geneset_type, direction = NULL) +} +\arguments{ +\item{template}{A string, such as \code{"/path/to/folder/{contrast_name}-{geneset_type}.csv"} or +\code{"./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv"}} + +\item{contrast_info}{A list with contrast details: `id`, `reference`, and `target`, +to be replaced in template.} + +\item{geneset_type}{The name of the geneset type, to be replaced in the template} + +\item{direction}{Either `"up"`, `"down"` or `NULL`, used to determine how the replacement will happen.} +} +\value{ +A string similar to template, but with the templates replaced +} +\description{ +Build path to the enrichment results +} +\details{ +The template accepts the following: + +\describe{ + \item{\code{\{contrast_name\}}}{Will be replaced by \code{contrast_info$id} argument} + \item{\code{\{geneset_type\}}}{Will be replaced by the \code{geneset_type} argument} + \item{\code{\{target|reference\}}}{If the \code{direction} argument is \code{"up"}, will be replaced + with \code{contrast_info$target}, if it is \code{"down"}, \code{contrast_info$reference} will be used instead.} +} +} +\examples{ +build_enrichment_path( + template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv", + contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"), + geneset_type = "m2.cp.v2024.1.Mm.entrez", + direction = "up" +) + +} diff --git a/man/cond_log2_transform_assays.Rd b/man/cond_log2_transform_assays.Rd index 6db3ea0..7441115 100644 --- a/man/cond_log2_transform_assays.Rd +++ b/man/cond_log2_transform_assays.Rd @@ -9,7 +9,8 @@ cond_log2_transform_assays( log2_assays, threshold = 30, reverse = FALSE, - prettify_names = TRUE + prettify_names = TRUE, + invert_assays = FALSE ) } \arguments{ @@ -26,6 +27,8 @@ This is only checked if should_transform is NULL.} \item{reverse}{Boolean, should we unlog rather than log?} \item{prettify_names}{Boolean. Prettify element names? Passed to validate_indices().} + +\item{invert_assays}{Boolean, apply transform to assays NOT specified in log2_assays.} } \value{ A modified assay_data list. diff --git a/man/read_contrasts.Rd b/man/read_contrasts.Rd index c4c0e55..fcc4b0f 100644 --- a/man/read_contrasts.Rd +++ b/man/read_contrasts.Rd @@ -38,5 +38,13 @@ shinyngs?} output Validated contrasts data frame } \description{ -Read and validate a contrasts file against sample metadata +Checks: +1. No duplicate contrast IDs. Ensure that the required columns (variable, reference, target) are present. +2. Values in the contrast variable column exist as column names in the sample metadata. +3. If blocking factors are supplied, checks that they are present in the sample metadata. +4. Design matrix is full rank. +5. Warn about continuous covariates (e.g. numeric patient IDs treated as continuous). +6. Values of specified columns don't contain special characters. +7. Verify that the specified reference and target values exist in the corresponding sample metadata column. +8. Issue a warning if the reference and target levels are identical. } diff --git a/man/shinyngs.Rd b/man/shinyngs.Rd index 6a761b3..429f03f 100644 --- a/man/shinyngs.Rd +++ b/man/shinyngs.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/shinyngs.R \docType{package} \name{shinyngs} +\alias{shinyngs-package} \alias{shinyngs} \title{Interactive downstream analysis with ShinyNGS.} \description{ @@ -212,3 +213,7 @@ button} \item{summarizematrix}{Module which takes a matrix and a factor which groups columns, and summarises by a supplied method, mean by default} }} } +\author{ +\strong{Maintainer}: Jonathan Manning \email{jonathan.manning@seqera.io} + +} diff --git a/man/validate_indices.Rd b/man/validate_indices.Rd index a39970b..423bdfd 100644 --- a/man/validate_indices.Rd +++ b/man/validate_indices.Rd @@ -4,13 +4,20 @@ \alias{validate_indices} \title{Validate assay indices based on a given string.} \usage{ -validate_indices(assay_data, index_string, prettify_names = TRUE) +validate_indices( + assay_data, + index_string, + invert_assays = FALSE, + prettify_names = TRUE +) } \arguments{ \item{assay_data}{A list containing matrices as assay data.} \item{index_string}{A string that can be a comma-separated list of integers or assay names.} +\item{invert_assays}{Boolean, return the indices NOT specified.} + \item{prettify_names}{Boolean. Prettify element names?} } \value{ diff --git a/tests/testthat/test-accessory.R b/tests/testthat/test-accessory.R index 06337c3..332d5c7 100644 --- a/tests/testthat/test-accessory.R +++ b/tests/testthat/test-accessory.R @@ -125,4 +125,99 @@ contrasts: expect_equal(contrasts$make_contrasts_str[3], "genotypeWT.treatmentTreated.time") unlink(yaml_file) -}) \ No newline at end of file +}) + +test_that("read_enrichment_file parses file correctly", { + all_text <- "geneset,p value,FDR,Direction\ndummy,0.04,0.01,Up\n" + all_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv") + all_df <- read.csv(textConnection(all_text), check.names = FALSE, row.names = 1) + write(all_text, all_file) + + up_text <- "geneset,p value,FDR\ndummy,0.04,0.01\n" + up_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv") + up_df <- read.csv(textConnection(up_text), check.names = FALSE, row.names = 1) + write(up_text, up_file) + + down_text <- "geneset,p value,FDR\ndummy2,0.04,0.01\n" + down_file <- tempfile(pattern = "test_shinyngs", fileext = ".csv") + down_df <- read.csv(textConnection(down_text), check.names = FALSE, row.names = 1) + write(down_text, down_file) + + up_df2 <- up_df + up_df2$Direction <- "Up" + down_df2 <- down_df + down_df2$Direction <- "Down" + combined_df <- rbind(up_df2, down_df2) + + expect_equal(read_enrichment_file(NULL), NULL) + expect_equal(read_enrichment_file(all_file), all_df) + expect_equal(read_enrichment_file(c("up" = up_file,"down" = down_file)), + combined_df) +}) + + +test_that("remove_nulls works", { + expect_equal(remove_nulls(list(1, NULL, 2)), list(1, 2)) +}) + +test_that("build_enrichment_path replaces variables", { + expect_equal( + build_enrichment_path( + template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv", + contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"), + geneset_type = "m2.cp.v2024.1.Mm.entrez", + direction = "up" + ), + "./disease_vs_ctrl/m2.cp.v2024.1.Mm.entrez/report_for_disease.csv" + ) +}) + +gst_gsea <- data.frame( + "NAME" = "dummy", + "GS
follow link to MSigDB" = "", + "GS DETAILS" = "", + "SIZE" = 0, + "ES" = 0, + "NES" = 0, + "NOM p-val" = 0, + "FDR q-val" = 0, + "FWER p-val" = 0, + "RANK AT MAX LEADING EDGE" = "", + "Direction" = "Up", + check.names = FALSE +) + +gst_roast <- data.frame( + "NAME" = "", + "p value" = 0.01, + "FDR" = 0.01, + "Direction" = "Up", + check.names = FALSE +) + +test_that("detect_enrichment_tool works", { + expect_equal(detect_enrichment_tool(gst_gsea), "gsea") + expect_equal(detect_enrichment_tool(gst_roast), "roast") +}) + +test_that("get_enrichment_mapping works", { + mappings <- list( + roast = list(pvalue = "p value", fdr = "FDR", direction = "Direction"), + gsea = list(pvalue = "NOM p-val", fdr = "FDR q-val", direction = "Direction") + ) + expect_equal(get_enrichment_mapping(gst_gsea, "gsea"), mappings$gsea) + expect_equal(get_enrichment_mapping(gst_roast, "roast"), mappings$roast) +}) + +test_that("clean_enrichment_table works", { + df <- clean_enrichment_table(gst_gsea, "gsea") + expect_false("GS
follow link to MSigDB" %in% colnames(df)) +}) + +test_that("validate_enrichment_table works", { + validate_enrichment_table(gst_gsea, "gsea") + validate_enrichment_table(gst_roast, "roast") + gst_gsea_wrong <- gst_gsea + gst_gsea_wrong[["NOM p-val"]] <- NULL + expect_error(validate_enrichment_table(gst_gsea_wrong, "gsea")) +}) diff --git a/vignettes/shinyngs.Rmd b/vignettes/shinyngs.Rmd index a502441..4c6d619 100644 --- a/vignettes/shinyngs.Rmd +++ b/vignettes/shinyngs.Rmd @@ -259,6 +259,7 @@ head(myannotation) Now we can put these things together to create an 'ExploratorySummarizedExperiment: ```{r eval=TRUE} +library(shinyngs) myese <- ExploratorySummarizedExperiment( assays = SimpleList( myassays