pinin4fjords · zeehio · Oct 29, 2025 · Oct 29, 2025 · Nov 8, 2025 · Nov 8, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,9 +5,10 @@ Authors@R: person("Jonathan", "Manning", email = "jonathan.manning@seqera.io", r
 Description: Provides Shiny applications for various array and NGS applications.
     Currently very RNA-seq centric, with plans for expansion.
 Depends:
-    R (>= 3.2.2),
+    R (>= 3.4.0),
     SummarizedExperiment
 License: AGPL (>= 3)
+Encoding: UTF-8
 LazyData: true
 Imports:
     cluster,
@@ -53,6 +54,6 @@ Remotes:
     cran/d3heatmap,
     pinin4fjords/zhangneurons
 biocViews: Software
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(annotatedHeatmap)
 export(anova_pca_metadata)
 export(barcode_plot)
 export(bootstrapMedian)
+export(build_enrichment_path)
 export(calculateDendrogram)
 export(calculateDist)
 export(checkListIsSubset)

diff --git a/R/ExploratorySummarizedExperiment-class.R b/R/ExploratorySummarizedExperiment-class.R
@@ -1,8 +1,6 @@
 #' The ExploratorySummarizedExperiment class
 #'
-#' Subclass of SummarizedExperiment if present in the SummarizedExperiment
-#' package (newer versions of Bioconductor have moved this from GenomicRanges),
-#' otherwise of SummarizedExperiment0.
+#' Subclass of SummarizedExperiment.
 #'
 #' @slot idfield character.
 #' @slot entrezgenefield character.
@@ -12,21 +10,17 @@
 #' @slot gene_set_analyses list.
 #' @slot dexseq_results list.
 #' @slot read_reports list.
+#' @slot gene_set_analyses_tool list.
 #'
 #' @export
 
-setClass("ExploratorySummarizedExperiment", contains = ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment",
-  "SummarizedExperiment0"
-), representation = representation(
+setClass("ExploratorySummarizedExperiment", contains = "SummarizedExperiment", slots = c(
   idfield = "character", entrezgenefield = "character", labelfield = "character", contrast_stats = "list",
-  assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list"
+  assay_measures = "list", gene_set_analyses = "list", dexseq_results = "list", read_reports = "list", gene_set_analyses_tool = "list"
 ))
 
 setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function(from) {
-  as(
-    (as(from, ifelse("SummarizedExperiment" %in% getClasses(where = "package:SummarizedExperiment"), "SummarizedExperiment", "SummarizedExperiment0"))),
-    "ExploratorySummarizedExperiment"
-  )
+  as(as(from, "SummarizedExperiment"), "ExploratorySummarizedExperiment")
 })
 
 #' ExploratorySummarizedExperiments
@@ -63,21 +57,23 @@ setAs("RangedSummarizedExperiment", "ExploratorySummarizedExperiment", function(
 #' correspond to 'contrasts' set in the containing SummarizedExperimentList.
 #' @param assay_measures Optional List of measures to display related to each
 #' assay.
-#' @param gene_set_analyses List of lists of gene set tables keyed first by
-#' gene set
-#' type and secondly by contrast
+#' @param gene_set_analyses Three-level nested list of gene set tables, keyed first by
+#' assay, then by gene set type and finally by contrast.
 #' @param read_reports A named list of matrices with read counts in columns
 #' and sample names in rows. Useful for providing mapped read counts,
 #' counts per gene type etc
 #' @param dexseq_results An optional list of \code{DEXSeqResults} objects
 #' corresponding to the contrasts listed in the \code{contrasts} slot..
+#' @param gene_set_analyses_tool Three-level nested list of strings, with the same structure as \code{gene_set_analyses}.
+#' Each string may be \code{"auto"} (the default), \code{"gsea"} or \code{"roast"}, and defines the format of the
+#' corresponding \code{gene_set_analyses} table.
 #'
 #' @return output An ExploratoryRangedSummarizedExperient object
 #' @rawNamespace import(SummarizedExperiment, except = 'shift')
 #' @export
 
 ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield, labelfield = character(), entrezgenefield = character(), contrast_stats = list(),
-                                            assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list()) {
+                                            assay_measures = list(), gene_set_analyses = list(), dexseq_results = list(), read_reports = list(), gene_set_analyses_tool = list()) {
   # Reset NULLs to empty
 
   if (is.null(entrezgenefield)) {
@@ -116,13 +112,85 @@ ExploratorySummarizedExperiment <- function(assays, colData, annotation, idfield
 
   annotation <- data.frame(lapply(annotation, as.character), stringsAsFactors = FALSE, check.names = FALSE, row.names = rownames(annotation))[all_rows, ]
 
+  # Ensure consistency between gene_set_analyses and gene_set_analyses_tool
+  gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(gene_set_analyses, gene_set_analyses_tool)
+
   # Build the object
 
   sumexp <- SummarizedExperiment(assays = assays, colData = DataFrame(colData, check.names = FALSE))
   mcols(sumexp) <- annotation
 
   new("ExploratorySummarizedExperiment", sumexp,
     idfield = idfield, labelfield = labelfield, entrezgenefield = entrezgenefield, assay_measures = assay_measures,
-    contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports
+    contrast_stats = contrast_stats, gene_set_analyses = gene_set_analyses, dexseq_results = dexseq_results, read_reports = read_reports,
+    gene_set_analyses_tool = gene_set_analyses_tool
+  )
+}
+
+#' Ensure consistency between gene_set_analyses and gene_set_analyses_tool structures
+#' @noRd
+#'
+#' @description
+#' Ensures that the structure of \code{gene_set_analyses_tool} matches that of \code{gene_set_analyses},
+#' filling in missing elements as needed. Each entry in \code{gene_set_analyses_tool} should be a string
+#' (e.g., "auto", "gsea", or "roast") corresponding to the format of the associated gene set analysis table.
+#'
+#' @param gene_set_analyses A three-level nested list of gene set tables, keyed by assay, gene set type, and contrast.
+#' @param gene_set_analyses_tool A three-level nested list of strings, structured as \code{gene_set_analyses}, indicating the tool used for each gene set analysis.
+#'
+#' @return A three-level nested list of strings, matching the structure of \code{gene_set_analyses}, with missing elements filled as needed.
+check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
+  # Mirror the assay / gene set type / contrast nesting of gene_set_analyses,
+  # producing exactly one validated tool string per contrast. Entries of
+  # gene_set_analyses_tool with no counterpart in gene_set_analyses are dropped,
+  # and missing entries default to "auto".
+
+  if (is.null(gene_set_analyses_tool)) {
+    gene_set_analyses_tool <- list()
+  }
+
+  valid_tools <- c("auto", "gsea", "roast")
+
+  safe_get <- function(x, path, default = "auto") {
+    # similar to purrr::pluck(): walk `path` name by name and fall back to
+    # `default` as soon as a level is missing or NULL
+    if (is.null(x)) {
+      return(default)
+    }
+    for (p in path) {
+      if (!p %in% names(x)) {
+        return(default)
+      }
+      x <- x[[p]]
+      if (is.null(x)) {
+        return(default)
+      }
+    }
+    x
+  }
+
+  lapply(
+    setNames(nm = names(gene_set_analyses)),
+    function(assay_name) {
+      gene_sets <- gene_set_analyses[[assay_name]]
+      lapply(
+        setNames(nm = names(gene_sets)),
+        function(gene_set_name) {
+          contrasts <- gene_sets[[gene_set_name]]
+          lapply(
+            setNames(nm = names(contrasts)),
+            function(contrast_name) {
+              tool_name <- safe_get(gene_set_analyses_tool, c(assay_name, gene_set_name, contrast_name), "auto")
+              # `!= 1` (rather than `> 1`) also rejects zero-length values such as
+              # character(0), which would otherwise reach `||` as logical(0) and
+              # fail with an unrelated error.
+              if (!is.character(tool_name) || length(tool_name) != 1 || !tool_name %in% valid_tools) {
+                stop(
+                  "Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
+                  assay_name, ",", gene_set_name, ",", contrast_name,
+                  ". It should be one of 'auto', 'gsea' or 'roast'. Found ",
+                  # collapse so multi-element values stay readable in the message
+                  paste(as.character(tool_name), collapse = ",")
+                )
+              }
+              tool_name
+            }
+          )
+        }
+      )
+    }
+  )
+}
-check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
-  # gene_set_analyses and gene_set_analyses_tool should have the same list of lists
-  # structure. gene_set_analyses_tool should have a single string
-  
-  # Create default structure if necessary:
-  out <- list()
-  
-  if (is.null(gene_set_analyses_tool)) {
-    gene_set_analyses_tool <- list()
-  }
-  for (assay_name in names(gene_set_analyses)) {
-    if (!assay_name %in% names(gene_set_analyses_tool)) {
-      gene_set_analyses_tool[[assay_name]] <- list()
-    }
-    for (gs_type in names(gene_set_analyses[[assay_name]])) {
-      if (! gs_type %in% names(gene_set_analyses_tool[[assay_name]])) {
-        gene_set_analyses_tool[[assay_name]][[gs_type]] <- list()
-      }
-      for (contrast_name in names(gene_set_analyses[[assay_name]][[gs_type]])) {
-        if (! contrast_name %in% names(gene_set_analyses_tool[[assay_name]][[gs_type]])) {
-          gene_set_analyses_tool[[assay_name]][[gs_type]][[contrast_name]] <- "auto"
-        } else {
-          tool_name <- gene_set_analyses_tool[[assay_name]][[gs_type]][[contrast_name]]
-          if (!is.character(tool_name) || length(tool_name) > 1) {
-            stop(paste0("Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
-                        gs_type, " and ", contrast_name, " should be one of 'auto', 'gsea' or 'roast'. Found ",
-                        paste0(tool_name, collapse=",")))
-          }
-          if (! tool_name %in% c("auto", "gsea", "roast")) {
-            stop(paste0("Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
-                        gs_type, " and ", contrast_name, " should be one of 'auto', 'gsea' or 'roast'. Found ",
-                        tool_name))
-          }
-        }
-      }
-      # In case gene_set_analyses_tool has other entries, just keep the ones matching gene_set_analyses
-      contrasts_ordered <- names(gene_set_analyses[[assay_name]][[gs_type]])
-      gene_set_analyses_tool[[assay_name]][[gs_type]] <- gene_set_analyses_tool[[assay_name]][[gs_type]][contrasts_ordered]
-    }
-    gene_set_type_names_ordered <- names(gene_set_analyses[[assay_name]])
-    gene_set_analyses_tool[[assay_name]] <- gene_set_analyses_tool[[assay_name]][gene_set_type_names_ordered]
-  }
-  analysis_names_ordered <- names(gene_set_analyses_tool)
-  gene_set_analyses_tool <- gene_set_analyses_tool[analysis_names_ordered]
-  gene_set_analyses_tool
-}
+check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
+  # Build a nested list of tool strings that mirrors gene_set_analyses
+  # (assay -> gene set type -> contrast). Missing tool entries default to
+  # "auto"; entries not present in gene_set_analyses are dropped.
+  if (is.null(gene_set_analyses_tool)) {
+    gene_set_analyses_tool <- list()
+  }
+
+  valid_tools <- c("auto", "gsea", "roast")
+
+  # Contrasts live three levels down. Detecting leaves by depth (and not only
+  # by is.data.frame) is required because this function is also called on the
+  # configuration, where each contrast is a file path or an up/down pair of
+  # paths rather than a loaded table.
+  leaf_depth <- 3L
+
+  # Recursive function to mirror structure and validate
+  mirror_and_validate <- function(gsa_node, gst_node, path = "", depth = 0L) {
+    if (depth >= leaf_depth || is.data.frame(gsa_node)) {
+      # Leaf node - should have a tool string
+      tool <- if (is.null(gst_node)) "auto" else gst_node
+
+      if (!is.character(tool) || length(tool) != 1 || !tool %in% valid_tools) {
+        stop(sprintf("Invalid gene_set_analyses_tool at %s: expected one of %s, got %s",
+                     path, paste(valid_tools, collapse=", "), paste(tool, collapse=",")))
+      }
+      return(tool)
+    }
+
+    # Recurse through structure
+    node_names <- names(gsa_node)
+    lapply(setNames(node_names, node_names), function(name) {
+      # Only descend into proper lists: `[[` on an atomic value placed at the
+      # wrong level would fail with "subscript out of bounds".
+      child_tool <- if (is.list(gst_node)) gst_node[[name]] else NULL
+      mirror_and_validate(gsa_node[[name]], child_tool, paste0(path, "/", name), depth + 1L)
+    })
+  }
+
+  mirror_and_validate(gene_set_analyses, gene_set_analyses_tool)
+}
-check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
-  # gene_set_analyses and gene_set_analyses_tool should have the same list of lists
-  # structure. gene_set_analyses_tool should have a single string
-  
-  # Create default structure if necessary:
-  out <- list()
-  
-  if (is.null(gene_set_analyses_tool)) {
-    gene_set_analyses_tool <- list()
-  }
-  for (assay_name in names(gene_set_analyses)) {
-    if (!assay_name %in% names(gene_set_analyses_tool)) {
-      gene_set_analyses_tool[[assay_name]] <- list()
-    }
-    for (gs_type in names(gene_set_analyses[[assay_name]])) {
-      if (! gs_type %in% names(gene_set_analyses_tool[[assay_name]])) {
-        gene_set_analyses_tool[[assay_name]][[gs_type]] <- list()
-      }
-      for (contrast_name in names(gene_set_analyses[[assay_name]][[gs_type]])) {
-        if (! contrast_name %in% names(gene_set_analyses_tool[[assay_name]][[gs_type]])) {
-          gene_set_analyses_tool[[assay_name]][[gs_type]][[contrast_name]] <- "auto"
-        } else {
-          tool_name <- gene_set_analyses_tool[[assay_name]][[gs_type]][[contrast_name]]
-          if (!is.character(tool_name) || length(tool_name) > 1) {
-            stop(paste0("Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
-                        gs_type, " and ", contrast_name, " should be one of 'auto', 'gsea' or 'roast'. Found ",
-                        paste0(tool_name, collapse=",")))
-          }
-          if (! tool_name %in% c("auto", "gsea", "roast")) {
-            stop(paste0("Invalid gene_set_analyses_tool. gene_set_analyses_tool for ",
-                        gs_type, " and ", contrast_name, " should be one of 'auto', 'gsea' or 'roast'. Found ",
-                        tool_name))
-          }
-        }
-      }
-      # In case gene_set_analyses_tool has other entries, just keep the ones matching gene_set_analyses
-      contrasts_ordered <- names(gene_set_analyses[[assay_name]][[gs_type]])
-      gene_set_analyses_tool[[assay_name]][[gs_type]] <- gene_set_analyses_tool[[assay_name]][[gs_type]][contrasts_ordered]
-    }
-    gene_set_type_names_ordered <- names(gene_set_analyses[[assay_name]])
-    gene_set_analyses_tool[[assay_name]] <- gene_set_analyses_tool[[assay_name]][gene_set_type_names_ordered]
-  }
-  analysis_names_ordered <- names(gene_set_analyses_tool)
-  gene_set_analyses_tool <- gene_set_analyses_tool[analysis_names_ordered]
-  gene_set_analyses_tool
-}
+check_gene_set_analyses_tool_consistency <- function(gene_set_analyses, gene_set_analyses_tool) {
+  # Build a nested list of tool strings that mirrors gene_set_analyses
+  # (assay -> gene set type -> contrast). Missing tool entries default to
+  # "auto"; entries not present in gene_set_analyses are dropped.
+  if (is.null(gene_set_analyses_tool)) {
+    gene_set_analyses_tool <- list()
+  }
+
+  valid_tools <- c("auto", "gsea", "roast")
+
+  # Contrasts live three levels down. Detecting leaves by depth (and not only
+  # by is.data.frame) is required because this function is also called on the
+  # configuration, where each contrast is a file path or an up/down pair of
+  # paths rather than a loaded table.
+  leaf_depth <- 3L
+
+  # Recursive function to mirror structure and validate
+  mirror_and_validate <- function(gsa_node, gst_node, path = "", depth = 0L) {
+    if (depth >= leaf_depth || is.data.frame(gsa_node)) {
+      # Leaf node - should have a tool string
+      tool <- if (is.null(gst_node)) "auto" else gst_node
+
+      if (!is.character(tool) || length(tool) != 1 || !tool %in% valid_tools) {
+        stop(sprintf("Invalid gene_set_analyses_tool at %s: expected one of %s, got %s",
+                     path, paste(valid_tools, collapse=", "), paste(tool, collapse=",")))
+      }
+      return(tool)
+    }
+
+    # Recurse through structure
+    node_names <- names(gsa_node)
+    lapply(setNames(node_names, node_names), function(name) {
+      # Only descend into proper lists: `[[` on an atomic value placed at the
+      # wrong level would fail with "subscript out of bounds".
+      child_tool <- if (is.list(gst_node)) gst_node[[name]] else NULL
+      mirror_and_validate(gsa_node[[name]], child_tool, paste0(path, "/", name), depth + 1L)
+    })
+  }
+
+  mirror_and_validate(gene_set_analyses, gene_set_analyses_tool)
+}
diff --git a/R/ExploratorySummarizedExperimentList-class.R b/R/ExploratorySummarizedExperimentList-class.R
@@ -15,7 +15,7 @@
 #'
 #' @export
 
-setClass("ExploratorySummarizedExperimentList", contains = "list", representation = representation(
+setClass("ExploratorySummarizedExperimentList", contains = "list", slots = c(
   title = "character", author = "character", description = "character", static_pdf = "character",
   group_vars = "character", default_groupvar = "character", contrasts = "list", url_roots = "list", gene_sets = "list", gene_set_id_type = "character", ensembl_species = "character"
 ))
@@ -149,7 +149,7 @@ ExploratorySummarizedExperimentList <- function(eses, title = "", author = "", d
       } else {
         # Numeric IDs (like entrez will be cast to integers)
 
-        is_numeric <- all(!is.na(as.numeric(annotation[[gene_set_id_type]])))
+        is_numeric <- all(!is.na(suppressWarnings(as.numeric(annotation[[gene_set_id_type]]))))
       }
 
       print("Processing gene sets")

diff --git a/R/accessory.R b/R/accessory.R
@@ -555,6 +555,51 @@ eselistFromYAML <- function(configfile) {
   eselistFromList(config)
 }
 
+#' Reads gene enrichment files
+#' @noRd
+#' @param contrast_spec One of:
+#' - \code{NULL} (meaning no enrichment was analyzed for that contrast)
+#' - a path to a file (e.g. the table output from roast)
+#' - a named list with elements "up" and "down" with paths to files (e.g.
+#'  corresponding to gsea up-regulated and down-regulated output tables).
+#'  
+#'  The two tables from GSEA output will be combined into a single data frame. A column "Direction" with
+#'  values "Up" and "Down" will be added.
+#'
+#' @returns A data frame with the file contents (or \code{NULL})
+#'
+read_enrichment_file <- function(contrast_spec) {
+  # contrast_spec may be one file name or two file names (up and down), or NULL
+  if (is.null(contrast_spec) || length(contrast_spec) == 0) {
+    return(NULL)
+  }
+
+  # Read a single table; the first column is used as row names and the
+  # separator is chosen by getSeparator() from the path.
+  read_one <- function(path) {
+    read.csv(path, sep = getSeparator(path), check.names = FALSE,
+             stringsAsFactors = FALSE, row.names = 1)
+  }
+
+  if (length(contrast_spec) == 1) {
+    # `[[1]]` so that a one-element list works as well as a plain string
+    return(read_one(contrast_spec[[1]]))
+  }
+
+  if (length(contrast_spec) == 2) {
+    # This is useful for GSEA output, that splits up and down in two tsv files.
+    # Fail early with a clear message instead of an obscure subscript or
+    # file error when the two entries are not named as documented.
+    if (!all(c("up", "down") %in% names(contrast_spec))) {
+      stop("gene_set_analyses entries with two files must be named 'up' and 'down'")
+    }
+
+    # We read both files and set the direction. rep(..., nrow()) also works
+    # for empty tables, so the Direction column is present even when both
+    # files contain no gene sets.
+    read_direction <- function(key, label) {
+      tab <- read_one(contrast_spec[[key]])
+      tab$Direction <- rep(label, nrow(tab))
+      tab
+    }
+    return(rbind(read_direction("up", "Up"), read_direction("down", "Down")))
+  }
+
+  stop("gene_set_analyses should have zero, one or two contrast files per gene_set_type")
+}
+
 #' Build an ExploratorySummarisedExperimentList from a description provided in a list
 #'
 #' @param config Hierachical named list with input components. See \code{eselistFromYAML} for detail.
@@ -682,13 +727,17 @@ eselistfromConfig <-
       }
 
       if ("gene_set_analyses" %in% names(exp)) {
+        # Basic list to pass to object creation
+        exp$gene_set_analyses_tool <- check_gene_set_analyses_tool_consistency(exp$gene_set_analyses, exp$gene_set_analyses_tool)
+
         ese_list$gene_set_analyses <- lapply(exp$gene_set_analyses, function(assay) {
           lapply(assay, function(gene_set_type) {
-            lapply(gene_set_type, function(contrast) {
-              read.csv(contrast, check.names = FALSE, stringsAsFactors = FALSE, row.names = 1)
-            })
+            lapply(gene_set_type, read_enrichment_file)
           })
         })
+
+        ese_list$gene_set_analyses <- remove_nulls(ese_list$gene_set_analyses)
+        ese_list$gene_set_analyses_tool <- exp$gene_set_analyses_tool
       }
 
       do.call(ExploratorySummarizedExperiment, ese_list)
@@ -751,6 +800,19 @@ eselistfromConfig <-
     eselist
   }
 
+#' Drop NULL elements from a (possibly nested) list
+#' @noRd
+#' @param x Any object. Lists are processed recursively; data frames and
+#' non-list objects are returned untouched.
+#'
+#' @return \code{x} with every NULL element removed at all nesting levels.
+#' Nested lists that end up empty are kept as empty lists.
+remove_nulls <- function(x) {
+  # Data frames are lists too, but must be treated as opaque leaves
+  if (!is.list(x) || is.data.frame(x)) {
+    return(x)
+  }
+  cleaned <- lapply(x, remove_nulls)
+  is_missing <- vapply(cleaned, is.null, logical(1))
+  cleaned[which(!is_missing)]
+}
+
 #' Read an expression matrix file and match to specified samples and features
 #'
 #' @param matrix_file Matrix file
@@ -1470,3 +1532,106 @@ cond_log2_transform_assays <- function(assay_data, log2_assays, threshold = 30,
   return(assay_data)
 }
 
+
+#' Build path to the enrichment results
+#' 
+#' @details
+#' 
+#' The template accepts the following:
+#' 
+#' \describe{
+#'   \item{\code{\{contrast_name\}}}{Will be replaced by \code{contrast_info$id} argument}
+#'   \item{\code{\{geneset_type\}}}{Will be replaced by the \code{geneset_type} argument}
+#'   \item{\code{\{target|reference\}}}{If the \code{direction} argument is \code{"up"}, will be replaced
+#'   with \code{contrast_info$target}, if it is \code{"down"}, \code{contrast_info$reference} will be used instead.}
+#' }
+#'
+#' @param template A string, such as \code{"/path/to/folder/{contrast_name}-{geneset_type}.csv"} or
+#' \code{"./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv"}
+#' @param contrast_info  A list with contrast details: `id`, `reference`, and `target`,
+#'   to be replaced in template.
+#' @param geneset_type The name of the geneset type, to be replaced in the template
+#' @param direction Either `"up"`, `"down"` or `NULL`, used to determine how the replacement will happen.
+#'
+#' @returns A string like \code{template}, but with the placeholders replaced
+#' @export
+#' @examples
+#' build_enrichment_path(
+#'   template = "./{contrast_name}/{geneset_type}/report_for_{target|reference}.csv",
+#'   contrast_info = list(id="disease_vs_ctrl", reference="control", target="disease"),
+#'   geneset_type = "m2.cp.v2024.1.Mm.entrez",
+#'   direction = "up"
+#' )
+#' 
+build_enrichment_path <- function(template, contrast_info, geneset_type, direction = NULL) {
+  # Literal (non-regex) substitution of one placeholder
+  fill_in <- function(text, placeholder, value) {
+    gsub(placeholder, value, text, fixed = TRUE)
+  }
+
+  filled <- fill_in(template, "{contrast_name}", contrast_info$id)
+  filled <- fill_in(filled, "{geneset_type}", geneset_type)
+
+  # Without a direction the {target|reference} placeholder is left as is
+  if (is.null(direction)) {
+    return(filled)
+  }
+
+  # "up" selects the target group; any other value selects the reference
+  group_name <- if (direction == "up") contrast_info$target else contrast_info$reference
+  fill_in(filled, "{target|reference}", group_name)
+}
+
+#' Detects the enrichment tool used
+#'
+#' @noRd
+#' @param gst The enrichment table
+#'
+#' @returns The enrichment tool as a string, based on whether "NOM p-val" is a column ("gsea") or
+#' either "p value" or "PValue" are found ("roast")
+detect_enrichment_tool <- function(gst) {
+  columns <- colnames(gst)
+  # The GSEA marker column is checked first, so it wins if both kinds are present
+  if ("NOM p-val" %in% columns) {
+    return("gsea")
+  }
+  if (!any(c("p value", "PValue") %in% columns)) {
+    stop("Could not detect enrichment tool from column names")
+  }
+  "roast"
+}
+
+
+
+#' Get the expected column names for the gene set enrichment tool
+#'
+#' @noRd
+#' @param gst The enrichment table.
+#' @param gs_tool Either `"roast"` or `"gsea"`.
+#'
+#' @returns A list with three elements: `"pvalue"`, `"fdr"` and `"direction"`. Each element is
+#' a string pointing to the expected column name for that tool.
+get_enrichment_mapping <- function(gst, gs_tool) {
+  # roast tables name the p-value column either "p value" or "PValue";
+  # use "PValue" only when the table actually has it.
+  roast_pvalue <- "p value"
+  if (gs_tool == "roast") {
+    if ("PValue" %in% colnames(gst)) {
+      roast_pvalue <- "PValue"
+    }
+  }
+
+  column_names <- list(
+    roast = list(pvalue = roast_pvalue, fdr = "FDR", direction = "Direction"),
+    gsea = list(pvalue = "NOM p-val", fdr = "FDR q-val", direction = "Direction")
+  )
+  # Unknown tool names yield NULL
+  column_names[[gs_tool]]
+}
+
+# Stops with an error naming the first expected column (p-value, FDR or
+# direction, in that order) that is absent from the enrichment table.
+validate_enrichment_table <- function(gst, gs_tool) {
+  col_map <- get_enrichment_mapping(gst, gs_tool)
+  # sanity checks, one per required column
+  for (field in c("pvalue", "fdr", "direction")) {
+    expected_col <- col_map[[field]]
+    if (!expected_col %in% colnames(gst)) {
+      stop(paste0(expected_col, " column not found in gst. Found: ", paste0(colnames(gst), collapse=", ")))
+    }
+  }
+}
+
+clean_enrichment_table <- function(gst, gs_tool) {
+  # Only gsea tables need cleaning; everything else is returned unchanged
+  if (gs_tool != "gsea") {
+    return(gst)
+  }
+  # gsea tsv files have two useless columns that can be removed:
+  unwanted <- c("GS<br> follow link to MSigDB", "GS DETAILS")
+  keep <- is.na(match(colnames(gst), unwanted))
+  gst[, keep, drop = FALSE]
+}