feat: handle an empty 'variable' column from contrasts_yml by adding the report_grouping_variable parameter.

atrigila · atrigila · commit 800c6de80e57 · 2025-06-27T14:53:09.000Z
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
@@ -972,24 +972,54 @@ if (!is.null(params$functional_method)){
     cat("\n#### ", toupper(params$functional_method) ," {.tabset}\n")
 
     if (params$functional_method == 'gsea') {
+
+    # Only keep contrasts that have both reference and target
+    gsea_contrasts <- contrasts[
+        !is.na(contrasts$reference) & contrasts$reference != "" &
+        !is.na(contrasts$target) & contrasts$target != "",
+    ]
+
+    if (nrow(gsea_contrasts) == 0) {
+        warning("No contrasts with reference and target defined. Skipping GSEA report section.")
+    } else {
+
         for (gmt_file in simpleSplit(params$gene_sets_files)) {
-        gmt_name <- basename(tools::file_path_sans_ext(gmt_file))
-        cat("\n##### ", gmt_name ," {.tabset}\n")
-
-        reference_gsea_tables <- paste0(differential_names, ".", gmt_name, '.gsea_report_for_', contrasts$reference, '.tsv')
-        target_gsea_tables <- paste0(differential_names, ".", gmt_name, '.gsea_report_for_', contrasts$target, '.tsv')
-        for (i in 1:nrow(contrasts)){
-            cat("\n###### ", contrast_descriptions[i], "\n")
-            target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)]
-            target_gsea_results <- round_dataframe_columns(target_gsea_results, digits=params$round_digits)
-            print( htmltools::tagList(datatable(target_gsea_results, caption = paste0("\nTarget (", contrasts$target[i], ")\n"), rownames = FALSE) ))
-            ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)]
-            ref_gsea_results <- round_dataframe_columns(ref_gsea_results, digits=params$round_digits)
-            print( htmltools::tagList(datatable(ref_gsea_results, caption = paste0("\nReference (", contrasts$reference[i], ")\n"), rownames = FALSE) ))
+            gmt_name <- basename(tools::file_path_sans_ext(gmt_file))
+            cat("\n##### ", gmt_name ," {.tabset}\n")
+
+            # Index by original contrasts row so file names, descriptions and captions stay aligned after filtering
+            reference_gsea_tables <- paste0(differential_names, ".", gmt_name, '.gsea_report_for_', contrasts$reference, '.tsv')
+            target_gsea_tables <- paste0(differential_names, ".", gmt_name, '.gsea_report_for_', contrasts$target, '.tsv')
+            gsea_rows <- which(!is.na(contrasts$reference) & contrasts$reference != "" &
+                !is.na(contrasts$target) & contrasts$target != "")
+            for (i in gsea_rows) {
+                cat("\n###### ", contrast_descriptions[i], "\n")
+                if (file.exists(target_gsea_tables[i])) {
+                    target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)]
+                    target_gsea_results <- round_dataframe_columns(target_gsea_results, digits=params$round_digits)
+                    print( htmltools::tagList(
+                        datatable(target_gsea_results,
+                            caption = paste0("\nTarget (", contrasts$target[i], ")\n"),
+                            rownames = FALSE)
+                    ))
+                } else {
+                    cat("\n*Target GSEA file missing: ", target_gsea_tables[i], "*\n")
+                }
+                if (file.exists(reference_gsea_tables[i])) {
+                    ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)]
+                    ref_gsea_results <- round_dataframe_columns(ref_gsea_results, digits=params$round_digits)
+                    print( htmltools::tagList(
+                        datatable(ref_gsea_results,
+                            caption = paste0("\nReference (", contrasts$reference[i], ")\n"),
+                            rownames = FALSE)
+                    ))
+                } else {
+                    cat("\n*Reference GSEA file missing: ", reference_gsea_tables[i], "*\n")
+                }
+            }
         }
     }
-
-    } else if (params$functional_method == 'gprofiler2') {
+    }  else if (params$functional_method == 'gprofiler2') {
 
         cat(paste0("\nThis section contains the results tables of the pathway analysis which was done with the R package gprofiler2. The differential fraction is the number of differential genes in a pathway divided by that pathway's size, i.e. the number of genes annotated for the pathway.",
         ifelse(params$gprofiler2_significant, paste0(" Enrichment was only considered if significant, i.e. adjusted p-value <= ", params$gprofiler2_max_qval, "."), "Enrichment was also considered if not significant."), "\n"))
diff --git a/nextflow.config b/nextflow.config
@@ -186,6 +186,7 @@ params {
 
     // Report options
     skip_reports                    = false
+    report_grouping_variable        = null
 
     // Note: for shinyapps deployment, in addition to setting these values,
     // SHINYAPPS_TOKEN and SHINYAPPS_SECRET must be available to the
diff --git a/subworkflows/local/utils_nfcore_differentialabundance_pipeline/main.nf b/subworkflows/local/utils_nfcore_differentialabundance_pipeline/main.nf
@@ -206,6 +206,23 @@ def validateInputParameters(paramsets) {
         if (!(row.contrasts_yml || row.contrasts)) {
             error("Either '--contrasts' and '--contrasts_yml' must be set. Please specify one of these options to define contrasts.")
         }
+
+        if (row.contrasts_yml && !row.skip_reports && !row.report_grouping_variable) {
+            log.warn """
+                    =======================================================================
+                    You are using a formula-based contrasts YAML file (--contrasts_yml)
+                    but have not provided --report_grouping_variable.
+
+                    This can cause errors when rendering reports if no grouping variable
+                    is available in the contrasts table (the 'variable' column is empty).
+
+                    Please specify a grouping variable to use in reports, e.g.:
+                        --report_grouping_variable 'treatment'
+
+                    Or set --skip_reports if you do not need reporting.
+                    =======================================================================
+                    """
+        }
     }
 }
 
diff --git a/tests/test_rnaseq_limma.nf.test b/tests/test_rnaseq_limma.nf.test
@@ -76,7 +76,7 @@ nextflow_pipeline {
                 matrix          = "https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/counts.tsv"
                 contrasts_yml   = "https://github.com/nf-core/test-datasets/raw/differentialabundance/testdata/formula_contrasts/rnaseq_complex_contrast.yaml"
                 exploratory_log2_assays = "raw"
-                skip_reports    = true
+                report_grouping_variable = 'treatment'
             }
         }
 
diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf
@@ -789,6 +789,27 @@ workflow DIFFERENTIALABUNDANCE {
         .groupTuple()                                 // [ meta, [meta with contrast], [functional results] ]
         .map { [it[0], it.tail().tail().flatten()] }  // [ meta, [functional results] ]
 
+    // If the user provides `report_grouping_variable`, fill empty or 'NA' entries of the contrasts file's 'variable' column with it
+    if (params.report_grouping_variable) {
+        ch_validated_contrast = ch_validated_contrast
+            .splitCsv(header: true, sep: '\t')
+            .map { meta, row ->
+                def variable = row.variable?.trim()
+                if (!variable || variable == 'NA') {
+                    row.variable = params.report_grouping_variable
+                }
+                [meta, row]
+            }
+            .groupTuple()
+            .map { meta, rows ->
+                def header = rows[0].keySet().join('\t')
+                def lines = rows.collect { it.values().join('\t') }
+                def content = ([header] + lines).join('\n')
+                def outFile = file("${workflow.workDir}/${meta.id ?: meta.paramset_name}_contrast_variable_filled.tsv")
+                outFile.text = content
+                [meta, outFile]
+            }
+    }
     // Prepare input for report generation
     // Each paramset will generate one markdown report by gathering all the files created with the same paramset
 

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ nextflow_pipeline {`
`76`	`76`	`matrix = "https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/counts.tsv"`
`77`	`77`	`contrasts_yml = "https://github.com/nf-core/test-datasets/raw/differentialabundance/testdata/formula_contrasts/rnaseq_complex_contrast.yaml"`
`78`	`78`	`exploratory_log2_assays = "raw"`
`79`		`- skip_reports = true`
	`79`	`+ report_grouping_variable = 'treatment'`
`80`	`80`	`}`
`81`	`81`	`}`
`82`	`82`