seqeralabs · edmundmiller · Feb 21, 2025 · Feb 14, 2025 · Feb 14, 2025 · Feb 14, 2025
diff --git a/README.md b/README.md
@@ -56,6 +56,26 @@ nextflow run seqeralabs/nf-aggregate \
 
 If you are using a Seqera Platform Enterprise instance that is secured with a private CA SSL certificate not recognized by default Java certificate authorities, you can specify a custom `cacerts` store path through the `--java_truststore_path` parameter and optionally, a password with the `--java_truststore_password`. This certificate will be used to achieve connectivity with your Seqera Platform instance through API and CLI.
 
+### Benchmark reports
+
+If you want to generate a benchmark report comparing your runs, you can include a `group` column in your `run_ids.csv` file. This will be used to group the runs in the report.
+
+```
+id,workspace,group
+3VcLMAI8wyy0Ld,community/showcase,group1
+4VLRs7nuqbAhDy,community/showcase,group2
+```
+
+You can also pass the `--benchmark_aws_cur_report` parameter to include the AWS Cost and Usage Report in the benchmark report. This should be a path to a valid AWS Cost and Usage Report Parquet file (`.parquet`, stored locally or in your cloud bucket). To run nf-aggregate with benchmark reports, you can use the following command:
+
+```
+nextflow run seqeralabs/nf-aggregate \
+    --input run_ids.csv \
+    --outdir ./results \
+    --run_benchmark \
+    --benchmark_aws_cur_report ./aws_cost_report.parquet
+```
+
 ## Output
 
 The results from the pipeline will be published in the path specified by the `--outdir` and will consist of the following contents:

diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -1,5 +1,5 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/assets/schema_input.json",
     "title": "nf-aggregate pipeline - params.input schema",
     "description": "Schema for the file provided with params.input",
@@ -10,12 +10,20 @@
             "id": {
                 "type": "string",
                 "pattern": "^[A-Za-z0-9]{9,14}$",
-                "errorMessage": "Please provide a valid Seqera Platform run identifier"
+                "errorMessage": "Please provide a valid Seqera Platform run identifier",
+                "meta": ["id"]
             },
             "workspace": {
                 "type": "string",
                 "pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
-                "errorMessage": "Please provide a valid Seqera Platform Workspace name"
+                "errorMessage": "Please provide a valid Seqera Platform Workspace name",
+                "meta": ["workspace"]
+            },
+            "group": {
+                "type": "string",
+                "pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){0,38}$",
+                "errorMessage": "Please provide a valid group name (1-39 characters: letters and digits, optionally separated by single '-' or '_')",
+                "meta": ["group"]
             }
         },
         "required": ["id", "workspace"]

diff --git a/modules/local/benchmark_report/main.nf b/modules/local/benchmark_report/main.nf
@@ -0,0 +1,71 @@
+/*
+ * Render the benchmark report comparing Seqera Platform runs.
+ *
+ * Builds a `group,file_path` samplesheet, copies the run dumps into /project
+ * (where the script expects `main_benchmark_report.qmd` to be) and renders the
+ * notebook with quarto.
+ *
+ * Inputs:
+ *   run_dumps                - run dumps to compare, staged into the task directory
+ *   groups                   - group labels; groups[i] is written next to run_dumps[i]
+ *   benchmark_aws_cur_report - optional AWS CUR file; an empty value skips the cost profile
+ *
+ * Outputs:
+ *   benchmark_html - benchmark_report.html
+ *   versions       - versions.yml with the output of `quarto -v`
+ */
+process BENCHMARK_REPORT {
+
+    container 'cr.seqera.io/scidev/benchmark-reports:sha-7fe0d8e'
+
+    input:
+    path run_dumps
+    val  groups
+    path benchmark_aws_cur_report
+
+    output:
+    path "benchmark_report.html" , emit: benchmark_html
+    path "versions.yml"          , emit: versions
+
+    script:
+    // Pass the cost profile to quarto only when a CUR file was supplied.
+    def aws_cost_param = benchmark_aws_cur_report ? "--profile cost -P aws_cost:\$TASK_DIR/${benchmark_aws_cur_report}" : ""
+    def benchmark_samplesheet = "benchmark_samplesheet.csv"
+    // One `echo` per run. Rows are joined with the script body's 4-space indent: a row
+    // starting at column 0 would leave the rendered script without a common indentation
+    // to strip, and the space-indented END_VERSIONS terminator below is not matched by `<<-`.
+    def samplesheet_rows = groups.withIndex().collect { group, idx ->
+        "echo '${group},/project/${run_dumps[idx]}' >> ${benchmark_samplesheet}"
+    }.join('\n    ')
+
+    """
+    # Set up R environment from renv
+    export R_LIBS_USER=/project/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu
+    # Store task work directory at beginning
+    TASK_DIR="\$PWD"
+
+    # Create the samplesheet header
+    echo "group,file_path" > ${benchmark_samplesheet}
+
+    # Add each group and file path with full task directory path
+    ${samplesheet_rows}
+
+    # Copy run dumps to /project directory
+    cp -r ${run_dumps} /project/
+
+    cd /project
+    quarto render main_benchmark_report.qmd \\
+        -P log_csv:"\$TASK_DIR/"${benchmark_samplesheet} \\
+        $aws_cost_param \\
+        --output-dir . \\
+        --output benchmark_report.html
+
+    cp /project/benchmark_report.html "\$TASK_DIR/"
+    cd "\$TASK_DIR/"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        quarto-cli: \$(quarto -v)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/benchmark_report/nextflow.config b/modules/local/benchmark_report/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: 'BENCHMARK_REPORT' {
+        // BENCHMARK_REPORT aggregates all runs and declares no `metaOut`, so there is
+        // no per-run project name to build a sub-directory from; publish under outdir.
+        publishDir = [
+            path: { "${params.outdir}/benchmark_report" },
+            mode: params.publish_dir_mode,
+            // Returning null from saveAs skips publishing versions.yml and any JSON file.
+            saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.json') ? null : filename }
+        ]
+    }
+}
diff --git a/nextflow.config b/nextflow.config
@@ -27,6 +27,10 @@ params {
     multiqc_logo                 = null
     skip_multiqc                 = false
 
+    // Benchmark report options
+    run_benchmark                = false
+    benchmark_aws_cur_report     = null
+
     // Boilerplate options
     outdir                       = 'results'
     publish_dir_mode             = 'copy'
@@ -52,6 +56,7 @@ process {
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
+
 }
 
 profiles {
@@ -103,7 +108,7 @@ profiles {
         shifter.enabled         = false
         charliecloud.enabled    = false
         apptainer.enabled       = false
-        docker.runOptions       = '-u $(id -u):$(id -g)'
+        // docker.runOptions       = '-u $(id -u):$(id -g)'
     }
     arm {
         docker.runOptions       = '-u $(id -u):$(id -g) --platform=linux/amd64'
@@ -172,7 +177,7 @@ singularity.registry = 'quay.io'
 
 // Nextflow plugins
 plugins {
-    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+    id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
 }
 
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -1,10 +1,10 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/nextflow_schema.json",
     "title": "seqeralabs/nf-aggregate pipeline parameters",
     "description": "Minimal nf-core pipeline compatible with template",
     "type": "object",
-    "definitions": {
+    "$defs": {
         "input_output_options": {
             "title": "Input/output options",
             "type": "object",
@@ -26,7 +26,8 @@
                     "type": "string",
                     "format": "directory-path",
                     "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
-                    "fa_icon": "fas fa-folder-open"
+                    "fa_icon": "fas fa-folder-open",
+                    "default": "results"
                 }
             }
         },
@@ -67,6 +68,18 @@
                     "type": "boolean",
                     "description": "Skip MultiQC.",
                     "fa_icon": "fas fa-fast-forward"
+                },
+                "run_benchmark": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-tachometer-alt",
+                    "description": "Compile a benchmarking report for Seqera Platform runs."
+                },
+                "benchmark_aws_cur_report": {
+                    "type": "string",
+                    "fa_icon": "fas fa-dollar-sign",
+                    "description": "Path to an AWS Cost and Usage Report (CUR) Parquet file from AWS Data Exports, used to add cost data to the benchmark report.",
+                    "pattern": "^\\S+\\.parquet$",
+                    "format": "file-path"
                 }
             },
             "required": ["seqera_api_endpoint"]
@@ -169,13 +182,13 @@
     },
     "allOf": [
         {
-            "$ref": "#/definitions/input_output_options"
+            "$ref": "#/$defs/input_output_options"
         },
         {
-            "$ref": "#/definitions/pipeline_options"
+            "$ref": "#/$defs/pipeline_options"
         },
         {
-            "$ref": "#/definitions/generic_options"
+            "$ref": "#/$defs/generic_options"
         }
     ]
 }
diff --git a/subworkflows/local/utils_nf_aggregate/main.nf b/subworkflows/local/utils_nf_aggregate/main.nf
@@ -15,6 +15,7 @@ import java.nio.file.Paths
 include { UTILS_NEXTFLOW_PIPELINE   } from '../../nf-core/utils_nextflow_pipeline/main'
 include { getWorkflowVersion        } from '../../nf-core/utils_nextflow_pipeline/main'
 include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin/main.nf'
+include { samplesheetToList }         from 'plugin/nf-schema'
 
 /*
 ========================================================================================
@@ -53,9 +54,8 @@ workflow PIPELINE_INITIALISATION {
 
     // Read in ids from --input file
     Channel
-        .from(file(params.input))
-        .splitCsv(header:true, sep:',', strip:true)
-        .unique()
+        .fromList(samplesheetToList(params.input, "assets/schema_input.json"))
+        .flatten()
         .set { ch_ids }
 
     emit:

diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
diff --git a/tower.yml b/tower.yml
@@ -3,3 +3,5 @@ reports:
     display: "MultiQC HTML report"
   "*_gantt.html":
     display: "GANTT plot of task execution in a run, grouped by 'instance-id' if available."
+  benchmark_report.html:
+    display: "Benchmarking HTML report"
diff --git a/workflows/nf_aggregate/main.nf b/workflows/nf_aggregate/main.nf
@@ -5,10 +5,11 @@
 include { SEQERA_RUNS_DUMP     } from '../../modules/local/seqera_runs_dump'
 include { PLOT_RUN_GANTT       } from '../../modules/local/plot_run_gantt'
 include { MULTIQC              } from '../../modules/nf-core/multiqc'
+include { BENCHMARK_REPORT     } from '../../modules/local/benchmark_report'
 include { paramsSummaryMultiqc } from '../../subworkflows/local/utils_nf_aggregate'
 include { getProcessVersions   } from '../../subworkflows/local/utils_nf_aggregate'
 include { getWorkflowVersions  } from '../../subworkflows/local/utils_nf_aggregate'
-include { paramsSummaryMap     } from 'plugin/nf-validation'
+include { paramsSummaryMap     } from 'plugin/nf-schema'
 
 workflow NF_AGGREGATE {
 
@@ -86,6 +87,19 @@ workflow NF_AGGREGATE {
         ch_multiqc_report = MULTIQC.out.report
     }
 
+    //
+    // MODULE: Generate benchmark report (the optional AWS CUR file must exist when given)
+    //
+    if (params.run_benchmark) {
+        aws_cur_report = params.benchmark_aws_cur_report ? Channel.fromPath(params.benchmark_aws_cur_report, checkIfExists: true) : []
+
+        BENCHMARK_REPORT (
+            SEQERA_RUNS_DUMP.out.run_dump.collect{it[1]},
+            SEQERA_RUNS_DUMP.out.run_dump.collect{it[0].group},
+            aws_cur_report
+        )
+    }
+
     emit:
     multiqc_report = ch_multiqc_report
     versions       = ch_versions

diff --git a/workflows/nf_aggregate/nextflow.config b/workflows/nf_aggregate/nextflow.config
@@ -1,3 +1,4 @@
 includeConfig '../../modules/local/seqera_runs_dump/nextflow.config'
 includeConfig '../../modules/local/plot_run_gantt/nextflow.config'
 includeConfig '../../modules/nf-core/multiqc/nextflow.config'
+includeConfig '../../modules/local/benchmark_report/nextflow.config'