diff --git a/README.md b/README.md index 16fce74..b9453b5 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,26 @@ nextflow run seqeralabs/nf-aggregate \ If you are using a Seqera Platform Enterprise instance that is secured with a private CA SSL certificate not recognized by default Java certificate authorities, you can specify a custom `cacerts` store path through the `--java_truststore_path` parameter and optionally, a password with the `--java_truststore_password`. This certificate will be used to achieve connectivity with your Seqera Platform instance through API and CLI. +### Benchmark reports + +If you want to generate a benchmark report comparing multiple runs, you can include a `group` column in your `run_ids.csv` file. This allows you to organize and analyze runs based on custom groupings in the final report. + +``` +id,workspace,group +3VcLMAI8wyy0Ld,community/showcase,group1 +4VLRs7nuqbAhDy,community/showcase,group2 +``` + +To incorporate AWS cost data into the benchmark report, use the `--benchmark_aws_cur_report` parameter. This should point to a valid AWS Cost and Usage Report (CUR) file in Parquet format, supporting both CUR 1.0 and CUR 2.0 schemas. The file can be stored locally or in a cloud bucket. 
To run nf-aggregate and generate benchmark reports, you can use the following command: + +``` +nextflow run seqeralabs/nf-aggregate \ + --input run_ids.csv \ + --outdir ./results \ + --generate_benchmark_report \ + --benchmark_aws_cur_report ./aws_cost_report.parquet +``` + ## Output The results from the pipeline will be published in the path specified by the `--outdir` and will consist of the following contents: diff --git a/assets/schema_input.json b/assets/schema_input.json index 31fcce1..24be97f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/assets/schema_input.json", "title": "nf-aggregate pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -10,12 +10,20 @@ "id": { "type": "string", "pattern": "^[A-Za-z0-9]{9,14}$", - "errorMessage": "Please provide a valid Seqera Platform run identifier" + "errorMessage": "Please provide a valid Seqera Platform run identifier", + "meta": ["id"] }, "workspace": { "type": "string", "pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$", - "errorMessage": "Please provide a valid Seqera Platform Workspace name" + "errorMessage": "Please provide a valid Seqera Platform Workspace name", + "meta": ["workspace"] + }, + "group": { + "type": "string", + "pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$", + "errorMessage": "Please provide a valid group name", + "meta": ["group"] } }, "required": ["id", "workspace"] diff --git a/modules/local/benchmark_report/main.nf b/modules/local/benchmark_report/main.nf new file mode 100644 index 0000000..42db33b --- /dev/null +++ b/modules/local/benchmark_report/main.nf @@ -0,0 +1,48 @@ +process BENCHMARK_REPORT { + + container 
'cr.seqera.io/scidev/benchmark-reports:sha-b370978' + + input: + path run_dumps + val groups + path benchmark_aws_cur_report + + output: + path "benchmark_report.html" , emit: benchmark_html + path "versions.yml" , emit: versions + + script: + def aws_cost_param = benchmark_aws_cur_report ? "--profile cost -P aws_cost:\$TASK_DIR/${benchmark_aws_cur_report}" : "" + def benchmark_samplesheet = "benchmark_samplesheet.csv" + """ + # Set up R environment from renv + export R_LIBS_USER=/project/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu + TASK_DIR="\$PWD" + + # Setup cache directories + export QUARTO_CACHE=/tmp/quarto/cache + export XDG_CACHE_HOME=/tmp/quarto + + # Create the benchmark samplesheet csv + echo "group,file_path" > ${benchmark_samplesheet} + ${groups.withIndex().collect { group, idx -> + "echo \"${group},\$TASK_DIR/${run_dumps[idx]}\" >> ${benchmark_samplesheet}" + }.join('\n')} + + cd /project + quarto render main_benchmark_report.qmd \\ + -P log_csv:"\$TASK_DIR/"${benchmark_samplesheet} \\ + $aws_cost_param \\ + --output-dir .\\ + --output benchmark_report.html + + cp /project/benchmark_report.html "\$TASK_DIR/" + cd "\$TASK_DIR/" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r: \$(R --version | head -1 | sed 's/R version \\([0-9.]*\\).*/\\1/') + quarto-cli: \$(quarto --version | head -1 | sed 's/quarto //g') +END_VERSIONS + """ +} diff --git a/modules/local/benchmark_report/nextflow.config b/modules/local/benchmark_report/nextflow.config new file mode 100644 index 0000000..0db6f23 --- /dev/null +++ b/modules/local/benchmark_report/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: 'BENCHMARK_REPORT' { + publishDir = [ + path: { "${params.outdir}/${metaOut?.projectName?.replace("/", "_") ?: ""}/benchmark_report" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.json') ? 
null : filename } + ] + containerOptions = "--user root" + } +} diff --git a/nextflow.config b/nextflow.config index 151f85b..178d2a2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,10 @@ params { multiqc_logo = null skip_multiqc = false + // Benchmark report options + generate_benchmark_report = false + benchmark_aws_cur_report = null + // Boilerplate options outdir = 'results' publish_dir_mode = 'copy' @@ -38,7 +42,6 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = '' validationShowHiddenParams = false validationSkipDuplicateCheck = false validate_params = true @@ -52,6 +55,7 @@ process { errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' + } profiles { @@ -172,7 +176,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container diff --git a/nextflow_schema.json b/nextflow_schema.json index f8fd597..cd41a13 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/nextflow_schema.json", "title": "seqeralabs/nf-aggregate pipeline parameters", "description": "Minimal nf-core pipeline compatible with template", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -26,7 +26,8 @@ "type": "string", "format": "directory-path", "description": "The output directory where the 
results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "results" } } }, @@ -67,6 +68,18 @@ "type": "boolean", "description": "Skip MultiQC.", "fa_icon": "fas fa-fast-forward" + }, + "generate_benchmark_report": { + "type": "boolean", + "fa_icon": "fas fa-tachometer-alt", + "description": "Compile a benchmarking report for Seqera Platform runs." + }, + "benchmark_aws_cur_report": { + "type": "string", + "fa_icon": "fas fa-dollar-sign", + "description": "AWS CUR report from data exports.", + "pattern": "^\\S+\\.parquet", + "format": "file-path" } }, "required": ["seqera_api_endpoint"] @@ -127,7 +140,8 @@ "modules_testdata_base_path": { "type": "string", "description": "Base path / URL for data used in the modules", - "hidden": true + "hidden": true, + "default": "s3://ngi-igenomes/testdata/nf-core/modules/" }, "validate_params": { "type": "boolean", @@ -169,13 +183,13 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/pipeline_options" + "$ref": "#/$defs/pipeline_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/subworkflows/local/utils_nf_aggregate/main.nf b/subworkflows/local/utils_nf_aggregate/main.nf index 89d0e09..c04e321 100644 --- a/subworkflows/local/utils_nf_aggregate/main.nf +++ b/subworkflows/local/utils_nf_aggregate/main.nf @@ -15,6 +15,7 @@ import java.nio.file.Paths include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline/main' include { getWorkflowVersion } from '../../nf-core/utils_nextflow_pipeline/main' include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin/main.nf' +include { samplesheetToList } from 'plugin/nf-schema' /* ======================================================================================== @@ -53,9 +54,8 @@ workflow 
PIPELINE_INITIALISATION { // Read in ids from --input file Channel - .from(file(params.input)) - .splitCsv(header:true, sep:',', strip:true) - .unique() + .fromList(samplesheetToList(params.input, "assets/schema_input.json")) + .flatten() .set { ch_ids } emit: diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf index 2585b65..50155c0 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -8,9 +8,9 @@ ======================================================================================== */ -include { paramsHelp } from 'plugin/nf-validation' -include { paramsSummaryLog } from 'plugin/nf-validation' -include { validateParameters } from 'plugin/nf-validation' +include { paramsHelp } from 'plugin/nf-schema' +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' /* ======================================================================================== diff --git a/tower.yml b/tower.yml index 013a577..be9013a 100644 --- a/tower.yml +++ b/tower.yml @@ -3,3 +3,5 @@ reports: display: "MultiQC HTML report" "*_gantt.html": display: "GANTT plot of task execution in a run, grouped by 'instance-id' if available." 
+ benchmark_report.html: + display: "Benchmarking HTML report" diff --git a/workflows/nf_aggregate/main.nf b/workflows/nf_aggregate/main.nf index 0296708..f0da299 100644 --- a/workflows/nf_aggregate/main.nf +++ b/workflows/nf_aggregate/main.nf @@ -5,10 +5,11 @@ include { SEQERA_RUNS_DUMP } from '../../modules/local/seqera_runs_dump' include { PLOT_RUN_GANTT } from '../../modules/local/plot_run_gantt' include { MULTIQC } from '../../modules/nf-core/multiqc' +include { BENCHMARK_REPORT } from '../../modules/local/benchmark_report' include { paramsSummaryMultiqc } from '../../subworkflows/local/utils_nf_aggregate' include { getProcessVersions } from '../../subworkflows/local/utils_nf_aggregate' include { getWorkflowVersions } from '../../subworkflows/local/utils_nf_aggregate' -include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-schema' workflow NF_AGGREGATE { @@ -56,6 +57,20 @@ workflow NF_AGGREGATE { ) ch_versions = ch_versions.mix(PLOT_RUN_GANTT.out.versions.first()) + // + // MODULE: Generate benchmark report + // + if (params.generate_benchmark_report) { + aws_cur_report = params.benchmark_aws_cur_report ? Channel.fromPath(params.benchmark_aws_cur_report) : [] + + BENCHMARK_REPORT ( + SEQERA_RUNS_DUMP.out.run_dump.collect{it[1]}, + SEQERA_RUNS_DUMP.out.run_dump.collect{it[0].group}, + aws_cur_report + ) + ch_versions = ch_versions.mix(BENCHMARK_REPORT.out.versions.first()) + } + // // Collate software versions // diff --git a/workflows/nf_aggregate/nextflow.config b/workflows/nf_aggregate/nextflow.config index a3728fc..a383f56 100644 --- a/workflows/nf_aggregate/nextflow.config +++ b/workflows/nf_aggregate/nextflow.config @@ -1,3 +1,4 @@ includeConfig '../../modules/local/seqera_runs_dump/nextflow.config' includeConfig '../../modules/local/plot_run_gantt/nextflow.config' includeConfig '../../modules/nf-core/multiqc/nextflow.config' +includeConfig '../../modules/local/benchmark_report/nextflow.config'