Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6401998
Merge pull request #62 from seqeralabs/dev
drpatelh Nov 12, 2024
cdcf64b
First test implementation of benchmarks to nf-aggregate
FloWuenne Feb 14, 2025
712e7f7
First working alpha version.
FloWuenne Feb 14, 2025
15fcb6b
Updated benchmark params.
FloWuenne Feb 14, 2025
476d6dc
Fully working basic version of QL reports.
FloWuenne Feb 14, 2025
7aaa147
Updated and fixed schema
FloWuenne Feb 14, 2025
a372a1d
Ported from nf-validation to nf-schema2.3.0
FloWuenne Feb 14, 2025
9b9ca1c
Remove unwanted samplesheet test file.
FloWuenne Feb 14, 2025
1503f20
Small edits to benchmark report process.
FloWuenne Feb 15, 2025
704eb4b
Renamed cur report param and updated schema.
FloWuenne Feb 20, 2025
403b1da
Added AWS Cur report to quarto render call.
FloWuenne Feb 20, 2025
d35a849
Testing stageinMode copy for benchmark reports.
FloWuenne Feb 20, 2025
1f5a256
Another attempt to make it work on AWS.
FloWuenne Feb 20, 2025
d7779a5
Added full task workdir to AWS cost report path.
FloWuenne Feb 21, 2025
6a007a6
Removed debug and copy mode and added short docs.
FloWuenne Feb 21, 2025
cfc6ea7
Removed old samplesheet creation logic from workflow main.nf
FloWuenne Feb 21, 2025
af45928
Fix spacing in nextflow.config.
FloWuenne Feb 21, 2025
7a72ee9
Added containerOptions to run as root to BENCHMARK_REPORT.
FloWuenne Feb 24, 2025
7438376
Move containerOptions into nextflow.config for BENCHMARK_REPORT.
FloWuenne Feb 24, 2025
93db367
Updated benchmark_reports container version.
FloWuenne Feb 24, 2025
fb4265e
Update README.md
FloWuenne Feb 25, 2025
ea6ec6c
Update README.md
FloWuenne Feb 25, 2025
c38e05f
Integrated review comments, updated schema, added versions to MultiQC.
FloWuenne Feb 25, 2025
ee39861
Removed deprecated parameter validationSchemaIgnoreParams from config…
FloWuenne Feb 25, 2025
7306b35
Fixed versions.yml formatting for MultiQC in BENCHMARK_REPORT
FloWuenne Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,26 @@ nextflow run seqeralabs/nf-aggregate \

If you are using a Seqera Platform Enterprise instance that is secured with a private CA SSL certificate not recognized by default Java certificate authorities, you can specify a custom `cacerts` store path through the `--java_truststore_path` parameter and optionally, a password with the `--java_truststore_password`. This certificate will be used to achieve connectivity with your Seqera Platform instance through API and CLI.

### Benchmark reports

If you want to generate a benchmark report comparing multiple runs, you can include a `group` column in your `run_ids.csv` file. This allows you to organize and analyze runs based on custom groupings in the final report.

```
id,workspace,group
3VcLMAI8wyy0Ld,community/showcase,group1
4VLRs7nuqbAhDy,community/showcase,group2
```

To incorporate AWS cost data into the benchmark report, use the `--benchmark_aws_cur_report` parameter. It should point to a valid AWS Cost and Usage Report (CUR) file in Parquet format; both the CUR 1.0 and CUR 2.0 schemas are supported. The file can be stored locally or in a cloud bucket. To run nf-aggregate and generate benchmark reports, you can use the following command:

```
nextflow run seqeralabs/nf-aggregate \
--input run_ids.csv \
--outdir ./results \
--generate_benchmark_report \
--benchmark_aws_cur_report ./aws_cost_report.parquet
```

## Output

The results from the pipeline will be published in the path specified by the `--outdir` and will consist of the following contents:
Expand Down
14 changes: 11 additions & 3 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/assets/schema_input.json",
"title": "nf-aggregate pipeline - params.input schema",
"description": "Schema for the file provided with params.input",
Expand All @@ -10,12 +10,20 @@
"id": {
"type": "string",
"pattern": "^[A-Za-z0-9]{9,14}$",
"errorMessage": "Please provide a valid Seqera Platform run identifier"
"errorMessage": "Please provide a valid Seqera Platform run identifier",
"meta": ["id"]
},
"workspace": {
"type": "string",
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
"errorMessage": "Please provide a valid Seqera Platform Workspace name"
"errorMessage": "Please provide a valid Seqera Platform Workspace name",
"meta": ["workspace"]
},
"group": {
"type": "string",
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
"errorMessage": "Please provide a valid group name",
"meta": ["group"]
}
},
"required": ["id", "workspace"]
Expand Down
48 changes: 48 additions & 0 deletions modules/local/benchmark_report/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Renders the Quarto benchmark report (main_benchmark_report.qmd, shipped inside
// the container under /project) from a set of run dumps and copies the resulting
// HTML back into the task work directory.
process BENCHMARK_REPORT {

    container 'cr.seqera.io/scidev/benchmark-reports:sha-b370978'

    input:
    path run_dumps                  // run dump path(s) listed in the samplesheet
    val  groups                     // group labels; groups[i] is written next to run_dumps[i]
    path benchmark_aws_cur_report   // optional AWS CUR file; a falsy value disables the cost profile

    output:
    path "benchmark_report.html", emit: benchmark_html
    path "versions.yml"         , emit: versions

    script:
    // Only enable the cost profile when a CUR file was actually supplied.
    // The path is quoted so that file names containing spaces survive the shell.
    def aws_cost_param = benchmark_aws_cur_report ? "--profile cost -P aws_cost:\"\$TASK_DIR/${benchmark_aws_cur_report}\"" : ""
    def benchmark_samplesheet = "benchmark_samplesheet.csv"
    // A single staged file may arrive as a bare path instead of a list; wrap it so
    // the index lookups below always address whole files.
    def dump_list = run_dumps instanceof Path ? [run_dumps] : run_dumps
    // One quoted "group,absolute_path" argument per run. The rows are emitted on a
    // single line so the interpolated text cannot introduce differently-indented
    // lines into the script body.
    def samplesheet_rows = groups.withIndex().collect { group, idx ->
        "\"${group},\$TASK_DIR/${dump_list[idx]}\""
    }.join(' ')
    """
    # Set up R environment from renv
    export R_LIBS_USER=/project/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu
    TASK_DIR="\$PWD"

    # Setup cache directories
    export QUARTO_CACHE=/tmp/quarto/cache
    export XDG_CACHE_HOME=/tmp/quarto

    # Create the benchmark samplesheet csv (header row, then one row per run)
    printf '%s\\n' "group,file_path" ${samplesheet_rows} > ${benchmark_samplesheet}

    # Render from the project directory inside the container, then bring the
    # report back to the task directory so Nextflow can collect it
    cd /project
    quarto render main_benchmark_report.qmd \\
        -P log_csv:"\$TASK_DIR/${benchmark_samplesheet}" \\
        ${aws_cost_param} \\
        --output-dir . \\
        --output benchmark_report.html

    cp /project/benchmark_report.html "\$TASK_DIR/"
    cd "\$TASK_DIR/"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        r: \$(R --version | head -1 | sed 's/R version \\([0-9.]*\\).*/\\1/')
        quarto-cli: \$(quarto --version | head -1 | sed 's/quarto //g')
    END_VERSIONS
    """
}
10 changes: 10 additions & 0 deletions modules/local/benchmark_report/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Process-scoped settings that apply only to BENCHMARK_REPORT.
process {
    withName: 'BENCHMARK_REPORT' {
        // Container runtime flag passed through unchanged.
        containerOptions = "--user root"

        publishDir = [
            // NOTE(review): `metaOut` is not declared by the BENCHMARK_REPORT process
            // in this change set; presumably the segment collapses to an empty
            // string when it is unset — confirm against a real run.
            path: { "${params.outdir}/${metaOut?.projectName?.replace("/", "_") ?: ""}/benchmark_report" },
            mode: params.publish_dir_mode,
            // Keep versions.yml and any *.json files out of the published results.
            saveAs: { fname -> (fname == 'versions.yml' || fname.endsWith('.json')) ? null : fname }
        ]
    }
}
8 changes: 6 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ params {
multiqc_logo = null
skip_multiqc = false

// Benchmark report options
generate_benchmark_report = false
benchmark_aws_cur_report = null

// Boilerplate options
outdir = 'results'
publish_dir_mode = 'copy'
Expand All @@ -38,7 +42,6 @@ params {
// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = false
validationSchemaIgnoreParams = ''
validationShowHiddenParams = false
validationSkipDuplicateCheck = false
validate_params = true
Expand All @@ -52,6 +55,7 @@ process {
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'

}

profiles {
Expand Down Expand Up @@ -172,7 +176,7 @@ singularity.registry = 'quay.io'

// Nextflow plugins
plugins {
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
Expand Down
28 changes: 21 additions & 7 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/nextflow_schema.json",
"title": "seqeralabs/nf-aggregate pipeline parameters",
"description": "Minimal nf-core pipeline compatible with template",
"type": "object",
"definitions": {
"$defs": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
Expand All @@ -26,7 +26,8 @@
"type": "string",
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
"fa_icon": "fas fa-folder-open",
"default": "results"
}
}
},
Expand Down Expand Up @@ -67,6 +68,18 @@
"type": "boolean",
"description": "Skip MultiQC.",
"fa_icon": "fas fa-fast-forward"
},
"generate_benchmark_report": {
"type": "boolean",
"fa_icon": "fas fa-tachometer-alt",
"description": "Compile a benchmarking report for Seqera Platform runs."
},
"benchmark_aws_cur_report": {
    "type": "string",
    "fa_icon": "fas fa-dollar-sign",
    "description": "AWS CUR report from data exports.",
    "pattern": "^\\S+\\.parquet$",
    "format": "file-path"
}
},
"required": ["seqera_api_endpoint"]
Expand Down Expand Up @@ -127,7 +140,8 @@
"modules_testdata_base_path": {
"type": "string",
"description": "Base path / URL for data used in the modules",
"hidden": true
"hidden": true,
"default": "s3://ngi-igenomes/testdata/nf-core/modules/"
},
"validate_params": {
"type": "boolean",
Expand Down Expand Up @@ -169,13 +183,13 @@
},
"allOf": [
{
"$ref": "#/definitions/input_output_options"
"$ref": "#/$defs/input_output_options"
},
{
"$ref": "#/definitions/pipeline_options"
"$ref": "#/$defs/pipeline_options"
},
{
"$ref": "#/definitions/generic_options"
"$ref": "#/$defs/generic_options"
}
]
}
6 changes: 3 additions & 3 deletions subworkflows/local/utils_nf_aggregate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import java.nio.file.Paths
include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline/main'
include { getWorkflowVersion } from '../../nf-core/utils_nextflow_pipeline/main'
include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin/main.nf'
include { samplesheetToList } from 'plugin/nf-schema'

/*
========================================================================================
Expand Down Expand Up @@ -53,9 +54,8 @@ workflow PIPELINE_INITIALISATION {

// Read in ids from --input file
Channel
.from(file(params.input))
.splitCsv(header:true, sep:',', strip:true)
.unique()
.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
.flatten()
.set { ch_ids }

emit:
Expand Down
6 changes: 3 additions & 3 deletions subworkflows/nf-core/utils_nfvalidation_plugin/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions tower.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ reports:
display: "MultiQC HTML report"
"*_gantt.html":
display: "GANTT plot of task execution in a run, grouped by 'instance-id' if available."
benchmark_report.html:
display: "Benchmarking HTML report"
17 changes: 16 additions & 1 deletion workflows/nf_aggregate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
include { SEQERA_RUNS_DUMP } from '../../modules/local/seqera_runs_dump'
include { PLOT_RUN_GANTT } from '../../modules/local/plot_run_gantt'
include { MULTIQC } from '../../modules/nf-core/multiqc'
include { BENCHMARK_REPORT } from '../../modules/local/benchmark_report'
include { paramsSummaryMultiqc } from '../../subworkflows/local/utils_nf_aggregate'
include { getProcessVersions } from '../../subworkflows/local/utils_nf_aggregate'
include { getWorkflowVersions } from '../../subworkflows/local/utils_nf_aggregate'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMap } from 'plugin/nf-schema'

workflow NF_AGGREGATE {

Expand Down Expand Up @@ -56,6 +57,20 @@ workflow NF_AGGREGATE {
)
ch_versions = ch_versions.mix(PLOT_RUN_GANTT.out.versions.first())

//
// MODULE: Generate benchmark report
//
if (params.generate_benchmark_report) {
    // The AWS CUR file is optional: pass an empty list when it is not set so the
    // process still runs, just without the cost profile. When it is set, fail
    // fast on a missing file instead of deferring the error to task staging.
    aws_cur_report = params.benchmark_aws_cur_report
        ? Channel.fromPath(params.benchmark_aws_cur_report, checkIfExists: true)
        : []

    // Both collected lists derive from the same run_dump channel: the first takes
    // element [1] of each emission, the second the `group` field of element [0].
    BENCHMARK_REPORT (
        SEQERA_RUNS_DUMP.out.run_dump.collect{it[1]},
        SEQERA_RUNS_DUMP.out.run_dump.collect{it[0].group},
        aws_cur_report
    )
    ch_versions = ch_versions.mix(BENCHMARK_REPORT.out.versions.first())
}

//
// Collate software versions
//
Expand Down
1 change: 1 addition & 0 deletions workflows/nf_aggregate/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
includeConfig '../../modules/local/seqera_runs_dump/nextflow.config'
includeConfig '../../modules/local/plot_run_gantt/nextflow.config'
includeConfig '../../modules/nf-core/multiqc/nextflow.config'
includeConfig '../../modules/local/benchmark_report/nextflow.config'
Loading