Skip to content

Commit fcf5126

Browse files
authored
Merge pull request #74 from FloWuenne/add_benchmark_reports
Added process to generate benchmarking reports.
2 parents 95f1667 + 7306b35 commit fcf5126

File tree

11 files changed

+141
-19
lines changed

11 files changed

+141
-19
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,26 @@ nextflow run seqeralabs/nf-aggregate \
5656

5757
If you are using a Seqera Platform Enterprise instance that is secured with a private CA SSL certificate not recognized by the default Java certificate authorities, you can specify a custom `cacerts` store path through the `--java_truststore_path` parameter and, optionally, a password through the `--java_truststore_password` parameter. This certificate will be used to connect to your Seqera Platform instance through the API and CLI.
5858

59+
### Benchmark reports
60+
61+
If you want to generate a benchmark report comparing multiple runs, you can include a `group` column in your `run_ids.csv` file. This allows you to organize and analyze runs based on custom groupings in the final report.
62+
63+
```
64+
id,workspace,group
65+
3VcLMAI8wyy0Ld,community/showcase,group1
66+
4VLRs7nuqbAhDy,community/showcase,group2
67+
```
68+
69+
To incorporate AWS cost data into the benchmark report, use the `--benchmark_aws_cur_report` parameter. This should point to a valid AWS Cost and Usage Report (CUR) file in Parquet format; both the CUR 1.0 and CUR 2.0 schemas are supported. The file can be stored locally or in a cloud bucket. To run nf-aggregate and generate benchmark reports, you can use the following command:
70+
71+
```
72+
nextflow run seqeralabs/nf-aggregate \
73+
--input run_ids.csv \
74+
--outdir ./results \
75+
--generate_benchmark_report \
76+
--benchmark_aws_cur_report ./aws_cost_report.parquet
77+
```
78+
5979
## Output
6080

6181
The results from the pipeline will be published in the path specified by the `--outdir` and will consist of the following contents:

assets/schema_input.json

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"$schema": "http://json-schema.org/draft-07/schema",
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
33
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/assets/schema_input.json",
44
"title": "nf-aggregate pipeline - params.input schema",
55
"description": "Schema for the file provided with params.input",
@@ -10,12 +10,20 @@
1010
"id": {
1111
"type": "string",
1212
"pattern": "^[A-Za-z0-9]{9,14}$",
13-
"errorMessage": "Please provide a valid Seqera Platform run identifier"
13+
"errorMessage": "Please provide a valid Seqera Platform run identifier",
14+
"meta": ["id"]
1415
},
1516
"workspace": {
1617
"type": "string",
1718
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
18-
"errorMessage": "Please provide a valid Seqera Platform Workspace name"
19+
"errorMessage": "Please provide a valid Seqera Platform Workspace name",
20+
"meta": ["workspace"]
21+
},
22+
"group": {
23+
"type": "string",
24+
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
25+
"errorMessage": "Please provide a valid group name",
26+
"meta": ["group"]
1927
}
2028
},
2129
"required": ["id", "workspace"]
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
process BENCHMARK_REPORT {
2+
3+
container 'cr.seqera.io/scidev/benchmark-reports:sha-b370978'
4+
5+
input:
6+
path run_dumps
7+
val groups
8+
path benchmark_aws_cur_report
9+
10+
output:
11+
path "benchmark_report.html" , emit: benchmark_html
12+
path "versions.yml" , emit: versions
13+
14+
script:
15+
def aws_cost_param = benchmark_aws_cur_report ? "--profile cost -P aws_cost:\$TASK_DIR/${benchmark_aws_cur_report}" : ""
16+
def benchmark_samplesheet = "benchmark_samplesheet.csv"
17+
"""
18+
# Set up R environment from renv
19+
export R_LIBS_USER=/project/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu
20+
TASK_DIR="\$PWD"
21+
22+
# Setup cache directories
23+
export QUARTO_CACHE=/tmp/quarto/cache
24+
export XDG_CACHE_HOME=/tmp/quarto
25+
26+
# Create the benchmark samplesheet csv
27+
echo "group,file_path" > ${benchmark_samplesheet}
28+
${groups.withIndex().collect { group, idx ->
29+
"echo \"${group},\$TASK_DIR/${run_dumps[idx]}\" >> ${benchmark_samplesheet}"
30+
}.join('\n')}
31+
32+
cd /project
33+
quarto render main_benchmark_report.qmd \\
34+
-P log_csv:"\$TASK_DIR/"${benchmark_samplesheet} \\
35+
$aws_cost_param \\
36+
--output-dir .\\
37+
--output benchmark_report.html
38+
39+
cp /project/benchmark_report.html "\$TASK_DIR/"
40+
cd "\$TASK_DIR/"
41+
42+
cat <<-END_VERSIONS > versions.yml
43+
"${task.process}":
44+
r: \$(R --version | head -1 | sed 's/R version \\([0-9.]*\\).*/\\1/')
45+
quarto-cli: \$(quarto --version | head -1 | sed 's/quarto //g')
46+
END_VERSIONS
47+
"""
48+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
process {
2+
withName: 'BENCHMARK_REPORT' {
3+
publishDir = [
4+
path: { "${params.outdir}/${metaOut?.projectName?.replace("/", "_") ?: ""}/benchmark_report" },
5+
mode: params.publish_dir_mode,
6+
saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.json') ? null : filename }
7+
]
8+
containerOptions = "--user root"
9+
}
10+
}

nextflow.config

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ params {
2727
multiqc_logo = null
2828
skip_multiqc = false
2929

30+
// Benchmark report options
31+
generate_benchmark_report = false
32+
benchmark_aws_cur_report = null
33+
3034
// Boilerplate options
3135
outdir = 'results'
3236
publish_dir_mode = 'copy'
@@ -38,7 +42,6 @@ params {
3842
// Schema validation default options
3943
validationFailUnrecognisedParams = false
4044
validationLenientMode = false
41-
validationSchemaIgnoreParams = ''
4245
validationShowHiddenParams = false
4346
validationSkipDuplicateCheck = false
4447
validate_params = true
@@ -52,6 +55,7 @@ process {
5255
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
5356
maxRetries = 1
5457
maxErrors = '-1'
58+
5559
}
5660

5761
profiles {
@@ -172,7 +176,7 @@ singularity.registry = 'quay.io'
172176

173177
// Nextflow plugins
174178
plugins {
175-
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
179+
id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
176180
}
177181

178182
// Export these variables to prevent local Python/R libraries from conflicting with those in the container

nextflow_schema.json

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"$schema": "http://json-schema.org/draft-07/schema",
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
33
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/nextflow_schema.json",
44
"title": "seqeralabs/nf-aggregate pipeline parameters",
55
"description": "Minimal nf-core pipeline compatible with template",
66
"type": "object",
7-
"definitions": {
7+
"$defs": {
88
"input_output_options": {
99
"title": "Input/output options",
1010
"type": "object",
@@ -26,7 +26,8 @@
2626
"type": "string",
2727
"format": "directory-path",
2828
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
29-
"fa_icon": "fas fa-folder-open"
29+
"fa_icon": "fas fa-folder-open",
30+
"default": "results"
3031
}
3132
}
3233
},
@@ -67,6 +68,18 @@
6768
"type": "boolean",
6869
"description": "Skip MultiQC.",
6970
"fa_icon": "fas fa-fast-forward"
71+
},
72+
"generate_benchmark_report": {
73+
"type": "boolean",
74+
"fa_icon": "fas fa-tachometer-alt",
75+
"description": "Compile a benchmarking report for Seqera Platform runs."
76+
},
77+
"benchmark_aws_cur_report": {
78+
"type": "string",
79+
"fa_icon": "fas fa-dollar-sign",
80+
"description": "AWS CUR report from data exports.",
81+
"pattern": "^\\S+\\.parquet",
82+
"format": "file-path"
7083
}
7184
},
7285
"required": ["seqera_api_endpoint"]
@@ -127,7 +140,8 @@
127140
"modules_testdata_base_path": {
128141
"type": "string",
129142
"description": "Base path / URL for data used in the modules",
130-
"hidden": true
143+
"hidden": true,
144+
"default": "s3://ngi-igenomes/testdata/nf-core/modules/"
131145
},
132146
"validate_params": {
133147
"type": "boolean",
@@ -169,13 +183,13 @@
169183
},
170184
"allOf": [
171185
{
172-
"$ref": "#/definitions/input_output_options"
186+
"$ref": "#/$defs/input_output_options"
173187
},
174188
{
175-
"$ref": "#/definitions/pipeline_options"
189+
"$ref": "#/$defs/pipeline_options"
176190
},
177191
{
178-
"$ref": "#/definitions/generic_options"
192+
"$ref": "#/$defs/generic_options"
179193
}
180194
]
181195
}

subworkflows/local/utils_nf_aggregate/main.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import java.nio.file.Paths
1515
include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline/main'
1616
include { getWorkflowVersion } from '../../nf-core/utils_nextflow_pipeline/main'
1717
include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin/main.nf'
18+
include { samplesheetToList } from 'plugin/nf-schema'
1819

1920
/*
2021
========================================================================================
@@ -53,9 +54,8 @@ workflow PIPELINE_INITIALISATION {
5354

5455
// Read in ids from --input file
5556
Channel
56-
.from(file(params.input))
57-
.splitCsv(header:true, sep:',', strip:true)
58-
.unique()
57+
.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
58+
.flatten()
5959
.set { ch_ids }
6060

6161
emit:

subworkflows/nf-core/utils_nfvalidation_plugin/main.nf

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tower.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ reports:
33
display: "MultiQC HTML report"
44
"*_gantt.html":
55
display: "GANTT plot of task execution in a run, grouped by 'instance-id' if available."
6+
benchmark_report.html:
7+
display: "Benchmarking HTML report"

workflows/nf_aggregate/main.nf

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
include { SEQERA_RUNS_DUMP } from '../../modules/local/seqera_runs_dump'
66
include { PLOT_RUN_GANTT } from '../../modules/local/plot_run_gantt'
77
include { MULTIQC } from '../../modules/nf-core/multiqc'
8+
include { BENCHMARK_REPORT } from '../../modules/local/benchmark_report'
89
include { paramsSummaryMultiqc } from '../../subworkflows/local/utils_nf_aggregate'
910
include { getProcessVersions } from '../../subworkflows/local/utils_nf_aggregate'
1011
include { getWorkflowVersions } from '../../subworkflows/local/utils_nf_aggregate'
11-
include { paramsSummaryMap } from 'plugin/nf-validation'
12+
include { paramsSummaryMap } from 'plugin/nf-schema'
1213

1314
workflow NF_AGGREGATE {
1415

@@ -56,6 +57,20 @@ workflow NF_AGGREGATE {
5657
)
5758
ch_versions = ch_versions.mix(PLOT_RUN_GANTT.out.versions.first())
5859

60+
//
61+
// MODULE: Generate benchmark report
62+
//
63+
if (params.generate_benchmark_report) {
64+
aws_cur_report = params.benchmark_aws_cur_report ? Channel.fromPath(params.benchmark_aws_cur_report) : []
65+
66+
BENCHMARK_REPORT (
67+
SEQERA_RUNS_DUMP.out.run_dump.collect{it[1]},
68+
SEQERA_RUNS_DUMP.out.run_dump.collect{it[0].group},
69+
aws_cur_report
70+
)
71+
ch_versions = ch_versions.mix(BENCHMARK_REPORT.out.versions.first())
72+
}
73+
5974
//
6075
// Collate software versions
6176
//

0 commit comments

Comments
 (0)