Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,26 @@ nextflow run seqeralabs/nf-aggregate \

If you are using a Seqera Platform Enterprise instance that is secured with a private CA SSL certificate not recognized by default Java certificate authorities, you can specify a custom `cacerts` store path through the `--java_truststore_path` parameter and optionally, a password with the `--java_truststore_password`. This certificate will be used to achieve connectivity with your Seqera Platform instance through API and CLI.

### Benchmark reports

If you want to generate a benchmark report comparing your runs, you can include a `group` column in your `run_ids.csv` file. This column will be used to group the runs in the report.

```
id,workspace,group
3VcLMAI8wyy0Ld,community/showcase,group1
4VLRs7nuqbAhDy,community/showcase,group2
```

You can also set the `benchmark_aws_cur_report` parameter to include the AWS Cost and Usage Report (CUR) in the benchmark report. This should be a path to a valid AWS Cost and Usage Report Parquet file (`.parquet`), either local or in your cloud bucket. To run nf-aggregate with benchmark reports, you can use the following command:

```
nextflow run seqeralabs/nf-aggregate \
--input run_ids.csv \
--outdir ./results \
--run_benchmark \
--benchmark_aws_cur_report ./aws_cost_report.parquet
```

## Output

The results from the pipeline will be published in the path specified by the `--outdir` and will consist of the following contents:
Expand Down
14 changes: 11 additions & 3 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/assets/schema_input.json",
"title": "nf-aggregate pipeline - params.input schema",
"description": "Schema for the file provided with params.input",
Expand All @@ -10,12 +10,20 @@
"id": {
"type": "string",
"pattern": "^[A-Za-z0-9]{9,14}$",
"errorMessage": "Please provide a valid Seqera Platform run identifier"
"errorMessage": "Please provide a valid Seqera Platform run identifier",
"meta": ["id"]
},
"workspace": {
"type": "string",
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
"errorMessage": "Please provide a valid Seqera Platform Workspace name"
"errorMessage": "Please provide a valid Seqera Platform Workspace name",
"meta": ["workspace"]
},
"group": {
"type": "string",
"pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$",
"errorMessage": "Please provide a valid group name",
"meta": ["group"]
}
},
"required": ["id", "workspace"]
Expand Down
50 changes: 50 additions & 0 deletions modules/local/benchmark_report/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Renders the Quarto benchmark report (benchmark_report.html) from a set of
// Seqera Platform run dumps, optionally enriched with an AWS cost report.
//
// Inputs
//   run_dumps                 run dump path(s) staged into the task directory
//   groups                    list of group labels; groups[i] is written next to run_dumps[i]
//   benchmark_aws_cur_report  optional cost report path; a falsy value (e.g. []) disables
//                             the `--profile cost` arguments
// Outputs
//   benchmark_html            the rendered benchmark_report.html
//   versions                  versions.yml holding the output of `quarto -v`
process BENCHMARK_REPORT {

    container 'cr.seqera.io/scidev/benchmark-reports:sha-7fe0d8e'

    input:
    path run_dumps
    val  groups
    path benchmark_aws_cur_report

    output:
    path "benchmark_report.html" , emit: benchmark_html
    path "versions.yml"          , emit: versions

    script:
    def aws_cost_param        = benchmark_aws_cur_report ? "--profile cost -P aws_cost:\$TASK_DIR/${benchmark_aws_cur_report}" : ""
    def benchmark_samplesheet = "benchmark_samplesheet.csv"

    // NOTE(review): Nextflow may hand over a single path object instead of a list
    // when only one file is staged; wrap it so positional lookup below stays valid.
    // TODO confirm against the Nextflow version in use.
    def run_dump_list = run_dumps instanceof java.nio.file.Path ? [run_dumps] : run_dumps

    // One shell-quoted "group,/project/<dump>" row per run, joined on a single line
    // so the rendered script keeps a uniform indentation.
    def samplesheet_rows = groups.withIndex().collect { group, idx ->
        "'${group},/project/${run_dump_list[idx]}'"
    }.join(' ')

    """
    # Set up R environment from renv
    export R_LIBS_USER=/project/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu
    # Store task work directory at beginning
    TASK_DIR="\$PWD"

    # Create the samplesheet header
    echo "group,file_path" > ${benchmark_samplesheet}

    # Add each group and file path (one row per run dump, pointing at its /project copy)
    printf '%s\\n' ${samplesheet_rows} >> ${benchmark_samplesheet}

    # Copy run dumps to /project directory
    cp -r ${run_dumps} /project/

    cd /project
    quarto render main_benchmark_report.qmd \\
        -P log_csv:"\$TASK_DIR/"${benchmark_samplesheet} \\
        $aws_cost_param \\
        --output-dir . \\
        --output benchmark_report.html

    # Bring the rendered report back to the task directory so Nextflow can collect it
    cp /project/benchmark_report.html "\$TASK_DIR/"
    cd "\$TASK_DIR/"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        quarto-cli: \$(quarto -v)
    END_VERSIONS
    """
}
9 changes: 9 additions & 0 deletions modules/local/benchmark_report/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
process {
    // Publishing rules for the benchmark report module.
    withName: 'BENCHMARK_REPORT' {
        publishDir = [
            // Target directory: <outdir>/<projectName with "/" replaced by "_">/benchmark_report.
            // NOTE(review): `metaOut` is not among the inputs/outputs declared by
            // BENCHMARK_REPORT in modules/local/benchmark_report/main.nf; confirm it
            // resolves at publish time, otherwise this segment cannot be computed.
            path: { "${params.outdir}/${metaOut?.projectName?.replace("/", "_") ?: ""}/benchmark_report" },
            mode: params.publish_dir_mode,
            // Returning null skips publishing: versions.yml and any *.json stay in the work dir.
            saveAs: { fname ->
                def skipped = fname.equals('versions.yml') || fname.endsWith('.json')
                skipped ? null : fname
            }
        ]
    }
}
9 changes: 7 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ params {
multiqc_logo = null
skip_multiqc = false

// Benchmark report options
run_benchmark = false
benchmark_aws_cur_report = null

// Boilerplate options
outdir = 'results'
publish_dir_mode = 'copy'
Expand All @@ -52,6 +56,7 @@ process {
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'

}

profiles {
Expand Down Expand Up @@ -103,7 +108,7 @@ profiles {
shifter.enabled = false
charliecloud.enabled = false
apptainer.enabled = false
docker.runOptions = '-u $(id -u):$(id -g)'
// docker.runOptions = '-u $(id -u):$(id -g)'
}
arm {
docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
Expand Down Expand Up @@ -172,7 +177,7 @@ singularity.registry = 'quay.io'

// Nextflow plugins
plugins {
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
Expand Down
25 changes: 19 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/seqeralabs/nf-aggregate/main/nextflow_schema.json",
"title": "seqeralabs/nf-aggregate pipeline parameters",
"description": "Minimal nf-core pipeline compatible with template",
"type": "object",
"definitions": {
"$defs": {
"input_output_options": {
"title": "Input/output options",
"type": "object",
Expand All @@ -26,7 +26,8 @@
"type": "string",
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
"fa_icon": "fas fa-folder-open",
"default": "results"
}
}
},
Expand Down Expand Up @@ -67,6 +68,18 @@
"type": "boolean",
"description": "Skip MultiQC.",
"fa_icon": "fas fa-fast-forward"
},
"run_benchmark": {
"type": "boolean",
"fa_icon": "fas fa-tachometer-alt",
"description": "Compile a benchmarking report for Seqera Platform runs."
},
"benchmark_aws_cur_report": {
    "type": "string",
    "fa_icon": "fas fa-dollar-sign",
    "description": "AWS CUR report from data exports.",
    "pattern": "^\\S+\\.parquet$",
    "format": "file-path"
}
},
"required": ["seqera_api_endpoint"]
Expand Down Expand Up @@ -169,13 +182,13 @@
},
"allOf": [
{
"$ref": "#/definitions/input_output_options"
"$ref": "#/$defs/input_output_options"
},
{
"$ref": "#/definitions/pipeline_options"
"$ref": "#/$defs/pipeline_options"
},
{
"$ref": "#/definitions/generic_options"
"$ref": "#/$defs/generic_options"
}
]
}
6 changes: 3 additions & 3 deletions subworkflows/local/utils_nf_aggregate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import java.nio.file.Paths
include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline/main'
include { getWorkflowVersion } from '../../nf-core/utils_nextflow_pipeline/main'
include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin/main.nf'
include { samplesheetToList } from 'plugin/nf-schema'

/*
========================================================================================
Expand Down Expand Up @@ -53,9 +54,8 @@ workflow PIPELINE_INITIALISATION {

// Read in ids from --input file
Channel
.from(file(params.input))
.splitCsv(header:true, sep:',', strip:true)
.unique()
.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
.flatten()
.set { ch_ids }

emit:
Expand Down
6 changes: 3 additions & 3 deletions subworkflows/nf-core/utils_nfvalidation_plugin/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions tower.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ reports:
display: "MultiQC HTML report"
"*_gantt.html":
display: "GANTT plot of task execution in a run, grouped by 'instance-id' if available."
benchmark_report.html:
display: "Benchmarking HTML report"
16 changes: 15 additions & 1 deletion workflows/nf_aggregate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
include { SEQERA_RUNS_DUMP } from '../../modules/local/seqera_runs_dump'
include { PLOT_RUN_GANTT } from '../../modules/local/plot_run_gantt'
include { MULTIQC } from '../../modules/nf-core/multiqc'
include { BENCHMARK_REPORT } from '../../modules/local/benchmark_report'
include { paramsSummaryMultiqc } from '../../subworkflows/local/utils_nf_aggregate'
include { getProcessVersions } from '../../subworkflows/local/utils_nf_aggregate'
include { getWorkflowVersions } from '../../subworkflows/local/utils_nf_aggregate'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMap } from 'plugin/nf-schema'

workflow NF_AGGREGATE {

Expand Down Expand Up @@ -86,6 +87,19 @@ workflow NF_AGGREGATE {
ch_multiqc_report = MULTIQC.out.report
}

//
// MODULE: Generate benchmark report
//
// Only build the benchmark report when --run_benchmark is set.
if (params.run_benchmark) {
// Optional AWS CUR input: a path channel when the parameter is set, otherwise an empty list.
aws_cur_report = params.benchmark_aws_cur_report ? Channel.fromPath(params.benchmark_aws_cur_report) : []

// Arguments, in order: element [1] of every run_dump emission, the `group` field of
// element [0] of every run_dump emission, and the optional cost report.
// NOTE(review): BENCHMARK_REPORT pairs groups[idx] with run_dumps[idx], so both
// collect() calls must yield the same order and length. `group` is not listed in the
// samplesheet schema's "required" fields; confirm how a missing group is collected.
BENCHMARK_REPORT (
SEQERA_RUNS_DUMP.out.run_dump.collect{it[1]},
SEQERA_RUNS_DUMP.out.run_dump.collect{it[0].group},
aws_cur_report
)
}

emit:
multiqc_report = ch_multiqc_report
versions = ch_versions
Expand Down
1 change: 1 addition & 0 deletions workflows/nf_aggregate/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
includeConfig '../../modules/local/seqera_runs_dump/nextflow.config'
includeConfig '../../modules/local/plot_run_gantt/nextflow.config'
includeConfig '../../modules/nf-core/multiqc/nextflow.config'
includeConfig '../../modules/local/benchmark_report/nextflow.config'
Loading