diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e09..87cc8ef 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,32 @@ +// +// MAIN PRODUCTION DEVCONTAINER CONFIG +// Uses image that is pre-built and pushed to GitHub +// See .github/.devcontainer/devcontainer.json for build +// { - "name": "nfcore", - "image": "nfcore/gitpod:latest", - "remoteUser": "gitpod", - "runArgs": ["--privileged"], - - // Configure tool-specific properties. + "name": "nf-aggregate", + "image": "ghcr.io/nextflow-io/training:latest", + "workspaceFolder": "/workspaces/nf-aggregate", + "remoteUser": "root", + "remoteEnv": { + // Nextflow installation version + "NXF_HOME": "/workspaces/.nextflow", + "NXF_EDGE": "0", + "NXF_VER": "24.10.4", + // Other env vars + "HOST_PROJECT_PATH": "/workspaces/nf-aggregate", + "SHELL": "/bin/bash" // Use bash + }, + "onCreateCommand": "bash .devcontainer/setup.sh", "customizations": { - // Configure properties specific to VS Code. "vscode": { - // Set *default* container specific settings.json values on container create. + "extensions": ["nf-core.nf-core-extensionpack"], + // Use Python from conda "settings": { "python.defaultInterpreterPath": "/opt/conda/bin/python" }, - - // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + // Use bash + "terminal.integrated.defaultProfile.linux": "bash" } } } diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100644 index 0000000..c9b04c2 --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# Fix for Java options +printf 'unset JAVA_TOOL_OPTIONS\n' >> $HOME/.bashrc +unset JAVA_TOOL_OPTIONS + +# Customise the terminal command prompt +printf "export PS1='\\[\\e[3;36m\\]\${PWD#/workspaces/} ->\\[\\e[0m\\] '\n" >> $HOME/.bashrc +export PS1='\[\e[3;36m\]${PWD#/workspaces/} ->\[\e[0m\] ' + +# Force Java environment variables to use conda installation +printf 'export JAVA_HOME=/opt/conda\nexport JAVA_CMD=/opt/conda/bin/java\n' >> $HOME/.bashrc +export JAVA_HOME=/opt/conda +export JAVA_CMD=/opt/conda/bin/java + +# Update Nextflow +nextflow self-update +nextflow -version + +cat /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.vscode/settings.json b/.vscode/settings.json index a33b527..af2d783 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "markdown.styles": ["public/vscode_markdown.css"] + "markdown.styles": ["public/vscode_markdown.css"], + "nextflow.telemetry.enabled": false } diff --git a/CHANGELOG.md b/CHANGELOG.md index ccdc68b..a4ce06b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,34 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## 0.6.0 +## 0.7.0 + +Special thanks to the following for their contributions to the release: + +- [Edmund Miller](https://github.com/edmundmiller) +- [Florian Wuennemann](https://github.com/FloWuenne) +- [Maxime Garcia](https://github.com/maxulysse) + +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. 
+ +### Enhancements & fixes + +- [PR #88](https://github.com/seqeralabs/nf-aggregate/pull/88) - Update tw cli container version to 0.11.2 and allow .nextflow.log to be missing from tw call +- [PR #89](https://github.com/seqeralabs/nf-aggregate/pull/89) - Enable usage of external run dumps with nf-aggregate & update devcontainer specifications + +### Software dependencies + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `tower-cli` | 0.9.2 | 0.11.2 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +## [[0.6.0](https://github.com/seqeralabs/nf-aggregate/releases/tag/0.6.0)] - 2024-03-31 ### Credits diff --git a/README.md b/README.md index 2e2158e..08f8862 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,21 @@ id,workspace,group 4VLRs7nuqbAhDy,community/showcase,group2 ``` +## Use logs from an external Seqera Platform deployment + +Sometimes we want to compile benchmark reports from runs from two different Seqera Platform deployments, for example a dev and a production environment, to compare performance. External logs can be used in nf-aggregate by specifying the workspace as `external` and providing two additional optional columns: `logs`, which points to the run dump folder or tarball, and `fusion`, which specifies whether these external logs contain fusion logs (i.e. whether you exported them with the `--add-fusion-logs` flag of `tw runs dump`). If they do contain fusion logs, you can generate a Gantt plot for them, just as for runs supplied only via id. + +Here is an example of using a mix of run ids for which we want to extract logs from our platform deployment and some run logs from another deployment we want to compare. 
In the example below, `1JI5B1avuj3o58` is a run that contains fusion logs, while `1vsww7GjKBsVNa` does not contain fusion logs. + +``` +id,workspace,group,logs,fusion +3VcLMAI8wyy0Ld,community/showcase,group1,, +1JI5B1avuj3o58,external,group2,/path/to/my/run_dumps_tarball.tar.gz,true +1vsww7GjKBsVNa,external,group2,/path/to/my/run_dumps_folder,false +``` + +## Incorporate AWS split cost allocation data + To incorporate AWS cost data into the benchmark report, use the `benchmark_aws_cur_report` parameter. This should point to a valid AWS Cost and Usage Report (CUR) file in Parquet format, currently only supporting CUR 1.0. The file can be stored locally or in a cloud bucket. To run nf-aggregate and generate benchmark reports, you can use the following command: diff --git a/assets/schema_input.json b/assets/schema_input.json index 71224b4..d470d43 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -15,8 +15,8 @@ }, "workspace": { "type": "string", - "pattern": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}/[a-zA-Z0-9](?:[a-zA-Z0-9]|[-_](?=[a-zA-Z0-9])){1,38}$", - "errorMessage": "Please provide a valid Seqera Platform Workspace name", + "pattern": "^(?:external|[a-zA-Z0-9][a-zA-Z0-9-_]{0,38}/[a-zA-Z0-9][a-zA-Z0-9-_]{0,38})$", + "errorMessage": "Please provide a valid Seqera Platform Workspace name or 'external'", "meta": ["workspace"] }, "group": { @@ -24,6 +24,17 @@ "pattern": "^[a-zA-Z0-9][-a-zA-Z0-9_ ]{0,37}$", "errorMessage": "Please provide a valid group name", "meta": ["group"] + }, + "logs": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+$", + "errorMessage": "Please provide a valid file path to your Seqera Platform logs.", + "meta": ["logs"] + }, + "fusion": { + "type": "boolean", + "meta": ["fusion"] } }, "required": ["id", "workspace"] diff --git a/nextflow.config b/nextflow.config index f14bfca..0a1b329 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,6 +62,12 @@ profiles { test { params.input = 
"${projectDir}/workflows/nf_aggregate/assets/test_run_ids.csv" } + test_benchmark { + params.input = "${projectDir}/workflows/nf_aggregate/assets/test_benchmark.csv" + params.generate_benchmark_report = true + params.skip_run_gantt = true + params.skip_multiqc = true + } test_full { params.input = "${projectDir}/workflows/nf_aggregate/assets/test_run_ids.csv" } @@ -320,7 +326,7 @@ manifest { // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.2.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { diff --git a/subworkflows/local/utils_nf_aggregate/tests/main.nf.test.snap b/subworkflows/local/utils_nf_aggregate/tests/main.nf.test.snap index dd52c0d..aabb3a7 100644 --- a/subworkflows/local/utils_nf_aggregate/tests/main.nf.test.snap +++ b/subworkflows/local/utils_nf_aggregate/tests/main.nf.test.snap @@ -3,46 +3,76 @@ "content": [ [ { + "fusion": [ + + ], "group": [ ], "id": "2lXd1j7OwZVfxh", + "logs": [ + + ], "workspace": "community/showcase" }, { + "fusion": [ + + ], "group": [ ], "id": "38QXz4OfQDpwOV", + "logs": [ + + ], "workspace": "community/showcase" }, { + "fusion": [ + + ], "group": [ ], "id": "3iFMo0NtH1Byvy", + "logs": [ + + ], "workspace": "community/showcase" }, { + "fusion": [ + + ], "group": [ ], "id": "4Bi5xBK6E2Nbhj", + "logs": [ + + ], "workspace": "community/showcase" }, { + "fusion": [ + + ], "group": [ ], "id": "4LWT4uaXDaGcDY", + "logs": [ + + ], "workspace": "community/showcase" } ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.01.0" + "nextflow": "24.10.4" }, - "timestamp": "2025-02-26T16:10:24.54507716" + "timestamp": "2025-04-19T22:12:59.208411" } } \ No newline at end of file diff --git a/tests/.nftignore b/tests/.nftignore index 5b450ea..4e875f1 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -10,3 +10,4 @@ multiqc/multiqc_report.html nf-core_*/gantt/*_gantt.html 
pipeline_info/*.{html,json,txt,yml} **/runs_dump/**/service-info.json +benchmark_report/benchmark_report.html diff --git a/tests/main.nf.test b/tests/main.nf.test index a7c8da5..8ad3db3 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -13,6 +13,39 @@ nextflow_pipeline { } } + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/collated_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test_benchmark") { + + when { + params { + input = "${projectDir}/workflows/nf_aggregate/assets/test_benchmark.csv" + outdir = "$outputDir" + generate_benchmark_report = true + skip_run_gantt = true + skip_multiqc = true + } + } + then { // stable_name: All files + folders in ${params.outdir}/ with a stable name def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index 149f94a..1cd5321 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -149,8 +149,36 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.02.3" + "nextflow": "24.10.4" }, - "timestamp": "2025-04-14T16:39:50.36237" + "timestamp": "2025-04-30T02:04:58.464733" + }, + "-profile 
test_benchmark": { + "content": [ + 1, + { + "Workflow": { + "seqeralabs/nf-aggregate": "0.6.0" + }, + "BENCHMARK_REPORT": { + "r": "4.4.2", + "quarto-cli": "1.5.55" + } + }, + [ + "benchmark_report", + "benchmark_report/benchmark_report.html", + "pipeline_info", + "pipeline_info/collated_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-04-30T02:09:18.441826" } -} \ No newline at end of file +} diff --git a/workflows/nf_aggregate/assets/logs/2VeeQznQdp3rxo.tar.gz b/workflows/nf_aggregate/assets/logs/2VeeQznQdp3rxo.tar.gz new file mode 100644 index 0000000..5b31955 Binary files /dev/null and b/workflows/nf_aggregate/assets/logs/2VeeQznQdp3rxo.tar.gz differ diff --git a/workflows/nf_aggregate/assets/logs/3qLktusgg2GQxJ.tar.gz b/workflows/nf_aggregate/assets/logs/3qLktusgg2GQxJ.tar.gz new file mode 100644 index 0000000..ad21f3f Binary files /dev/null and b/workflows/nf_aggregate/assets/logs/3qLktusgg2GQxJ.tar.gz differ diff --git a/workflows/nf_aggregate/assets/test_benchmark.csv b/workflows/nf_aggregate/assets/test_benchmark.csv new file mode 100644 index 0000000..1acd2bc --- /dev/null +++ b/workflows/nf_aggregate/assets/test_benchmark.csv @@ -0,0 +1,3 @@ +id,workspace,group,logs,fusion +3qLktusgg2GQxJ,external,test1,/workflows/nf_aggregate/assets/logs/3qLktusgg2GQxJ.tar.gz,true +2VeeQznQdp3rxo,external,test2,/workflows/nf_aggregate/assets/logs/2VeeQznQdp3rxo.tar.gz,true diff --git a/workflows/nf_aggregate/main.nf b/workflows/nf_aggregate/main.nf index ea75eee..57d6cfe 100644 --- a/workflows/nf_aggregate/main.nf +++ b/workflows/nf_aggregate/main.nf @@ -24,6 +24,19 @@ workflow NF_AGGREGATE { main: + // Split ids into runs to fetch logs from platform deployment and runs provided externally + ids + .branch {meta -> + external: meta.workspace == 'external' + if (meta.logs =~ /workflows\/nf_aggregate\/assets\/logs\//) { + return [meta, projectDir + meta.logs] + } else { + return [meta, 
meta.logs] + } + fetch_run_dumps: true + } + .set { ids_split } + ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() @@ -32,36 +45,42 @@ workflow NF_AGGREGATE { // SEQERA_RUNS_DUMP( - ids, + ids_split.fetch_run_dumps, seqera_api_endpoint, java_truststore_path ?: '', java_truststore_password ?: '', ) ch_versions = ch_versions.mix(SEQERA_RUNS_DUMP.out.versions) + // Merge run dumps with external runs + SEQERA_RUNS_DUMP.out.run_dump + .mix(ids_split.external) + .set{ ch_all_runs } + // // MODULE: Generate Gantt chart for workflow execution // - SEQERA_RUNS_DUMP.out.run_dump - .filter { meta, _run_dir -> - meta.fusion && !skip_run_gantt - } - .set { ch_runs_for_gantt } + if(!params.skip_run_gantt){ + ch_all_runs + .filter { meta, _run_dir -> meta.fusion} + .set { ch_runs_for_gantt } - PLOT_RUN_GANTT( - ch_runs_for_gantt - ) - ch_versions = ch_versions.mix(PLOT_RUN_GANTT.out.versions) + PLOT_RUN_GANTT( + ch_runs_for_gantt + ) + ch_versions = ch_versions.mix(PLOT_RUN_GANTT.out.versions) + } // // MODULE: Generate benchmark report // if (params.generate_benchmark_report) { + // Check if cur report is specified aws_cur_report = params.benchmark_aws_cur_report ? Channel.fromPath(params.benchmark_aws_cur_report) : [] BENCHMARK_REPORT( - SEQERA_RUNS_DUMP.out.run_dump.collect { it[1] }, - SEQERA_RUNS_DUMP.out.run_dump.collect { it[0].group }, + ch_all_runs.collect { it[1] }, + ch_all_runs.collect { it[0].group }, aws_cur_report, params.remove_failed_tasks, ) @@ -88,7 +107,7 @@ workflow NF_AGGREGATE { ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(SEQERA_RUNS_DUMP.out.run_dump.collect { it[1] }) + ch_multiqc_files = ch_multiqc_files.mix(ch_all_runs.collect { it[1] }) MULTIQC( ch_multiqc_files.collect(),