Merge pull request #314 from NationalGenomicsInfrastructure/monthly

ssjunnebo · web-flow · commit 23fe4720860d · 2025-02-04T13:53:14.000+01:00
Monthly deployment
diff --git a/env_vars/site_sthlm_env_all.yml b/env_vars/site_sthlm_env_all.yml
@@ -25,6 +25,7 @@ multiqc_options:
   save_remote: True
   template: ngi
   output_fn_name: multiqc_report.html
+  preserve_module_raw_data: True
 
 multiqc_swedac_accredited: True
 multiqc_sshkey: /path/to/ssh_key
diff --git a/host_vars/deploy/main.yml b/host_vars/deploy/main.yml
@@ -50,7 +50,7 @@ sarek_dest: "{{ sw_path }}/sarek/{{ sarek_tag | replace('.', '_') }}"
 rnaseq_tag: "3.8.1"
 rnaseq_dest: "{{ sw_path }}/rnaseq/{{ rnaseq_tag | replace('.', '_') }}"
 
-demultiplex_tag: "1.4.1"
+demultiplex_tag: "1.5.4"
 
 # File with tools/software version in the deployed env
 deployed_tool_versions: "{{ ngi_resources }}/deployed_tools.{{ site }}.version"
diff --git a/roles/arteria-sequencing-report-ws/defaults/main.yml b/roles/arteria-sequencing-report-ws/defaults/main.yml
@@ -1,7 +1,7 @@
 ---
 
 seqreport_service_repo: https://github.com/arteria-project/sequencing-report-service.git
-seqreport_service_version: v1.5.1-rc2
+seqreport_service_version: v1.5.2
 
 arteria_service_name: arteria-sequencing-report-ws
 arteria_sequencing_report_wrapper: "{{ arteria_service_config_root }}/arteria_sequencing_report_wrapper.sh"
diff --git a/roles/arteria-sequencing-report-ws/templates/nextflow_configs/demultiplex.config.j2 b/roles/arteria-sequencing-report-ws/templates/nextflow_configs/demultiplex.config.j2
@@ -10,8 +10,16 @@
 ----------------------------------------------------------------------------------------
 */
 
+cleanup = true
+
 plugins {
-    id 'nf-validation@{{ nf_validation_version }}'
+    id 'nf-schema@{{ nf_schema_version }}'
+}
+
+profiles {
+    uppmax {
+        includeConfig "${projectDir}/../configs/conf/uppmax.config"
+    }
 }
 
 process {
@@ -26,19 +34,21 @@ process {
         publishDir = [
             [
                 path: { "${params.outdir}/Unaligned/" },
-                pattern: "**_S[1-9]*_*.fastq.gz",
+                pattern: "output/**_S[1-9]*_*.fastq.gz",
                 mode: "link",
+                saveAs: { filename -> filename.minus("output/") }
             ],
             [
                 path: { "${params.outdir}/Unaligned/" },
-                pattern: "Undetermined_S0_*.fastq.gz",
+                pattern: "output/**Undetermined_S0_*.fastq.gz",
                 mode: "link",
+                saveAs: { filename -> filename.minus("output/") }
             ],
             [
                 // Gather and write Reports and Stats
                 path: { "${params.outdir}/Unaligned/" },
                 mode: "link",
-                pattern: "Stats",
+                pattern: "output/{Stats,Reports}",
                 saveAs: {filename -> filename.split("/")[-1] }
             ],
             [
@@ -49,5 +59,4 @@ process {
             ],
         ]
     }
-
 }
diff --git a/roles/arteria-sequencing-report-ws/templates/pipeline_configs/demultiplex.yml.j2 b/roles/arteria-sequencing-report-ws/templates/pipeline_configs/demultiplex.yml.j2
@@ -20,7 +20,7 @@ pipeline_parameters:
   outdir: "{runfolder_path}"
   project: {{uppmax_project}}
   demultiplexer: "bcl2fastq"
-  skip_tools: "fastp,falco,multiqc,md5sum"
+  skip_tools: "samshee,checkqc,fastp,falco,md5sum,kraken,multiqc"
 input_samplesheet_content: |
   id,samplesheet,lane,flowcell
   {runfolder_name},{runfolder_path}/SampleSheet.csv,,{runfolder_path}
diff --git a/roles/multiqc/defaults/main.yml b/roles/multiqc/defaults/main.yml
@@ -1,6 +1,6 @@
 multiqc_repo: https://github.com/MultiQC/MultiQC.git
 multiqc_dest: "{{ sw_path }}/multiqc"
-multiqc_version: "v1.25.2"
+multiqc_version: "v1.26"
 
 multiqc_ngi_repo: https://github.com/NationalGenomicsInfrastructure/MultiQC_NGI.git
 multiqc_ngi_dest: "{{ sw_path }}/multiqc_ngi"
diff --git a/roles/nextflow/defaults/main.yml b/roles/nextflow/defaults/main.yml
@@ -1,8 +1,8 @@
 java_home: /sw/comp/java/x86_64/OracleJDK_11.0.9
 nextflow_java: "{{ java_home }}"
-nextflow_version_tag: 24.04.1
+nextflow_version_tag: 24.04.2
 nextflow_download_url: https://github.com/nextflow-io/nextflow/releases/download/v{{ nextflow_version_tag }}/nextflow
-nf_validation_version: 1.1.2
+nf_schema_version: 2.1.1
 nextflow_local_env:
   NXF_HOME: "{{ nextflow_dest }}/workfiles"
   NXF_OPTS: -Xms1g -Xmx3500m
@@ -21,8 +21,10 @@ nextflow_env:
   PATH: "{{ tools_path.PATH }}"
 nextflow_plugins:
   - name: nf-validation
-    version: "{{ nf_validation_version }}"
+    version: 1.1.2
   - name: nf-prov
     version: 1.2.1
   - name: nf-tower
     version: 1.6.3
+  - name: nf-schema # Will replace nf-validation once we update all our pipelines
+    version: "{{ nf_schema_version }}"
diff --git a/roles/nextflow/templates/nextflow_miarka_site.config.j2 b/roles/nextflow/templates/nextflow_miarka_site.config.j2
@@ -1,9 +1,5 @@
 
-{% if site == "upps" %}
-cleanup = false
-{% else %}
 cleanup = true
-{% endif %}
 
 params {
 
diff --git a/roles/nf-core/defaults/main.yml b/roles/nf-core/defaults/main.yml
@@ -73,6 +73,7 @@ nf_core_delivery_readmes:
     - DELIVERY.README.SAREK.txt
     - DELIVERY.README.SAREK.BATCH.txt
     - DELIVERY.README.SAREK.WES.md
+    - DELIVERY.README.SAREK.md
     - apply_recalibration.sh
   rnaseq:
     - DELIVERY.README.RNASEQ.md
@@ -89,4 +90,4 @@ igenomes:
     build: GRCh38
     type: gatk
 
-ncbi_eutils_url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils
+ncbi_eutils_url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils
diff --git a/roles/nf-core/templates/DELIVERY.README.SAREK.WES.md.j2 b/roles/nf-core/templates/DELIVERY.README.SAREK.WES.md.j2
@@ -1,109 +1,69 @@
 # DELIVERY OF RESULTS FROM EXOME ANALYSIS WITH SAREK
 
 ## Analysis
-Samples were analysed with the Sarek pipeline release {{ release }}. In short, the pipeline does the following:
-Reads from fastq-files were mapped to a reference genome using BWA.
-Bam-files were de-duplicated with GATK MarkDuplicates.
-Base quality score recalibration tables were created with GATK BaseRecalibrator. 
-The tables were then used in GATK ApplyBQSR to create recalibrated bam-files.
-SNVs and small indels were called with GATK HaplotypeCaller.
-Variants were annoted with SnpEff.
+Samples were analysed with the Sarek pipeline release {{ release }}. 
 
-For details on the pipeline, folder structure and how to interpret results, please refer to the Sarek documentation:
-https://nf-co.re/sarek/{{ release }}
+The workflow processes raw data from FastQ inputs, aligns the reads, marks duplicates and performs base recalibration.
+SNVs and small indels are called with GATK HaplotypeCaller and DeepVariant. SnpEff annotated calls are reported in 
+separate vcf files for each caller as well as concatenated vcf files with the combined result.
+In addition to the Sarek pipeline analysis, target region coverage was evaluated with Picard CollectHsMetrics.
 
-After running the pipeline, Picard CollectHsMetrics was used to evaluate the coverage
+For information regarding the pipeline, folder structure and how to interpret results, please refer to the Sarek documentation:
+[https://nf-co.re/sarek/{{ release }}](https://nf-co.re/sarek/{{ release }})
 
-## Delivery structure, directories and files:
+Detailed information about standard outputs from the pipeline can be found [here](https://nf-co.re/sarek/{{ release }}/output).
 
-```
+The directory also contains the file checksums.md5, which should be used to verify the integrity of the files after transfer.
 
-├── Annotation
-│   ├── <sample1 name>
-│   │   └── snpEff
-│   └── <sample2 name>
-│       └── snpEff
-├── DELIVERY.README.SAREK.WES.md
-├── pipeline_info
-│   ├── results_description.html
-│   └── software_versions.csv
-├── Preprocessing
-│   ├── TSV
-│   │   ├── duplicates_marked_no_table.tsv
-│   │   ├── duplicates_marked_no_table_<sample1 name>.tsv
-│   │   ├── duplicates_marked_no_table_<sample2 name>.tsv
-│   │   ├── duplicates_marked.tsv
-│   │   ├── duplicates_marked_<sample1 name>.tsv
-│   │   └── duplicates_marked_<sample2 name>.tsv
-│   ├── <sample1 name>
-│   │   └── DuplicatesMarked
-│   │       ├── <sample1 name>.md.bam
-│   │       ├── <sample1 name>.md.bam.bai
-│   │       └── <sample1 name>.recal.table
-│   └── <sample2 name>
-│       └── DuplicatesMarked
-│           ├── <sample2 name>.md.bam
-│           ├── <sample2 name>.md.bam.bai
-│           └── <sample2 name>.recal.table
-├── Reports
-│   ├── SequenceQC
-│   │   ├── <runfolder 1>
-│   │   │   ├── <runfolder 1>_<project>_multiqc_report_data.zip
-│   │   │   └── <runfolder 1>_<project>_multiqc_report.html
-│   │   └── <runfolder 2>
-│   │       ├── <runfolder 2>_<project>_multiqc_report_data.zip
-│   │       └── <runfolder 2>_<project>_multiqc_report.html
-│   ├── MultiQC
-│   │   ├── <project>_multiqc_report_data.zip
-│   │   └── <project>_multiqc_report.html
-│   ├── <sample1 name>
-│   │   ├── bamQC
-│   │   ├── BCFToolsStats
-│   │   ├── FastQC
-│   │   ├── HsMetrics
-│   │   ├── MarkDuplicates
-│   │   ├── SamToolsStats
-│   │   ├── snpEff
-│   │   └── VCFTools
-│   └── <sample2 name>
-│       ├── bamQC
-│       ├── BCFToolsStats
-│       ├── FastQC
-│       ├── HsMetrics
-│       ├── MarkDuplicates
-│       ├── SamToolsStats
-│       ├── snpEff
-│       └── VCFTools
-├── Resources
-│   └── apply_recalibration.sh
-├── <sample1 name>.lst
-├── <sample1 name>.md5
-├── <sample2 name>.lst
-├── <sample2 name>.md5
-└── VariantCalling
-    ├── <sample1 name>
-    │   ├── HaplotypeCaller
-    │   └── HaplotypeCallerGVCF
-    └── <sample2 name>
-        ├── HaplotypeCaller
-        └── HaplotypeCallerGVCF
+
+## Delivery structure
 
 ```
+├── checksums.md5
+├── DELIVERY.README.SAREK.WES.md
+└── results
+    ├── add
+    ├── annotation
+    │   ├── deepvariant
+    │   └── haplotypecaller
+    ├── csv
+    ├── multiqc
+    ├── pipeline_info
+    ├── preprocessing
+    │   ├── fastp
+    │   ├── recalibrated
+    │   └── recal_table
+    ├── reference
+    │   └── intervals
+    ├── reports
+    │   ├── bcftools
+    │   ├── fastp
+    │   ├── fastqc
+    │   ├── HsMetrics
+    │   ├── markduplicates
+    │   ├── mosdepth
+    │   ├── samtools
+    │   ├── snpeff
+    │   └── vcftools
+    ├── tabix
+    └── variant_calling
+        ├── concat
+        ├── deepvariant
+        └── haplotypecaller
 
-## FASTQ files
+```
 
-FASTQ files are not included in the delivery, but can be regenerated from the BAM files.
-We recommend using https://github.com/qbic-pipelines/bamtofastq, refer to its documentation for usage.
 
 ## Known issues
-
 - Twist bait intervals are not publicly available and therefore, when running CollectHsMetrics (Picard), the target intervals are used to specify both target and bait. 
 This will lead to some incorrect entries in the HsMetrics table in the MultiQC-report, i.e. entries regarding baits should be neglected.
 
-## Additional information 
 
-- The original target file used for the analysis can be found here https://www.twistbioscience.com/resources/bed-file/twist-human-comprehensive-exome-panel-bed-files
-Note that each region in this file was padded with 100 bp upstream and downstream before submitting it to the pipeline. 
+## Additional information 
+- The original target file used for the analysis can be found [here](https://www.twistbioscience.com/resources/data-files/comprehensive-exome-bed-files)
+Note that each region in this file was padded with 100 bp upstream and downstream before submitting it to the pipeline (available in results/reference/intervals). 
 - Note that samples that are sequenced on more than one flowcell/lane will be suffixed accordingly for some modules in the MultiQC report.
-A sample that has been sequenced twice will for some metrics be presented as a joint vaule for <sample name>, and with one value per run, i.e. <sample name>_1 and <sample_name>_2. 
-- To apply the recalibrations table to the deduplicated .bam-files use the script Resources/apply_recalibration.sh
+A sample that has been sequenced twice will for some metrics be presented as a joint value for <sample name>, and with one value per run, i.e. <sample name>_1 and <sample name>_2.
+- Output from GATK MarkDuplicates has been removed from the results folder.
+Duplicate-marked CRAM files can be requested up to 60 days after delivery.
+
diff --git a/roles/nf-core/templates/DELIVERY.README.SAREK.md.j2 b/roles/nf-core/templates/DELIVERY.README.SAREK.md.j2
@@ -0,0 +1,63 @@
+# DELIVERY OF RESULTS FROM WGS ANALYSIS WITH SAREK
+
+## Analysis
+Samples were analysed with the Sarek pipeline release {{ release }}. 
+
+The workflow processes raw data from FastQ inputs, aligns the reads, marks duplicates and performs base recalibration.
+SNVs and small indels are called with GATK HaplotypeCaller and DeepVariant. SnpEff annotated calls are reported in 
+separate vcf files for each caller as well as concatenated vcf files with the combined result.
+
+For information regarding the pipeline, folder structure and how to interpret results, please refer to the Sarek documentation:
+[https://nf-co.re/sarek/{{ release }}](https://nf-co.re/sarek/{{ release }})
+
+Detailed information about standard outputs from the pipeline can be found [here](https://nf-co.re/sarek/{{ release }}/output).
+
+The directory also contains the file checksums.md5, which should be used to verify the integrity of the files after transfer.
+
+
+## Delivery structure
+
+```
+├── checksums.md5
+├── DELIVERY.README.SAREK.md
+└── results
+    ├── add
+    ├── annotation
+    │   ├── deepvariant
+    │   └── haplotypecaller
+    ├── csv
+    ├── multiqc
+    ├── pipeline_info
+    ├── preprocessing
+    │   ├── fastp
+    │   ├── recalibrated
+    │   └── recal_table
+    ├── reference
+    │   └── intervals
+    ├── reports
+    │   ├── bcftools
+    │   ├── fastp
+    │   ├── fastqc
+    │   ├── markduplicates
+    │   ├── mosdepth
+    │   ├── samtools
+    │   ├── snpeff
+    │   └── vcftools
+    ├── tabix
+    └── variant_calling
+        ├── concat
+        ├── deepvariant
+        └── haplotypecaller
+
+```
+
+
+## Known issues
+
+
+## Additional information 
+- Note that samples that are sequenced on more than one flowcell/lane will be suffixed accordingly for some modules in the MultiQC report.
+A sample that has been sequenced twice will for some metrics be presented as a joint value for <sample name>, and with one value per run, i.e. <sample name>_1 and <sample name>_2.
+- Output from GATK MarkDuplicates has been removed from the results folder.
+Duplicate-marked CRAM files can be requested up to 60 days after delivery.
+
diff --git a/roles/ngi_pipeline/defaults/main.yml b/roles/ngi_pipeline/defaults/main.yml
@@ -1,7 +1,7 @@
 ---
 ngi_pipeline_repo: https://github.com/NationalGenomicsInfrastructure/ngi_pipeline.git
 ngi_pipeline_dest: "{{ sw_path }}/ngi_pipeline"
-ngi_pipeline_version: 084be9d502a63626dd5e3a2ca0f04d0a82055d2d
+ngi_pipeline_version: 63c6bf8e9f54c4b699f7299ffa6b7048be32b8d4
 
 ngi_pipeline_log: "{{ ngi_log_path }}/ngi_pipeline.log"
 ngi_pipeline_db_path: "{{ ngi_pipeline_path }}/db"
diff --git a/roles/taca/defaults/main.yml b/roles/taca/defaults/main.yml
@@ -1,15 +1,15 @@
 ---
 taca_ngi_repo: https://github.com/NationalGenomicsInfrastructure/taca-ngi-pipeline.git
 taca_ngi_dest: "{{ sw_path }}/taca-ngi-pipeline"
-taca_ngi_version: ac821fc87e616cc27abd154fa92f9b52164b306c
+taca_ngi_version: 6715a4b4f26bd4a88fd2a04a04a2feb1937a9d76
 
 flowcell_parser_repo: https://github.com/NationalGenomicsInfrastructure/flowcell_parser.git
 flowcell_parser_dest: "{{ sw_path }}/flowcell_parser"
-flowcell_parser_version: cfe18abb6b758a6389a9d6aa75f1866d517dc36e
+flowcell_parser_version: 0d0b8e781daf71d782b95201c4ec2d3c5ef02fb7
 
 taca_repo: https://github.com/NationalGenomicsInfrastructure/TACA.git
 taca_dest: "{{ sw_path }}/TACA"
-taca_version: 43b763b44be324c9d8ef6841d9c56e2a85cd2d62
+taca_version: 3c6e004509cceb49f0fc1a8c3dc816862b90c8a7
 
 ngi_pipeline_analysisdir: "{{ ngi_pipeline_workdir }}/ANALYSIS"
 ngi_pipeline_datadir: "{{ ngi_pipeline_workdir }}/DATA"
diff --git a/roles/taca/tasks/main.yml b/roles/taca/tasks/main.yml
@@ -44,6 +44,7 @@
     - { config_tpl: "taca_cleanup", config_dst: "taca_cleanup", item_type: cleanup, config_site: ["sthlm"] }
     - { config_tpl: "app_specific_delivery", config_dst: "taca_fastq_delivery", item_type: fastq, config_site: ["sthlm"] }
     - { config_tpl: "taca_runfolder_delivery", config_dst: "taca_runfolder_delivery", item_type: runfolder, config_site: ["sthlm"] }
+    - { config_tpl: "taca_organise_and_deliver", config_dst: "taca_organise_and_deliver", config_site: ["sthlm"] }
     - { config_tpl: "app_specific_delivery", config_dst: "taca_rna_delivery", item_type: rna, config_site: ["sthlm"] }
     - { config_tpl: "app_specific_delivery", config_dst: "taca_denovo_delivery", item_type: denovo, config_site: ["sthlm"] }
 
diff --git a/roles/taca/templates/site_taca_organise_and_deliver.yml.j2 b/roles/taca/templates/site_taca_organise_and_deliver.yml.j2
diff --git a/roles/tarzan/templates/kong.yml.j2 b/roles/tarzan/templates/kong.yml.j2