Skip to content

Commit 23fe472

Browse files
authored
Merge pull request #314 from NationalGenomicsInfrastructure/monthly
Monthly deployment
2 parents 0bd4e2d + 54443f1 commit 23fe472

File tree

16 files changed

+162
-111
lines changed

16 files changed

+162
-111
lines changed

env_vars/site_sthlm_env_all.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ multiqc_options:
2525
save_remote: True
2626
template: ngi
2727
output_fn_name: multiqc_report.html
28+
preserve_module_raw_data: True
2829

2930
multiqc_swedac_accredited: True
3031
multiqc_sshkey: /path/to/ssh_key

host_vars/deploy/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ sarek_dest: "{{ sw_path }}/sarek/{{ sarek_tag | replace('.', '_') }}"
5050
rnaseq_tag: "3.8.1"
5151
rnaseq_dest: "{{ sw_path }}/rnaseq/{{ rnaseq_tag | replace('.', '_') }}"
5252

53-
demultiplex_tag: "1.4.1"
53+
demultiplex_tag: "1.5.4"
5454

5555
# File with tools/software version in the deployed env
5656
deployed_tool_versions: "{{ ngi_resources }}/deployed_tools.{{ site }}.version"

roles/arteria-sequencing-report-ws/defaults/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22

33
seqreport_service_repo: https://github.com/arteria-project/sequencing-report-service.git
4-
seqreport_service_version: v1.5.1-rc2
4+
seqreport_service_version: v1.5.2
55

66
arteria_service_name: arteria-sequencing-report-ws
77
arteria_sequencing_report_wrapper: "{{ arteria_service_config_root }}/arteria_sequencing_report_wrapper.sh"

roles/arteria-sequencing-report-ws/templates/nextflow_configs/demultiplex.config.j2

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,16 @@
1010
----------------------------------------------------------------------------------------
1111
*/
1212

13+
cleanup = true
14+
1315
plugins {
14-
id 'nf-validation@{{ nf_validation_version }}'
16+
id 'nf-schema@{{ nf_schema_version }}'
17+
}
18+
19+
profiles {
20+
uppmax {
21+
includeConfig "${projectDir}/../configs/conf/uppmax.config"
22+
}
1523
}
1624

1725
process {
@@ -26,19 +34,21 @@ process {
2634
publishDir = [
2735
[
2836
path: { "${params.outdir}/Unaligned/" },
29-
pattern: "**_S[1-9]*_*.fastq.gz",
37+
pattern: "output/**_S[1-9]*_*.fastq.gz",
3038
mode: "link",
39+
saveAs: { filename -> filename.minus("output/") }
3140
],
3241
[
3342
path: { "${params.outdir}/Unaligned/" },
34-
pattern: "Undetermined_S0_*.fastq.gz",
43+
pattern: "output/**Undetermined_S0_*.fastq.gz",
3544
mode: "link",
45+
saveAs: { filename -> filename.minus("output/") }
3646
],
3747
[
3848
// Gather and write Reports and Stats
3949
path: { "${params.outdir}/Unaligned/" },
4050
mode: "link",
41-
pattern: "Stats",
51+
pattern: "output/{Stats,Reports}",
4252
saveAs: {filename -> filename.split("/")[-1] }
4353
],
4454
[
@@ -49,5 +59,4 @@ process {
4959
],
5060
]
5161
}
52-
5362
}

roles/arteria-sequencing-report-ws/templates/pipeline_configs/demultiplex.yml.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pipeline_parameters:
2020
outdir: "{runfolder_path}"
2121
project: {{uppmax_project}}
2222
demultiplexer: "bcl2fastq"
23-
skip_tools: "fastp,falco,multiqc,md5sum"
23+
skip_tools: "samshee,checkqc,fastp,falco,md5sum,kraken,multiqc"
2424
input_samplesheet_content: |
2525
id,samplesheet,lane,flowcell
2626
{runfolder_name},{runfolder_path}/SampleSheet.csv,,{runfolder_path}

roles/multiqc/defaults/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
multiqc_repo: https://github.com/MultiQC/MultiQC.git
22
multiqc_dest: "{{ sw_path }}/multiqc"
3-
multiqc_version: "v1.25.2"
3+
multiqc_version: "v1.26"
44

55
multiqc_ngi_repo: https://github.com/NationalGenomicsInfrastructure/MultiQC_NGI.git
66
multiqc_ngi_dest: "{{ sw_path }}/multiqc_ngi"

roles/nextflow/defaults/main.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
java_home: /sw/comp/java/x86_64/OracleJDK_11.0.9
22
nextflow_java: "{{ java_home }}"
3-
nextflow_version_tag: 24.04.1
3+
nextflow_version_tag: 24.04.2
44
nextflow_download_url: https://github.com/nextflow-io/nextflow/releases/download/v{{ nextflow_version_tag }}/nextflow
5-
nf_validation_version: 1.1.2
5+
nf_schema_version: 2.1.1
66
nextflow_local_env:
77
NXF_HOME: "{{ nextflow_dest }}/workfiles"
88
NXF_OPTS: -Xms1g -Xmx3500m
@@ -21,8 +21,10 @@ nextflow_env:
2121
PATH: "{{ tools_path.PATH }}"
2222
nextflow_plugins:
2323
- name: nf-validation
24-
version: "{{ nf_validation_version }}"
24+
version: 1.1.2
2525
- name: nf-prov
2626
version: 1.2.1
2727
- name: nf-tower
2828
version: 1.6.3
29+
- name: nf-schema # Will replace nf-validation once we update all our pipelines
30+
version: "{{ nf_schema_version }}"

roles/nextflow/templates/nextflow_miarka_site.config.j2

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11

2-
{% if site == "upps" %}
3-
cleanup = false
4-
{% else %}
52
cleanup = true
6-
{% endif %}
73

84
params {
95

roles/nf-core/defaults/main.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ nf_core_delivery_readmes:
7373
- DELIVERY.README.SAREK.txt
7474
- DELIVERY.README.SAREK.BATCH.txt
7575
- DELIVERY.README.SAREK.WES.md
76+
- DELIVERY.README.SAREK.md
7677
- apply_recalibration.sh
7778
rnaseq:
7879
- DELIVERY.README.RNASEQ.md
@@ -89,4 +90,4 @@ igenomes:
8990
build: GRCh38
9091
type: gatk
9192

92-
ncbi_eutils_url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils
93+
ncbi_eutils_url: https://eutils.ncbi.nlm.nih.gov/entrez/eutils
Lines changed: 50 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,109 +1,69 @@
11
# DELIVERY OF RESULTS FROM EXOME ANALYSIS WITH SAREK
22

33
## Analysis
4-
Samples were analysed with the Sarek pipeline release {{ release }}. In short, the pipeline does the following:
5-
Reads from fastq-files were mapped to a reference genome using BWA.
6-
Bam-files were de-duplicated with GATK MarkDuplicates.
7-
Base quality score recalibration tables were created with GATK BaseRecalibrator.
8-
The tables were then used in GATK ApplyBQSR to create recalibrated bam-files.
9-
SNVs and small indels were called with GATK HaplotypeCaller.
10-
Variants were annoted with SnpEff.
4+
Samples were analysed with the Sarek pipeline release {{ release }}.
115

12-
For details on the pipeline, folder structure and how to interpret results, please refer to the Sarek documentation:
13-
https://nf-co.re/sarek/{{ release }}
6+
The workflow processes raw data from FastQ inputs, aligns the reads, marks duplicates and performs base recalibration.
7+
SNVs and small indels are called with GATK HaplotypeCaller and DeepVariant. SnpEff annotated calls are reported in
8+
separate vcf files for each caller as well as concatenated vcf files with the combined result.
9+
In addition to the Sarek pipeline analysis, target region coverage was evaluated with Picard CollectHsMetrics.
1410

15-
After running the pipeline, Picard CollectHsMetrics was used to evaluate the coverage
11+
For information regarding the pipeline, folder structure and how to interpret results, please refer to the Sarek documentation:
12+
[https://nf-co.re/sarek/{{ release }}](https://nf-co.re/sarek/{{ release }})
1613

17-
## Delivery structure, directories and files:
14+
Detailed information about standard outputs from the pipeline can be found [here](https://nf-co.re/sarek/{{ release }}/output).
1815

19-
```
16+
The directory also contains the file checksums.md5, which should be used to verify the integrity of the files after transfer.
2017

21-
├── Annotation
22-
│ ├── <sample1 name>
23-
│ │ └── snpEff
24-
│ └── <sample2 name>
25-
│ └── snpEff
26-
├── DELIVERY.README.SAREK.WES.md
27-
├── pipeline_info
28-
│ ├── results_description.html
29-
│ └── software_versions.csv
30-
├── Preprocessing
31-
│ ├── TSV
32-
│ │ ├── duplicates_marked_no_table.tsv
33-
│ │ ├── duplicates_marked_no_table_<sample1 name>.tsv
34-
│ │ ├── duplicates_marked_no_table_<sample2 name>.tsv
35-
│ │ ├── duplicates_marked.tsv
36-
│ │ ├── duplicates_marked_<sample1 name>.tsv
37-
│ │ └── duplicates_marked_<sample2 name>.tsv
38-
│ ├── <sample1 name>
39-
│ │ └── DuplicatesMarked
40-
│ │ ├── <sample1 name>.md.bam
41-
│ │ ├── <sample1 name>.md.bam.bai
42-
│ │ └── <sample1 name>.recal.table
43-
│ └── <sample2 name>
44-
│ └── DuplicatesMarked
45-
│ ├── <sample2 name>.md.bam
46-
│ ├── <sample2 name>.md.bam.bai
47-
│ └── <sample2 name>.recal.table
48-
├── Reports
49-
│ ├── SequenceQC
50-
│ │ ├── <runfolder 1>
51-
│ │ │ ├── <runfolder 1>_<project>_multiqc_report_data.zip
52-
│ │ │ └── <runfolder 1>_<project>_multiqc_report.html
53-
│ │ └── <runfolder 2>
54-
│ │ ├── <runfolder 2>_<project>_multiqc_report_data.zip
55-
│ │ └── <runfolder 2>_<project>_multiqc_report.html
56-
│ ├── MultiQC
57-
│ │ ├── <project>_multiqc_report_data.zip
58-
│ │ └── <project>_multiqc_report.html
59-
│ ├── <sample1 name>
60-
│ │ ├── bamQC
61-
│ │ ├── BCFToolsStats
62-
│ │ ├── FastQC
63-
│ │ ├── HsMetrics
64-
│ │ ├── MarkDuplicates
65-
│ │ ├── SamToolsStats
66-
│ │ ├── snpEff
67-
│ │ └── VCFTools
68-
│ └── <sample2 name>
69-
│ ├── bamQC
70-
│ ├── BCFToolsStats
71-
│ ├── FastQC
72-
│ ├── HsMetrics
73-
│ ├── MarkDuplicates
74-
│ ├── SamToolsStats
75-
│ ├── snpEff
76-
│ └── VCFTools
77-
├── Resources
78-
│ └── apply_recalibration.sh
79-
├── <sample1 name>.lst
80-
├── <sample1 name>.md5
81-
├── <sample2 name>.lst
82-
├── <sample2 name>.md5
83-
└── VariantCalling
84-
├── <sample1 name>
85-
│ ├── HaplotypeCaller
86-
│ └── HaplotypeCallerGVCF
87-
└── <sample2 name>
88-
├── HaplotypeCaller
89-
└── HaplotypeCallerGVCF
18+
19+
## Delivery structure
9020

9121
```
22+
├── checksums.md5
23+
├── DELIVERY.README.SAREK.WES.md
24+
├── results
25+
├── add
26+
├── annotation
27+
│ ├── deepvariant
28+
│ └── haplotypecaller
29+
├── csv
30+
├── multiqc
31+
├── pipeline_info
32+
├── preprocessing
33+
│ ├── fastp
34+
│ ├── recalibrated
35+
│ └── recal_table
36+
├── reference
37+
│ └── intervals
38+
├── reports
39+
│ ├── bcftools
40+
│ ├── fastp
41+
│ ├── fastqc
42+
│ ├── HsMetrics
43+
│ ├── markduplicates
44+
│ ├── mosdepth
45+
│ ├── samtools
46+
│ ├── snpeff
47+
│ └── vcftools
48+
├── tabix
49+
└── variant_calling
50+
├── concat
51+
├── deepvariant
52+
└── haplotypecaller
9253

93-
## FASTQ files
54+
```
9455

95-
FASTQ files are not included in the delivery, but can be regenerated from the BAM files.
96-
We recommend using https://github.com/qbic-pipelines/bamtofastq, refer to its documentation for usage.
9756

9857
## Known issues
99-
10058
- Twist bait intervals are not publicly available and therefore, when running CollectHsMetrics (Picard), the target intervals are used to specify both target and bait.
10159
This will lead to some incorrect entries in the HsMetrics table in the MultiQC report, so entries regarding baits should be disregarded.
10260

103-
## Additional information
10461

105-
- The original target file used for the analysis can be found here https://www.twistbioscience.com/resources/bed-file/twist-human-comprehensive-exome-panel-bed-files
106-
Note that each region in this file was padded with 100 bp upstream and downstream before submitting it to the pipeline.
62+
## Additional information
63+
- The original target file used for the analysis can be found [here](https://www.twistbioscience.com/resources/data-files/comprehensive-exome-bed-files)
64+
Note that each region in this file was padded with 100 bp upstream and downstream before submitting it to the pipeline (available in results/reference/intervals).
10765
- Note that samples that are sequenced on more than one flowcell/lane will be suffixed accordingly for some modules in the MultiQC report.
108-
A sample that has been sequenced twice will for some metrics be presented as a joint vaule for <sample name>, and with one value per run, i.e. <sample name>_1 and <sample_name>_2.
109-
- To apply the recalibrations table to the deduplicated .bam-files use the script Resources/apply_recalibration.sh
66+
A sample that has been sequenced twice will for some metrics be presented as a joint value for <sample name>, and with one value per run, i.e. <sample name>_1 and <sample name>_2.
67+
- Output from GATK MarkDuplicates has been removed from the results folder.
68+
Duplicate marked cram files can be requested up to 60 days after delivery.
69+

0 commit comments

Comments
 (0)