Skip to content

Commit 562a66e

Browse files
authored
Merge branch 'master' into workflows_of_workflows
2 parents 42ace69 + 09b226c commit 562a66e

File tree

19 files changed

+5650
-466
lines changed

19 files changed

+5650
-466
lines changed

docs/nf4_science/genomics/04_testing.md

Lines changed: 480 additions & 466 deletions
Large diffs are not rendered by default.

mkdocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ nav:
2929
- nf4_science/genomics/01_per_sample_variant_calling.md
3030
- nf4_science/genomics/02_joint_calling.md
3131
- nf4_science/genomics/03_modules.md
32+
- nf4_science/genomics/04_testing.md
3233

3334
- Nextflow for RNAseq:
3435
- nf4_science/rnaseq/index.md
@@ -191,6 +192,7 @@ plugins:
191192
- side_quests/nf-test.md
192193
- side_quests/workflows_of_workflows.md
193194
- nf4_science/genomics/03_modules.md
195+
- nf4_science/genomics/04_testing.md
194196

195197
- i18n:
196198
docs_structure: suffix
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
 * GATK_HAPLOTYPECALLER
 *
 * Runs `gatk HaplotypeCaller` with `-ERC GVCF` on a single BAM file,
 * restricted to the supplied intervals.
 *
 * Inputs:
 *   - tuple (input_bam, input_bam_index): the BAM passed to -I, plus a companion
 *     file that is staged but not named on the command line
 *   - ref_fasta: reference passed to -R
 *   - ref_index, ref_dict: staged next to the reference, not named on the command line
 *   - interval_list: passed to -L
 *
 * Outputs:
 *   - vcf: "<input_bam>.g.vcf"
 *   - idx: "<input_bam>.g.vcf.idx"
 *
 * Declared outputs are published to params.outdir as symlinks.
 */
process GATK_HAPLOTYPECALLER {

    container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"

    publishDir params.outdir, mode: 'symlink'

    input:
    tuple path(input_bam), path(input_bam_index)
    path ref_fasta
    path ref_index
    path ref_dict
    path interval_list

    output:
    path "${input_bam}.g.vcf"     , emit: vcf
    path "${input_bam}.g.vcf.idx" , emit: idx

    script:
    // File name handed to -O; identical to the pattern declared for the `vcf` output.
    def gvcf_name = "${input_bam}.g.vcf"
    """
    gatk HaplotypeCaller \
        -R ${ref_fasta} \
        -I ${input_bam} \
        -O ${gvcf_name} \
        -L ${interval_list} \
        -ERC GVCF
    """
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
 * nf-test suite for the GATK_HAPLOTYPECALLER process.
 *
 * Every test follows the same shape:
 *   setup - run SAMTOOLS_INDEX on one sample BAM
 *   when  - feed the SAMTOOLS_INDEX output plus the reference files and
 *           intervals to GATK_HAPLOTYPECALLER
 *   then  - require success and check that the first file of the first output
 *           channel contains the expected header line and one expected record
 */
nextflow_process {

    name "Test Process GATK_HAPLOTYPECALLER"
    script "../main.nf"
    process "GATK_HAPLOTYPECALLER"

    test("Should call son's haplotype correctly") {

        setup {
            run("SAMTOOLS_INDEX") {
                script "../../../samtools/index/main.nf"
                process {
                    """
                    input[0] = file("${projectDir}/data/bam/reads_son.bam")
                    """
                }
            }
        }

        when {
            params {
                outdir = "tests/results"
            }
            process {
                """
                input[0] = SAMTOOLS_INDEX.out
                input[1] = file("${projectDir}/data/ref/ref.fasta")
                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
                input[3] = file("${projectDir}/data/ref/ref.dict")
                input[4] = file("${projectDir}/data/ref/intervals.bed")
                """
            }
        }

        then {
            assert process.success
            // Read the output file once and reuse the lines for both content checks.
            def gvcfLines = path(process.out[0][0]).readLines()
            assert gvcfLines.contains('#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT reads_son')
            assert gvcfLines.contains('20_10037292_10066351 3277 . G <NON_REF> . . END=3282 GT:DP:GQ:MIN_DP:PL 0/0:25:72:24:0,72,719')
        }

    }

    test("Should call mother's haplotype correctly") {

        setup {
            run("SAMTOOLS_INDEX") {
                script "../../../samtools/index/main.nf"
                process {
                    """
                    input[0] = file("${projectDir}/data/bam/reads_mother.bam")
                    """
                }
            }
        }

        when {
            params {
                outdir = "tests/results"
            }
            process {
                """
                input[0] = SAMTOOLS_INDEX.out
                input[1] = file("${projectDir}/data/ref/ref.fasta")
                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
                input[3] = file("${projectDir}/data/ref/ref.dict")
                input[4] = file("${projectDir}/data/ref/intervals.bed")
                """
            }
        }

        then {
            assert process.success
            // Read the output file once and reuse the lines for both content checks.
            def gvcfLines = path(process.out[0][0]).readLines()
            assert gvcfLines.contains('#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT reads_mother')
            assert gvcfLines.contains('20_10037292_10066351 3277 . G <NON_REF> . . END=3278 GT:DP:GQ:MIN_DP:PL 0/0:38:99:37:0,102,1530')
        }
    }

    test("Should call father's haplotype correctly") {

        setup {
            run("SAMTOOLS_INDEX") {
                script "../../../samtools/index/main.nf"
                process {
                    """
                    input[0] = file("${projectDir}/data/bam/reads_father.bam")
                    """
                }
            }
        }

        when {
            params {
                outdir = "tests/results"
            }
            process {
                """
                input[0] = SAMTOOLS_INDEX.out
                input[1] = file("${projectDir}/data/ref/ref.fasta")
                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
                input[3] = file("${projectDir}/data/ref/ref.dict")
                input[4] = file("${projectDir}/data/ref/intervals.bed")
                """
            }
        }

        then {
            assert process.success
            // Read the output file once and reuse the lines for both content checks.
            def gvcfLines = path(process.out[0][0]).readLines()
            assert gvcfLines.contains('#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT reads_father')
            assert gvcfLines.contains('20_10037292_10066351 3277 . G <NON_REF> . . END=3281 GT:DP:GQ:MIN_DP:PL 0/0:44:99:42:0,120,1800')
        }
    }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"Should call son's halotype correctly": {
3+
"content": [
4+
{
5+
"0": [
6+
"reads_son.bam.g.vcf:md5,069316cdd4328542ffc6ae247b1dac39"
7+
],
8+
"1": [
9+
"reads_son.bam.g.vcf.idx:md5,dc36c18f2afdc546f41e68b2687e9334"
10+
],
11+
"idx": [
12+
"reads_son.bam.g.vcf.idx:md5,dc36c18f2afdc546f41e68b2687e9334"
13+
],
14+
"vcf": [
15+
"reads_son.bam.g.vcf:md5,069316cdd4328542ffc6ae247b1dac39"
16+
]
17+
}
18+
],
19+
"meta": {
20+
"nf-test": "0.9.2",
21+
"nextflow": "24.10.0"
22+
},
23+
"timestamp": "2025-03-04T09:26:58.537420116"
24+
}
25+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
 * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls
 *
 * Inputs:
 *   - all_gvcfs: one or more GVCF files; each is passed to GenomicsDBImport with its own -V
 *   - all_idxs: staged into the task directory, not named on the command line
 *   - interval_list: passed to -L for both GATK commands
 *   - cohort_name: prefix for the GenomicsDB workspace and the output VCF
 *   - ref_fasta: reference passed to GenotypeGVCFs via -R
 *   - ref_index, ref_dict: staged next to the reference, not named on the command line
 *
 * Outputs:
 *   - vcf: "<cohort_name>.joint.vcf"
 *   - idx: "<cohort_name>.joint.vcf.idx"
 *
 * Declared outputs are copied to params.outdir.
 */
process GATK_JOINTGENOTYPING {

    container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
    publishDir params.outdir, mode: 'copy'

    input:
    path all_gvcfs
    path all_idxs
    path interval_list
    val cohort_name
    path ref_fasta
    path ref_index
    path ref_dict

    output:
    path "${cohort_name}.joint.vcf"     , emit: vcf
    path "${cohort_name}.joint.vcf.idx" , emit: idx

    script:
    // A `path` input that receives exactly one file is bound to a single Path rather
    // than a list. Calling collect() on a Path would walk its path components, so wrap
    // the single-file case in a list to get exactly one "-V <file>" per GVCF.
    def gvcf_files = (all_gvcfs instanceof java.nio.file.Path) ? [all_gvcfs] : all_gvcfs
    def gvcfs_line = gvcf_files.collect { gvcf -> "-V ${gvcf}" }.join(' ')
    """
    gatk GenomicsDBImport \
        ${gvcfs_line} \
        -L ${interval_list} \
        --genomicsdb-workspace-path ${cohort_name}_gdb

    gatk GenotypeGVCFs \
        -R ${ref_fasta} \
        -V gendb://${cohort_name}_gdb \
        -L ${interval_list} \
        -O ${cohort_name}.joint.vcf
    """
}

0 commit comments

Comments
 (0)