Skip to content

Commit 98c35fb

Browse files
kubranarci and SPPearce authored
adding new module gatk4_concordance (#9247)
* adding new module gatk4_concordance * fix lint issues * Update modules/nf-core/gatk4/concordance/main.nf Co-authored-by: Simon Pearce <[email protected]> --------- Co-authored-by: Simon Pearce <[email protected]>
1 parent 7a72607 commit 98c35fb

File tree

5 files changed

+457
-0
lines changed

5 files changed

+457
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the gatk4/concordance module: the channels to resolve from and the pinned packages.
3+
channels:
4+
- conda-forge
5+
- bioconda
# Each dependency is pinned to an exact version; the "# renovate:" line directly above a pin
# names the conda package it tracks (presumably read by Renovate for automated bumps - confirm).
6+
dependencies:
7+
# renovate: datasource=conda depName=bioconda/gatk4
8+
- bioconda::gatk4=4.6.2.0
# NOTE(review): gcnvkernel is not invoked by the Concordance command in main.nf; it appears to be
# pinned only because the container image used there is the combined "gatk4_gcnvkernel" build - confirm.
9+
# renovate: datasource=conda depName=bioconda/gcnvkernel
10+
- bioconda::gcnvkernel=0.9
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
process GATK4_CONCORDANCE {
// Wraps `gatk Concordance`: compares an evaluation VCF against a truth VCF over a reference
// and emits a summary TSV plus two annotated VCFs (<prefix>.tpfn.vcf and <prefix>.tpfp.vcf).
2+
tag "${meta.id}"
3+
label 'process_low'
4+
// Software comes either from the conda environment file shipped next to this module or from a container.
// Singularity runs use the HTTPS blob URL unless task.ext.singularity_pull_docker_container is set;
// every other case uses the registry image name.
5+
conda "${moduleDir}/environment.yml"
6+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
7+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data'
8+
: 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}"
9+
// Inputs, one channel per tuple:
//   meta,  vcf, vcf_tbi, truth, truth_tbi - evaluation VCF and truth VCF, each with an index file
//   meta2, intervals                     - optional regions file; a falsy value omits --intervals
//   meta3, fasta / meta4, fai / meta5, dict - reference and its companion files
// vcf_tbi, truth_tbi, fai and dict are staged but never named on the command line below;
// presumably GATK locates them next to the files they belong to - confirm.
10+
input:
11+
tuple val(meta), path(vcf), path(vcf_tbi), path(truth), path(truth_tbi)
12+
tuple val(meta2), path(intervals)
13+
tuple val(meta3), path(fasta)
14+
tuple val(meta4), path(fai)
15+
tuple val(meta5), path(dict)
16+
// Outputs are collected by glob; the script names them <prefix>.summary.tsv, <prefix>.tpfn.vcf
// and <prefix>.tpfp.vcf, so each glob is expected to match exactly one file.
17+
output:
18+
tuple val(meta), path('*.tsv'), emit: summary
19+
tuple val(meta), path("*.tpfn.vcf"), emit: tpfn
20+
tuple val(meta), path("*.tpfp.vcf"), emit: tpfp
21+
path "versions.yml", emit: versions
22+
// The task runs unless task.ext.when is set to a falsy value.
23+
when:
24+
task.ext.when == null || task.ext.when
25+
26+
script:
// args:   extra command-line options taken from task.ext.args (empty string when unset)
// prefix: output file stem, task.ext.prefix when set, otherwise meta.id
// bed:    "--intervals <file>" only when `intervals` is truthy, otherwise an empty string
27+
def args = task.ext.args ?: ''
28+
def prefix = task.ext.prefix ?: "${meta.id}"
29+
def bed = intervals ? "--intervals $intervals" : ""
30+
// JVM heap size in MB passed via -Xmx: 3072 when the task has no memory setting (an info
// message is logged), otherwise 80% of task.memory truncated to an integer.
31+
def avail_mem = 3072
32+
if (!task.memory) {
33+
log.info('[GATK Concordance] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
34+
}
35+
else {
36+
avail_mem = (task.memory.mega * 0.8).intValue()
37+
}
38+
"""
39+
gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
40+
Concordance \\
41+
-R $fasta \\
42+
-eval $vcf \\
43+
--truth $truth \\
44+
--summary ${prefix}.summary.tsv \\
45+
-tpfn ${prefix}.tpfn.vcf \\
46+
-tpfp ${prefix}.tpfp.vcf \\
47+
$bed \\
48+
--tmp-dir . \\
49+
${args}
50+
51+
cat <<-END_VERSIONS > versions.yml
52+
"${task.process}":
53+
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
54+
END_VERSIONS
55+
"""
56+
// Stub: creates empty files with the same names as the real outputs and still writes
// versions.yml by calling `gatk --version`, so gatk must be available in stub runs too.
57+
stub:
58+
def prefix = task.ext.prefix ?: "${meta.id}"
59+
"""
60+
touch ${prefix}.summary.tsv
61+
touch ${prefix}.tpfp.vcf
62+
touch ${prefix}.tpfn.vcf
63+
64+
cat <<-END_VERSIONS > versions.yml
65+
"${task.process}":
66+
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
67+
END_VERSIONS
68+
"""
69+
}
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: "gatk4_concordance"
3+
description: Evaluate concordance of an input VCF against a validated truth VCF
4+
5+
keywords:
6+
- concordance
7+
- gatk4
8+
- gatk
9+
- genomics
10+
- variant calling
11+
- genotyping
12+
- vcf
13+
- comparison
14+
15+
tools:
16+
- "gatk4":
17+
description: "Genome Analysis Toolkit (GATK4)"
18+
homepage: "https://gatk.broadinstitute.org/hc/en-us"
19+
documentation: "https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s"
20+
tool_dev_url: "https://github.com/broadinstitute/gatk"
21+
doi: "10.1158/1538-7445.AM2017-3590"
22+
licence: ["BSD-3-clause"]
23+
identifier: "biotools:gatk"
24+
25+
input:
26+
- - meta:
27+
type: map
28+
description: |
29+
Groovy Map containing sample information
30+
e.g. `[ id:'sample1' ]`
31+
- vcf:
32+
type: file
33+
description: Evaluation VCF file created with a variant caller
34+
pattern: "*.vcf.gz"
35+
ontologies:
36+
- edam: "http://edamontology.org/format_3016"
37+
- edam: http://edamontology.org/format_3989 # GZIP format
38+
- vcf_tbi:
39+
type: file
40+
description: Index file for the evaluation VCF
41+
pattern: "*.vcf.gz.tbi"
42+
ontologies:
43+
- edam: "http://edamontology.org/format_3616"
44+
- truth:
45+
type: file
46+
description: Truth VCF file created with a variant caller
47+
pattern: "*.vcf.gz"
48+
ontologies:
49+
- edam: "http://edamontology.org/format_3016"
50+
- edam: http://edamontology.org/format_3989 # GZIP format
51+
- truth_tbi:
52+
type: file
53+
description: Index file for the truth VCF
54+
pattern: "*.vcf.gz.tbi"
55+
ontologies:
56+
- edam: "http://edamontology.org/format_3616"
57+
- - meta2:
58+
type: map
59+
description: |
60+
Groovy Map containing sample information
61+
e.g. `[ id:'bed' ]`
62+
- intervals:
63+
type: file
64+
description: Bed file with the genomic regions included in the library
65+
(optional)
66+
pattern: "*.bed"
67+
ontologies:
68+
- edam: "http://edamontology.org/format_3003"
69+
- - meta3:
70+
type: map
71+
description: |
72+
Groovy Map containing sample information
73+
e.g. `[ id:'fasta' ]`
74+
- fasta:
75+
type: file
76+
description: Reference FASTA file
77+
pattern: "*.{fasta,fa}"
78+
ontologies:
79+
- edam: "http://edamontology.org/format_1929"
80+
- - meta4:
81+
type: map
82+
description: |
83+
Groovy Map containing sample information
84+
e.g. `[ id:'fai' ]`
85+
- fai:
86+
type: file
87+
description: Index of the reference FASTA file
88+
pattern: "*.fai"
89+
ontologies:
90+
- edam: "http://edamontology.org/format_2330"
91+
- - meta5:
92+
type: map
93+
description: |
94+
Groovy Map containing sample information
95+
e.g. `[ id:'dict' ]`
96+
- dict:
97+
type: file
98+
description: Sequence dictionary of the reference FASTA file
99+
pattern: "*.dict"
100+
ontologies:
101+
- edam: "http://edamontology.org/format_2330"
102+
103+
output:
104+
summary:
105+
- - meta:
106+
type: map
107+
description: |
108+
Groovy Map containing sample information
109+
e.g. `[ id:'sample1' ]`
110+
- "*.tsv":
111+
type: file
112+
description: A tab-delimited file containing the metrics with number of
113+
TPs, FPs, FNs, Precision, Recall and F1 statistics
114+
pattern: "*.tsv"
115+
ontologies:
116+
- edam: "http://edamontology.org/format_3475"
117+
tpfn:
118+
- - meta:
119+
type: map
120+
description: |
121+
Groovy Map containing sample information
122+
e.g. `[ id:'sample1' ]`
123+
- "*.tpfn.vcf":
124+
type: file
125+
description: Truth VCF records tagged with TP or FN in "INFO/STATUS"
126+
pattern: "*.vcf"
127+
ontologies:
128+
- edam: "http://edamontology.org/format_3016" # VCF format (output is uncompressed, not GZIP)
129+
tpfp:
130+
- - meta:
131+
type: map
132+
description: |
133+
Groovy Map containing sample information
134+
e.g. `[ id:'sample1' ]`
135+
- "*.tpfp.vcf":
136+
type: file
137+
description: Eval VCF records tagged with TP or FP in "INFO/STATUS"
138+
pattern: "*.vcf"
139+
ontologies:
140+
- edam: "http://edamontology.org/format_3016" # VCF format (output is uncompressed, not GZIP)
141+
versions:
142+
- versions.yml:
143+
type: file
144+
description: File containing software versions
145+
pattern: "versions.yml"
146+
ontologies:
147+
- edam: "http://edamontology.org/format_3750"
148+
149+
authors:
150+
- "@kubranarci"
151+
maintainers:
152+
- "@kubranarci"
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
nextflow_process {
// nf-test suite for the GATK4_CONCORDANCE process defined in ../main.nf:
// one real run and one stub run, each checked against a snapshot of the process outputs.
2+
3+
name "Test Process GATK4_CONCORDANCE"
4+
script "../main.nf"
5+
process "GATK4_CONCORDANCE"
6+
// Tags attached to this suite (presumably used to select which tests run - confirm).
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "gatk4"
10+
tag "gatk4/concordance"
11+
// Real run. input[0] follows the process tuple order (meta, vcf, vcf_tbi, truth, truth_tbi):
// the paired mutect2 calls are the evaluation VCF and the haplotypecaller calls are the truth VCF.
// input[1] to input[4] supply a BED file and the chr21 FASTA, .fai and .dict files.
12+
test("homo_sapiens - illumina - vcf") {
13+
14+
when {
15+
process {
16+
"""
17+
input[0] = [
18+
[ id:'test' ], // meta map
19+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
20+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true),
21+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz', checkIfExists: true),
22+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi', checkIfExists: true)
23+
]
24+
input[1] = [
25+
[ id:'bed' ], // meta map
26+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)
27+
]
28+
input[2] = [
29+
[ id:'fasta' ], // meta map
30+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
31+
]
32+
input[3] = [
33+
[ id:'fai' ], // meta map
34+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
35+
]
36+
input[4] = [
37+
[ id:'dict' ], // meta map
38+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true)
39+
]
40+
"""
41+
}
42+
}
43+
// Both conditions are evaluated: the process must succeed and its outputs must match the snapshot.
44+
then {
45+
assertAll(
46+
{ assert process.success },
47+
{ assert snapshot(process.out).match() }
48+
)
49+
}
50+
}
// Stub run: identical inputs to the test above, executed with the "-stub" option, so the
// snapshot covers the placeholder files created by the process's stub block.
51+
test("homo_sapiens - illumina - vcf - stub") {
52+
53+
options "-stub"
54+
55+
when {
56+
process {
57+
"""
58+
input[0] = [
59+
[ id:'test' ], // meta map
60+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
61+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true),
62+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz', checkIfExists: true),
63+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi', checkIfExists: true)
64+
]
65+
input[1] = [
66+
[ id:'bed' ], // meta map
67+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)
68+
]
69+
input[2] = [
70+
[ id:'fasta' ], // meta map
71+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
72+
]
73+
input[3] = [
74+
[ id:'fai' ], // meta map
75+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
76+
]
77+
input[4] = [
78+
[ id:'dict' ], // meta map
79+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true)
80+
]
81+
"""
82+
}
83+
}
84+
85+
then {
86+
assertAll(
87+
{ assert process.success },
88+
{ assert snapshot(process.out).match() }
89+
)
90+
}
91+
92+
}
93+
94+
}

0 commit comments

Comments
 (0)