Skip to content

Commit f047a4f

Browse files
Implement short read QC subworkflow (#9279)
* implement subworkflow to run different QC tools on FASTQ files
* apply minor formatting changes: remove extra spaces and unneeded comments

Co-authored-by: Evangelos Karatzas <[email protected]>

* minor formatting changes
* add stub tests for single-end and paired-end data
* add seqfu check process and update nf-test
* add seqfu check to the nf-test assertions and the yml file

---------

Co-authored-by: Evangelos Karatzas <[email protected]>
1 parent a067fa8 commit f047a4f

File tree

4 files changed

+768
-0
lines changed

4 files changed

+768
-0
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//
2+
// Short read sequencing data QC using different tools
3+
//
4+
include { FASTQC } from '../../../modules/nf-core/fastqc/main'
5+
include { SEQFU_CHECK } from '../../../modules/nf-core/seqfu/check/main'
6+
include { SEQFU_STATS } from '../../../modules/nf-core/seqfu/stats/main'
7+
include { SEQKIT_STATS } from '../../../modules/nf-core/seqkit/stats/main'
8+
include { SEQTK_COMP } from '../../../modules/nf-core/seqtk/comp/main'
9+
10+
workflow FASTQ_GENERATE_STATISTICS {

    take:
    ch_reads          // channel: [ val(meta), [ fastq ] ]
    skip_fastqc       // boolean: do not run FASTQC when true
    skip_seqfu_check  // boolean: do not run SEQFU_CHECK when true
    skip_seqfu_stats  // boolean: do not run SEQFU_STATS when true
    skip_seqkit_stats // boolean: do not run SEQKIT_STATS when true
    skip_seqtk_comp   // boolean: do not run SEQTK_COMP when true

    main:

    // Every emitted channel starts out empty and is only re-bound when the
    // corresponding tool actually runs. Referencing `<PROCESS>.out` for a
    // process that was never invoked is an error in Nextflow, so the emit
    // section must not touch `.out` of a skipped process directly.
    ch_versions      = Channel.empty()
    ch_fastqc_html   = Channel.empty()
    ch_fastqc_zip    = Channel.empty()
    ch_seqfu_check   = Channel.empty()
    ch_seqfu_stats   = Channel.empty()
    ch_seqfu_multiqc = Channel.empty()
    ch_seqkit_stats  = Channel.empty()
    ch_seqtk_stats   = Channel.empty()

    if (!skip_fastqc) {
        FASTQC ( ch_reads )
        ch_fastqc_html = FASTQC.out.html
        ch_fastqc_zip  = FASTQC.out.zip
        // Only the first versions item of each tool is mixed in
        ch_versions    = ch_versions.mix(FASTQC.out.versions.first())
    }

    if (!skip_seqfu_check) {
        SEQFU_CHECK ( ch_reads )
        ch_seqfu_check = SEQFU_CHECK.out.check
        ch_versions    = ch_versions.mix(SEQFU_CHECK.out.versions.first())
    }

    if (!skip_seqfu_stats) {
        SEQFU_STATS ( ch_reads )
        ch_seqfu_stats   = SEQFU_STATS.out.stats
        ch_seqfu_multiqc = SEQFU_STATS.out.multiqc
        ch_versions      = ch_versions.mix(SEQFU_STATS.out.versions.first())
    }

    if (!skip_seqkit_stats) {
        SEQKIT_STATS ( ch_reads )
        ch_seqkit_stats = SEQKIT_STATS.out.stats
        ch_versions     = ch_versions.mix(SEQKIT_STATS.out.versions.first())
    }

    if (!skip_seqtk_comp) {
        SEQTK_COMP ( ch_reads )
        ch_seqtk_stats = SEQTK_COMP.out.seqtk_stats
        ch_versions    = ch_versions.mix(SEQTK_COMP.out.versions.first())
    }

    emit:
    fastqc_html   = ch_fastqc_html   // FASTQC.out.html, empty if FASTQC was skipped
    fastqc_zip    = ch_fastqc_zip    // FASTQC.out.zip, empty if FASTQC was skipped
    seqfu_check   = ch_seqfu_check   // SEQFU_CHECK.out.check, empty if SEQFU_CHECK was skipped
    seqfu_stats   = ch_seqfu_stats   // SEQFU_STATS.out.stats, empty if SEQFU_STATS was skipped
    seqfu_multiqc = ch_seqfu_multiqc // SEQFU_STATS.out.multiqc, empty if SEQFU_STATS was skipped
    seqkit_stats  = ch_seqkit_stats  // SEQKIT_STATS.out.stats, empty if SEQKIT_STATS was skipped
    seqtk_stats   = ch_seqtk_stats   // SEQTK_COMP.out.seqtk_stats, empty if SEQTK_COMP was skipped
    versions      = ch_versions      // mixed versions items from the tools that ran
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "fastq_generate_statistics"
description: Generate statistics for short read sequencing data using multiple tools
keywords:
  - fastq
  - qc
  - fastqc
  - seqfu
  - seqkit
  - seqtk
components:
  - fastqc
  - seqfu/check
  - seqfu/stats
  - seqkit/stats
  - seqtk/comp
input:
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
        Structure: [ val(meta), [ fastq ] ]
  - skip_fastqc:
      type: boolean
      description: |
        Skip fastqc process
  - skip_seqfu_check:
      type: boolean
      description: |
        Skip seqfu_check process
  - skip_seqfu_stats:
      type: boolean
      description: |
        Skip seqfu_stats process
  - skip_seqkit_stats:
      type: boolean
      description: |
        Skip seqkit_stats process
  - skip_seqtk_comp:
      type: boolean
      description: |
        Skip seqtk_comp process
output:
  - fastqc_html:
      type: file
      description: FastQC report
      pattern: "*_fastqc.html"
  - fastqc_zip:
      type: file
      description: FastQC report archive
      pattern: "*_fastqc.zip"
  - seqfu_check:
      type: file
      description: seqfu check tsv report
      pattern: "*.tsv"
  - seqfu_stats:
      type: file
      description: seqfu stats tsv report
      pattern: "*.tsv"
  - seqfu_multiqc:
      type: file
      description: seqfu stats MultiQC report
      pattern: "*_mqc.txt"
  - seqkit_stats:
      type: file
      description: seqkit stats report
      pattern: "*.tsv"
  # Emitted by the subworkflow as `seqtk_stats` (SEQTK_COMP.out.seqtk_stats)
  - seqtk_stats:
      type: file
      description: seqtk comp report
      pattern: "*.tsv"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@pablo-scd"
maintainers:
  - "@pablo-scd"
  - "@vagkaratzas"
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
nextflow_workflow {

    // nf-test suite for the FASTQ_GENERATE_STATISTICS subworkflow.
    // Every test passes `false` for all five skip flags, so all tools run.
    name "Test Subworkflow FASTQ_GENERATE_STATISTICS"
    script "../main.nf"
    workflow "FASTQ_GENERATE_STATISTICS"

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/fastq_generate_statistics"
    tag "fastqc"
    tag "seqfu"
    tag "seqfu/check"
    tag "seqfu/stats"
    tag "seqkit"
    tag "seqkit/stats"
    tag "seqtk"
    tag "seqtk/comp"

    // Single-end reads, real execution
    test("sarscov2 - fastq - single_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_single', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ])
                input[1] = false
                input[2] = false
                input[3] = false
                input[4] = false
                input[5] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                {
                    // FastQC outputs are snapshotted by file name only;
                    // the remaining outputs are snapshotted as emitted.
                    assert snapshot(
                        file(workflow.out.fastqc_html[0][1]).name,
                        file(workflow.out.fastqc_zip[0][1]).name,
                        workflow.out.seqfu_check,
                        workflow.out.seqfu_stats,
                        workflow.out.seqfu_multiqc,
                        workflow.out.seqkit_stats,
                        workflow.out.seqtk_stats,
                        workflow.out.versions
                    ).match()
                }
            )
        }
    }

    // Paired-end reads, real execution
    test("sarscov2 - fastq - paired_end") {

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_paired', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                input[1] = false
                input[2] = false
                input[3] = false
                input[4] = false
                input[5] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                {
                    // Two FastQC html/zip files are expected, one per mate;
                    // each is snapshotted by file name only.
                    assert snapshot(
                        file(workflow.out.fastqc_html[0][1][0]).name,
                        file(workflow.out.fastqc_zip[0][1][0]).name,
                        file(workflow.out.fastqc_html[0][1][1]).name,
                        file(workflow.out.fastqc_zip[0][1][1]).name,
                        workflow.out.seqfu_check,
                        workflow.out.seqfu_stats,
                        workflow.out.seqfu_multiqc,
                        workflow.out.seqkit_stats,
                        workflow.out.seqtk_stats,
                        workflow.out.versions
                    ).match()
                }
            )
        }
    }

    // Single-end reads, stub run
    test("sarscov2 - fastq - single_end - stub") {

        options "-stub"

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_single', single_end:true ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
                ])
                input[1] = false
                input[2] = false
                input[3] = false
                input[4] = false
                input[5] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                {
                    // Snapshot the full output plus the de-duplicated,
                    // parsed content of the versions files.
                    assert snapshot(
                        workflow.out,
                        workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match()
                }
            )
        }
    }

    // Paired-end reads, stub run
    test("sarscov2 - fastq - paired_end - stub") {

        options "-stub"

        when {
            workflow {
                """
                input[0] = Channel.of([
                    [ id:'test_paired', single_end:false ], // meta map
                    [
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
                    ]
                ])
                input[1] = false
                input[2] = false
                input[3] = false
                input[4] = false
                input[5] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                {
                    // Snapshot the full output plus the de-duplicated,
                    // parsed content of the versions files.
                    assert snapshot(
                        workflow.out,
                        workflow.out.versions.collect{ path(it).yaml }.unique()
                    ).match()
                }
            )
        }
    }
}

0 commit comments

Comments
 (0)