-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.nf
More file actions
283 lines (219 loc) · 11.4 KB
/
main.nf
File metadata and controls
283 lines (219 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#!/usr/bin/env nextflow
/*
============================================================================
NextITS: Pipeline to process eukaryotic ITS amplicons
============================================================================
License: Apache-2.0
Github : https://github.com/vmikk/NextITS
Website: https://Next-ITS.github.io/
----------------------------------------------------------------------------
*/
// NB!!:
// - provide absolute paths to the input data (e.g. --input and --barcodes)
// - File names should not contain period (.) characters (except for extensions)
// Databases:
// - UDB for chimera identification
// Enable DSL2 syntax
nextflow.enable.dsl = 2
// Print the version and exit
// Triggered by `params.version`; output format: "NextITS <version>[ revision <short-commit>]"
if (params.version) {
    // Declare the variable locally (no implicit script-level global) and fall back to
    // "unknown" when the manifest has no version, matching how the logo banner builds its version string
    def ver = "NextITS " + (workflow.manifest.version ?: "unknown")
    // Append the first 7 characters of the commit ID when one is available
    if (workflow.commitId) { ver += " revision " + workflow.commitId.substring(0, 7) }
    println "${ver}\n"
    exit(0)
}
// Note: nf-schema plugin handles --help automatically via configuration in nextflow.config
// Show a custom help message and exit
// Runs only when `params.helpMsg` is truthy
if (params.helpMsg){
// NOTE(review): this `include` is placed inside a conditional, while a comment further down
// in this file states that `include` statements are resolved statically - confirm that the
// help module is loaded the way it is intended here
include { helpMsg } from './modules/help_message.nf'
// `helpMsg` is the function imported from ./modules/help_message.nf on the line above
helpMsg()
// Terminate with exit status 0 once the help text was requested
exit(0)
}
// Enable topic channels
// nextflow.preview.topic = true // Nextflow < 25.04.0
// nf-schema functions for parameter validation
include { validateParameters } from 'plugin/nf-schema'
// Include custom parameter summary function
include { paramSummary } from './modules/parameter_summary'
// Include color utilities
include { getColors; colorize; colorizeMultiple; errorMsg; warningMsg; infoMsg; successMsg } from './modules/colors'
// Include workflows
// NB! `include` statements are static, meaning they are resolved at compile time rather than at runtime!
include { S1 } from './workflows/STEP1.nf'
include { S2 } from './workflows/STEP2.nf'
include { seqstats } from './workflows/STEP1.nf'
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS
// Print NextITS logo
// `getColors` and `colorizeMultiple` are imported from ./modules/colors;
// both receive `params.monochrome_logs` (presumably to switch colors off - verify in that module)
def logoColors = getColors(params.monochrome_logs)
// Version shown in the banner: manifest version, or "unknown" when the manifest has none
def workflow_version = workflow.manifest.version ?: "unknown"
// Append the first 7 characters of the commit ID (in parentheses) when one is available
if (workflow.commitId) { workflow_version += " (${workflow.commitId.substring(0, 7)})" }
// Banner text: pipeline name + version, followed by a schematic of the
// SSU - ITS1 - 5.8S - ITS2 - LSU regions, framed by two dimmed separator lines.
// NB: whitespace inside this multi-line string is part of the printed output
def logo = """
${logoColors.dim}----------------------------------------------------${logoColors.reset}
${colorizeMultiple("Next", ['green', 'bold'], params.monochrome_logs)}${colorizeMultiple("ITS", ['purple', 'bold'], params.monochrome_logs)} ${logoColors.cyan}${workflow_version}${logoColors.reset}
${logoColors.green} SSU ${logoColors.purple}ITS1 ${logoColors.green}5.8S ${logoColors.purple}ITS2 ${logoColors.green}LSU ${logoColors.reset}
${logoColors.green} ▒▒▒▒▒▒▒▒▒${logoColors.purple}░░░░░░░░░${logoColors.green}▒▒▒▒▒${logoColors.purple}░░░░░░░░░░${logoColors.green}▒▒▒▒▒▒▒▒▒▒▒▒${logoColors.reset}
${logoColors.dim}----------------------------------------------------${logoColors.reset}
"""
// Emit the banner through the Nextflow logger
log.info logo
// Print all parameters using nf-schema plugin
// include { paramsSummaryLog } from 'plugin/nf-schema'
// log.info paramsSummaryLog(workflow) // will print params from Step-1 and Step-2 simultaneously
// Additional runtime parameter validation
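// These checks handle conditional logic and file existence checks
// that cannot be expressed in JSON Schema.
// NOTE(review): they are top-level statements, whereas `validateParameters()` is called
// inside the `workflow` block below - confirm the intended order relative to schema validation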
// Additional parameter validation for Step-1 (also applied to the `seqstats` step):
// make sure the required input files were specified for the selected sequencing platform
if( params.step == "Step1" || params.step == "seqstats" ) {

    // PacBio: a single input file is expected via `--input`
    if (params.input == false && params.seqplatform == "PacBio") {
        println( errorMsg("Please provide the input file with sequences in FASTQ.gz or BAM format with `--input` parameter.", params.monochrome_logs))
        exit(1)
    }

    // Illumina: the message asks for `--input_R1` AND `--input_R2`, so fail when
    // EITHER of them is missing (previously the check fired only when both were missing,
    // letting a run with just one of the two files slip through validation)
    if ((params.input_R1 == false || params.input_R2 == false) && params.seqplatform == "Illumina") {
        println( errorMsg("Please provide input files with sequences in FASTQ.gz format with `--input_R1` and `--input_R2` parameters.", params.monochrome_logs))
        exit(1)
    }

    // Barcodes are required unless the data are flagged as already demultiplexed
    if (params.barcodes == false && params.demultiplexed == false) {
        println( errorMsg("Please provide the file with sample barcodes in FASTA format with `--barcodes` parameter.", params.monochrome_logs))
        exit(1)
    }
}
if( params.step == "Step1" ) {

    // Flags reused by several checks below
    def onIllumina        = (params.seqplatform == "Illumina")
    def keepsUnmerged     = (params.illumina_keep_notmerged == true)
    def itsxRequested     = (params.its_region != "none")

    // ---- Reference-based chimera removal ----
    // A reference database is needed only when `ref` is one of the comma-separated chimera methods
    def refChimeraRequested = params.chimera_methods &&
        params.chimera_methods.toLowerCase().split(',').contains('ref')

    if (refChimeraRequested) {

        // The database must be specified and must exist on disk
        def dbUnavailable = !params.chimera_db || !file(params.chimera_db).exists()
        if (dbUnavailable) {
            println( errorMsg("For reference-based chimera removal, please provide the database in UDB format with `--chimera_db` parameter.", params.monochrome_logs))
            println( colorize(" See https://Next-ITS.github.io/installation/#databases for more information.", 'red', params.monochrome_logs))
            println( colorize("Alternatively, you can disable reference-based chimera removal with `--chimera_methods` parameter (set it to `none` or `denovo`).", 'red', params.monochrome_logs))
            exit(1)
        }

        // The database format is judged by the (case-insensitive) `.udb` file extension
        def looksLikeUdb = params.chimera_db.toLowerCase().endsWith('.udb')
        if (!looksLikeUdb) {
            println( errorMsg("The reference database file specified with `--chimera_db` parameter must be in UDB format.", params.monochrome_logs))
            println( colorize(" See https://Next-ITS.github.io/installation/#databases for more information.", 'red', params.monochrome_logs))
            exit(1)
        }
    }

    // ---- Platform-specific constraints ----
    // Homopolymer compression + non-merged Illumina reads: not implemented
    if (onIllumina && keepsUnmerged && params.hp == true) {
        println( errorMsg("Homopolymer compression is not implemented for Illumina non-merged reads (add `--hp false` to your command).", params.monochrome_logs))
        exit(1)
    }

    // Demultiplexed Illumina input: not implemented
    if (onIllumina && params.demultiplexed == true) {
        println( errorMsg("Handling demultiplexed data for Illumina is not implemented yet.", params.monochrome_logs))
        exit(1)
    }

    // Non-merged Illumina reads + ITSx: only a warning, the run continues
    if (onIllumina && keepsUnmerged && itsxRequested) {
        println( warningMsg("Unmerged Illumina reads are not compatible with ITSx. Amplicons will be primer-trimmed.", params.monochrome_logs))
    }

    // ---- ITSx profiles validation ----
    if (itsxRequested) {

        /*
        The schema pre-validates the `ITSx_tax` parameter with the following regex pattern:
        "^(?:all|
        (?:alveolata|bryophyta|bacillariophyta|amoebozoa|euglenozoa|fungi|chlorophyta|rhodophyta|phaeophyceae|marchantiophyta|metazoa|oomycota|haptophyceae|raphidophyceae|rhizaria|synurophyceae|tracheophyta|eustigmatophyceae|apusozoa|parabasalia)
        (?:,\\s*(?:alveolata|bryophyta|bacillariophyta|amoebozoa|euglenozoa|fungi|chlorophyta|rhodophyta|phaeophyceae|marchantiophyta|metazoa|oomycota|haptophyceae|raphidophyceae|rhizaria|synurophyceae|tracheophyta|eustigmatophyceae|apusozoa|parabasalia))*)$"
        which rejects:
          - `all` combined with other values
          - empty elements and trailing commas
          - values outside of the list
        The checks below repeat these rules one by one, with a dedicated message for each.
        */

        def taxParam = params.ITSx_tax

        // The value must be present and must not be a Boolean
        // (with `--ITSx_tax ""`, Nextflow may coerce the empty value / bare flag to a boolean)
        if (taxParam == null || taxParam instanceof Boolean) {
            println( errorMsg("Parameter --ITSx_tax must have a value (e.g. 'all' or 'fungi,rhizaria').", params.monochrome_logs) )
            exit(1)
        }

        // Blank (whitespace-only) values are rejected as well
        def taxString = taxParam.toString()
        if (taxString.trim().isEmpty()) {
            println( errorMsg("Parameter --ITSx_tax cannot be empty. Use 'all' or a comma-separated list of taxa.", params.monochrome_logs) )
            exit(1)
        }

        // Profiles accepted in a comma-separated list
        def supportedProfiles = [
            'alveolata','bryophyta','bacillariophyta','amoebozoa','euglenozoa','fungi',
            'chlorophyta','rhodophyta','phaeophyceae','marchantiophyta','metazoa','oomycota',
            'haptophyceae','raphidophyceae','rhizaria','synurophyceae','tracheophyta',
            'eustigmatophyceae','apusozoa','parabasalia'
        ] as Set

        // Split on commas; the `-1` limit keeps trailing empty tokens so that they can be reported
        def rawTokens = taxString.split(',', -1) as List<String>

        // Positions (0-based) of empty or whitespace-only tokens (covers ",," and trailing commas)
        def blankPositions = (0..<rawTokens.size()).findAll { pos ->
            def tok = rawTokens[pos]
            tok == null || tok.trim().isEmpty()
        }
        if (blankPositions) {
            println( errorMsg("Parameter --ITSx_tax: empty entries are not allowed (check commas at positions: ${blankPositions.join(', ')}).", params.monochrome_logs) )
            exit(1)
        }

        // Whitespace inside a name is not allowed (surrounding whitespace is trimmed first)
        def tokensWithSpaces = rawTokens.findAll { tok -> !(tok.toString().trim() ==~ /\S+/) }
        if (tokensWithSpaces) {
            println( errorMsg("Parameter --ITSx_tax: whitespace is not allowed in profile names.", params.monochrome_logs) )
            exit(1)
        }

        // From here on, work with the trimmed names
        def profiles = rawTokens*.trim()

        // Names listed more than once
        def repeated = profiles.countBy { it }.findAll { entry -> entry.value > 1 }.keySet().toList()
        if (repeated) {
            println( errorMsg("Parameter --ITSx_tax: duplicated profile names are not allowed: ${repeated.join(', ')}", params.monochrome_logs) )
            exit(1)
        }

        // `all` must stand alone
        if (profiles.contains('all') && profiles.size() > 1) {
            println( errorMsg("Parameter --ITSx_tax: do not combine 'all' with taxon-specific profile names.", params.monochrome_logs))
            exit(1)
        }

        // Compare against the supported names, unless the value is exactly `all`
        if (profiles != ['all']) {
            def unknownProfiles = (profiles as Set) - supportedProfiles
            if (unknownProfiles) {
                println( errorMsg("Parameter --ITSx_tax: invalid profile names - ${unknownProfiles.join(', ')}", params.monochrome_logs) )
                println( colorize(" Supported profiles: `all` OR a comma-separated list of the following: ${supportedProfiles.join(', ')}", 'red', params.monochrome_logs))
                exit(1)
            }
        }

        // Apusozoa passes the list above, but currently there is no X.hmm profile for it
        if (profiles.contains('apusozoa')) {
            println( errorMsg("Parameter --ITSx_tax: Apusozoa profile is not yet supported in ITSx.", params.monochrome_logs))
            exit(1)
        }

    } // end of ITSx profiles validation

} // end of Step-1 parameter validation
// Additional parameter validation for Step-2
if( params.step == "Step2" ) {

    // LULU requires pre-clustering or clustering to be enabled
    if (params.preclustering == "none" && params.clustering == "none" && params.lulu == true){
        println errorMsg("LULU can not be applied when pre-clustering and clustering are set to 'none'", params.monochrome_logs)
        exit(1)
    }

    // Chunking counts as enabled only when `chunking_n` is set AND greater than 1.
    // (The previous condition `chunking_n > 1 || chunking_n != null` was also true for
    //  `--chunking_n 1`, i.e. for the very value the message below recommends
    //  for disabling chunking.)
    def chunkingEnabled = params.chunking_n != null && params.chunking_n > 1

    // By-run pooling in DADA2 cannot be combined with chunking
    if (params.preclustering == "dada2" && params.dada2_pooling == "byrun" && chunkingEnabled){
        println errorMsg("By-sequencing-run pooling in DADA2 is not compatible with chunking.", params.monochrome_logs)
        println( colorize("Set `--chunking_n` to 1 to disable chunking OR use `--dada2_pooling global`.", 'red', params.monochrome_logs))
        exit(1)
    }

} // end of Step-2 parameter validation
// Entry workflow: summarize parameters, validate them, then run the requested step
workflow {

    // Step-specific parameter summary (function imported from ./modules/parameter_summary)
    paramSummary(workflow, params)

    // Parameter validation (function imported from the nf-schema plugin)
    validateParameters()

    // `params.step` holds a single value, so at most one of the branches is taken
    if (params.step == "Step1") {
        S1()
    } else if (params.step == "Step2") {
        S2()
    } else if (params.step == "seqstats") {
        seqstats()
    }
}
// On completion: print the finishing time, the run duration, and the final status
workflow.onComplete {
    // Human-readable outcome derived from the success flag
    def outcome = workflow.success ? 'All done!' : 'Failed'

    println "Pipeline completed at : ${workflow.complete}"
    println "Duration : ${workflow.duration}"
    println "Execution status : ${outcome}"
}
// On error: report the message attached to the failed run
workflow.onError {
    def reason = workflow.errorMessage
    println "Pipeline execution stopped with the following message: ${reason}"
}