@@ -29,12 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
2929workflow PIPELINE_INITIALISATION {
3030
3131 take :
32- version : boolean // Display version and exit
33- help : boolean // Display help text
34- validate_params : boolean // Validate parameters against the schema at runtime
35- monochrome_logs : boolean // Do not use coloured log outputs
36- nextflow_cli_args : List // List of positional nextflow CLI args
37- outdir : String // The output directory where the results will be saved
32+ version : boolean // Display version and exit
33+ help : boolean // Display help text
34+ validate_params : boolean // Validate parameters against the schema at runtime
35+ monochrome_logs : boolean // Do not use coloured log outputs
36+ nextflow_cli_args : List<String > // List of positional nextflow CLI args
37+ outdir : String // The output directory where the results will be saved
38+ input : Path // File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files
39+ ena_metadata_fields : String // Comma-separated list of ENA metadata fields to fetch before downloading data
3840
3941 main :
4042
@@ -69,6 +71,23 @@ workflow PIPELINE_INITIALISATION {
6971 UTILS_NFCORE_PIPELINE (
7072 nextflow_cli_args
7173 )
74+
75+ //
76+ // Auto-detect input id type
77+ //
78+ ids = file(input)
79+ .splitCsv(header :false , sep :' ' , strip :true )
80+ .collect { row -> row[0 ] }
81+ .toUnique()
82+ if (! isSraId(ids)) {
83+ error(' Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!' )
84+ }
85+ if (! sraCheckENAMetadataFields(ena_metadata_fields)) {
86+ error(" Invalid option: '${ ena_metadata_fields} '. Minimally required fields for '--ena_metadata_fields': '${ valid_ena_metadata_fields.join(',')} '" )
87+ }
88+
89+ emit :
90+ ids
7291}
7392
7493/*
@@ -118,39 +137,29 @@ workflow PIPELINE_COMPLETION {
118137//
119138// Check if input ids are from the SRA
120139//
def isSraId(ids : List<String>) -> boolean {
    // Returns true only when `ids` is non-empty and every entry looks like an
    // SRA / ENA / DDBJ / GEO accession; returns false otherwise. The caller is
    // responsible for reporting the failure to the user.

    // An empty (or missing) id list has nothing that matches, so it is rejected.
    if (!ids) {
        return false
    }

    // Accepted prefixes: (SR|ER|DR) followed by one of A/P/R/S/X, SAMN/SAMEA/SAMD,
    // PRJNA/PRJEB/PRJDB, GSE/GSM -- each followed by one or more digits.
    def pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/

    // A single non-matching id makes the whole input invalid.
    return ids.every { id -> (id =~ pattern) as boolean }
}
143154
144155//
145156// Check and validate parameters
146157//
def sraCheckENAMetadataFields(ena_metadata_fields : String) -> boolean {
    // Checks that a comma-separated list of ENA metadata fields contains every
    // field minimally required to download FastQ files.
    //
    // ena_metadata_fields: comma-separated field names; entries are trimmed and
    //                      lower-cased before comparison. A null/empty value
    //                      means "use the required fields as-is" and is valid.
    // Returns true when all required fields are present, false otherwise.
    def valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5']

    // Nothing supplied: the required set itself is used, which trivially passes.
    if (!ena_metadata_fields) {
        return true
    }

    def actual_ena_metadata_fields = ena_metadata_fields
        .split(',')
        .collect { field -> field.trim().toLowerCase() }

    return actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)
}
155164
156165//
0 commit comments