1- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
1+ # yaml-language-server: $schema=https://raw.githubusercontent.com/ebi-metagenomics/nf-modules/master/subworkflows/yaml-schema.json
22name : " detect_rna"
33description : Extraction of specific cmsearch-identified RNA sequences from a fasta
44 file using EASEL
@@ -10,13 +10,23 @@ keywords:
1010 - cmscan
1111 - covariance models
1212components :
13+ - seqkit/split2 :
14+ git_remote : https://github.com/nf-core/modules.git
15+ - cat/cat :
16+ git_remote : https://github.com/nf-core/modules.git
1317 - infernal/cmsearch
1418 - infernal/cmscan
1519 - convertcmscantocmsearch
1620 - cmsearchtbloutdeoverlap
1721 - easel/eslsfetch
22+ - extractcoords
1823input :
19- - ch_fasta :
24+ - meta :
25+ type : map
26+ description : |
27+ Groovy Map containing sample information
28+ e.g. `[ id:'sample1', single_end:false ]`
29+ ch_fasta :
2030 type : file
2131 description : |
2232 The input channel containing the fasta files
@@ -36,16 +46,157 @@ input:
3646 - mode :
3747 type : value
3848 description : choose cmsearch or cmscan method to use
49+ - separate_subunits :
50+ type : boolean
51+ description : Specify true to separate hits into the different RNA subunits
52+ - chunk_flag :
53+ type : boolean
54+ description : |
55+ Specify true to use seqkit/split2 to chunk contigs into sequences of a specific length, e.g. 50M.
56+ IMPORTANT: you must specify the chunk length using `ext.args`, e.g. `--by-length 50M`.
57+ See the nextflow.config used by the unit test for a full example.
3958output :
4059 - versions :
4160 type : file
4261 description : |
4362 File containing software versions
4463 Structure: [ path(versions.yml) ]
4564 pattern : " versions.yml"
46- - cmsearch_deoverlap_out :
47- description : " "
48- - easel_out :
49- description : " "
65+ - cmsearch_deoverlap_coords :
66+ description : |
67+ Channel containing deoverlapped cmsearch .tblout files
68+ Structure: [ val(meta), path("*.tblout.deoverlapped") ]
69+ meta :
70+ type : map
71+ description : |
72+ Groovy Map containing sample information
73+ e.g. `[ id:'sample1', single_end:false ]`
74+ " *.tblout.deoverlapped " :
75+ type : file
76+ description : Deoverlapped .tblout file
77+ pattern : " *.tblout.deoverlapped"
78+ - easel_coords :
79+ description : |
80+ Channel containing fasta output from esl-sfetch
81+ Structure: [ val(meta), path("*.fasta") ]
82+ meta :
83+ type : map
84+ description : |
85+ Groovy Map containing sample information
86+ e.g. `[ id:'sample1', single_end:false ]`
87+ " *.fasta " :
88+ type : file
89+ description : Fasta file output from running esl-sfetch to extract sequences by name
90+ pattern : " *.{fasta}"
91+ - ssu_fasta :
92+ description : |
93+ Channel containing SSU fasta sequences
94+ Structure: [ val(meta), path("sequence-categorisation/*SSU.fasta") ]
95+ meta :
96+ type : map
97+ description : |
98+ Groovy Map containing sample information
99+ e.g. `[ id:'sample1', single_end:false ]`
100+ " sequence-categorisation/*SSU.fasta " :
101+ type : file
102+ description : Fasta file containing the SSU sequences
103+ pattern : " *.fasta"
104+ ontologies : []
105+ - lsu_fasta :
106+ description : |
107+ Channel containing LSU fasta sequences
108+ Structure: [ val(meta), path("sequence-categorisation/*LSU.fasta") ]
109+ meta :
110+ type : map
111+ description : |
112+ Groovy Map containing sample information
113+ e.g. `[ id:'sample1', single_end:false ]`
114+ " sequence-categorisation/*LSU.fasta " :
115+ type : file
116+ description : Fasta file containing the LSU sequences
117+ pattern : " *.fasta"
118+ ontologies : []
119+ - rrna_bacteria :
120+ description : |
121+ Channel containing bacterial rRNA sequences
122+ Structure: [ val(meta), path("sequence-categorisation/*rRNA_bacteria*.fasta") ]
123+ meta :
124+ type : map
125+ description : |
126+ Groovy Map containing sample information
127+ e.g. `[ id:'sample1', single_end:false ]`
128+ " sequence-categorisation/*rRNA_bacteria*.fasta " :
129+ type : file
130+ description : Fasta file containing bacterial rRNA
131+ pattern : " *.fasta"
132+ ontologies : []
133+ - rrna_archaea :
134+ description : |
135+ Channel containing archaeal rRNA sequences
136+ Structure: [ val(meta), path("sequence-categorisation/*rRNA_archaea*.fasta") ]
137+ meta :
138+ type : map
139+ description : |
140+ Groovy Map containing sample information
141+ e.g. `[ id:'sample1', single_end:false ]`
142+ " sequence-categorisation/*rRNA_archaea*.fasta " :
143+ type : file
144+ description : Fasta file containing archaeal rRNA
145+ pattern : " *.fasta"
146+ ontologies : []
147+ - eukarya :
148+ description : |
149+ Channel containing eukaryotic rRNA sequences
150+ Structure: [ val(meta), path("sequence-categorisation/*rRNA_eukarya*.fasta") ]
151+ meta :
152+ type : map
153+ description : |
154+ Groovy Map containing sample information
155+ e.g. `[ id:'sample1', single_end:false ]`
156+ " sequence-categorisation/*rRNA_eukarya*.fasta " :
157+ type : file
158+ description : Fasta file containing eukaryotic rRNA
159+ pattern : " *.fasta"
160+ ontologies : []
161+ - fiveS_fasta :
162+ description : |
163+ Channel containing 5S rRNA sequences
164+ Structure: [ val(meta), path("sequence-categorisation/*5S.fasta") ]
165+ meta :
166+ type : map
167+ description : |
168+ Groovy Map containing sample information
169+ e.g. `[ id:'sample1', single_end:false ]`
170+ " sequence-categorisation/*5S.fasta " :
171+ type : file
172+ description : " 5S rRNA nucleotide sequences"
173+ ontologies : []
174+ - five_eightS_fasta :
175+ description : |
176+ Channel containing 5.8S rRNA sequences
177+ Structure: [ val(meta), path("sequence-categorisation/*5_8S.fasta") ]
178+ meta :
179+ type : map
180+ description : |
181+ Groovy Map containing sample information
182+ e.g. `[ id:'sample1', single_end:false ]`
183+ " sequence-categorisation/*5_8S.fasta " :
184+ type : file
185+ description : " 5.8S rRNA nucleotide sequences"
186+ ontologies : []
187+ - ncrna_fasta :
188+ description : |
189+ Channel containing non-coding RNA sequences
190+ Structure: [ val(meta), path("sequence-categorisation/*other_ncRNA.fasta") ]
191+ meta :
192+ type : map
193+ description : |
194+ Groovy Map containing sample information
195+ e.g. `[ id:'sample1', single_end:false ]`
196+ " sequence-categorisation/*other_ncRNA.fasta " :
197+ type : file
198+ description : " non-coding RNA nucleotide sequences"
199+ ontologies : []
200+
50201authors :
51202 - " @Kate_Sakharova"
0 commit comments