Skip to content

Commit b775286

Browse files
committed
Implement meta.id detection for GFFREAD
1 parent cb1317e commit b775286

File tree

1 file changed

+93
-79
lines changed

1 file changed

+93
-79
lines changed

subworkflows/local/prepare_genome.nf

Lines changed: 93 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -3,33 +3,35 @@
33
//
44

55
include {
6-
GUNZIP as GUNZIP_FASTA
7-
GUNZIP as GUNZIP_GTF
8-
GUNZIP as GUNZIP_GFF
9-
GUNZIP as GUNZIP_GENE_BED
10-
GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/gunzip/main'
6+
GUNZIP as GUNZIP_FASTA ;
7+
GUNZIP as GUNZIP_GTF ;
8+
GUNZIP as GUNZIP_GFF ;
9+
GUNZIP as GUNZIP_GENE_BED ;
10+
GUNZIP as GUNZIP_BLACKLIST
11+
} from '../../modules/nf-core/gunzip/main'
1112

1213
include {
13-
UNTAR as UNTAR_BWA_INDEX
14-
UNTAR as UNTAR_BOWTIE2_INDEX
15-
UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main'
16-
17-
include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
18-
include { GFFREAD } from '../../modules/nf-core/gffread/main'
19-
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
20-
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
21-
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
22-
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'
23-
24-
include { GTF2BED } from '../../modules/local/gtf2bed'
25-
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
26-
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'
14+
UNTAR as UNTAR_BWA_INDEX ;
15+
UNTAR as UNTAR_BOWTIE2_INDEX ;
16+
UNTAR as UNTAR_STAR_INDEX
17+
} from '../../modules/nf-core/untar/main'
18+
19+
include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
20+
include { GFFREAD } from '../../modules/nf-core/gffread/main'
21+
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
22+
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
23+
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
24+
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'
25+
26+
include { GTF2BED } from '../../modules/local/gtf2bed'
27+
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
28+
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'
2729

2830
workflow PREPARE_GENOME {
2931
take:
3032
genome // string: genome name
3133
genomes // map: genome attributes
32-
prepare_tool_index // string : tool to prepare index for
34+
prepare_tool_index // string : tool to prepare index for
3335
fasta // path: path to genome fasta file
3436
gtf // file: /path/to/genome.gtf
3537
gff // file: /path/to/genome.gff
@@ -63,25 +65,25 @@ workflow PREPARE_GENOME {
6365
if (gtf.endsWith('.gz')) {
6466
ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
6567
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
66-
} else {
67-
ch_gtf = Channel.value(file(gtf))
6868
}
6969
else {
7070
ch_gtf = Channel.value(file(gtf, checkIfExists: true))
7171
}
7272
}
7373
else if (gff) {
74-
if (gff.endsWith('.gz')) {
75-
ch_gff = GUNZIP_GFF([[:], gff]).gunzip.map { it[1] }
74+
if (gff.endsWith('.gz')) {
75+
ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] }
7676
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
77-
} else {
78-
ch_gff = Channel.value(file(gff))
7977
}
8078
else {
81-
ch_gff = Channel.value(file(gff, checkIfExists: true)).map { [[:], it] }
79+
ch_gff = Channel.value(file(gff, checkIfExists: true))
8280
}
8381

84-
ch_gtf = GFFREAD(ch_gff, []).gtf.map { it[1] }
82+
extension = (gff - '.gz').tokenize('.')[-1]
83+
id = gff.toString() - '.gz' - ".${extension}"
84+
85+
ch_gtf = GFFREAD(ch_gff.map{[[id:id], it]}, []).gtf.map { it[1] }
86+
8587
ch_versions = ch_versions.mix(GFFREAD.out.versions)
8688
}
8789

@@ -91,9 +93,10 @@ workflow PREPARE_GENOME {
9193
ch_blacklist = Channel.empty()
9294
if (blacklist) {
9395
if (blacklist.endsWith('.gz')) {
94-
ch_blacklist = GUNZIP_BLACKLIST ( [ [:], blacklist ] ).gunzip.map{ it[1] }
95-
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
96-
} else {
96+
ch_blacklist = GUNZIP_BLACKLIST([[:], blacklist]).gunzip.map { it[1] }
97+
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
98+
}
99+
else {
97100
ch_blacklist = Channel.value(file(blacklist))
98101
}
99102
}
@@ -107,38 +110,41 @@ workflow PREPARE_GENOME {
107110
def make_bed = false
108111
if (!gene_bed) {
109112
make_bed = true
110-
} else if (genome && gtf) {
111-
if (genomes[ genome ].gtf != gtf) {
113+
}
114+
else if (genome && gtf) {
115+
if (genomes[genome].gtf != gtf) {
112116
make_bed = true
113117
}
114118
}
115119

116120
if (make_bed) {
117-
ch_gene_bed = GTF2BED ( ch_gtf ).bed
121+
ch_gene_bed = GTF2BED(ch_gtf).bed
118122
ch_versions = ch_versions.mix(GTF2BED.out.versions)
119-
} else {
123+
}
124+
else {
120125
if (gene_bed.endsWith('.gz')) {
121-
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map{ it[1] }
126+
ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] }
122127
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
123-
} else {
128+
}
129+
else {
124130
ch_gene_bed = Channel.value(file(gene_bed))
125131
}
126132
}
127133

128134
//
129135
// Create chromosome sizes file
130136
//
131-
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
137+
CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] })
132138
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
133-
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] }
134-
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
139+
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
140+
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
135141

136142
//
137143
// Prepare genome intervals for filtering by removing regions in blacklist file
138144
//
139145
ch_genome_filtered_bed = Channel.empty()
140146

141-
GENOME_BLACKLIST_REGIONS (
147+
GENOME_BLACKLIST_REGIONS(
142148
ch_chrom_sizes,
143149
ch_blacklist.ifEmpty([])
144150
)
@@ -152,14 +158,16 @@ workflow PREPARE_GENOME {
152158
if (prepare_tool_index == 'bwa') {
153159
if (bwa_index) {
154160
if (bwa_index.endsWith('.tar.gz')) {
155-
ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwa_index ] ).untar
156-
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
157-
} else {
158-
ch_bwa_index = [ [:], file(bwa_index) ]
161+
ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar
162+
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
163+
}
164+
else {
165+
ch_bwa_index = [[:], file(bwa_index)]
159166
}
160-
} else {
161-
ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index
162-
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
167+
}
168+
else {
169+
ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index
170+
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
163171
}
164172
}
165173

@@ -170,14 +178,16 @@ workflow PREPARE_GENOME {
170178
if (prepare_tool_index == 'bowtie2') {
171179
if (bowtie2_index) {
172180
if (bowtie2_index.endsWith('.tar.gz')) {
173-
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], bowtie2_index ] ).untar
174-
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
175-
} else {
176-
ch_bowtie2_index = [ [:], file(bowtie2_index) ]
181+
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX([[:], bowtie2_index]).untar
182+
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
183+
}
184+
else {
185+
ch_bowtie2_index = [[:], file(bowtie2_index)]
177186
}
178-
} else {
179-
ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index
180-
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
187+
}
188+
else {
189+
ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index
190+
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
181191
}
182192
}
183193

@@ -188,14 +198,16 @@ workflow PREPARE_GENOME {
188198
if (prepare_tool_index == 'chromap') {
189199
if (chromap_index) {
190200
if (chromap_index.endsWith('.tar.gz')) {
191-
ch_chromap_index = UNTARFILES ( [ [:], chromap_index ] ).files
192-
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
193-
} else {
194-
ch_chromap_index = [ [:], file(chromap_index) ]
201+
ch_chromap_index = UNTARFILES([[:], chromap_index]).files
202+
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
203+
}
204+
else {
205+
ch_chromap_index = [[:], file(chromap_index)]
195206
}
196-
} else {
197-
ch_chromap_index = CHROMAP_INDEX ( ch_fasta.map { [ [:], it ] } ).index
198-
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
207+
}
208+
else {
209+
ch_chromap_index = CHROMAP_INDEX(ch_fasta.map { [[:], it] }).index
210+
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
199211
}
200212
}
201213

@@ -206,27 +218,29 @@ workflow PREPARE_GENOME {
206218
if (prepare_tool_index == 'star') {
207219
if (star_index) {
208220
if (star_index.endsWith('.tar.gz')) {
209-
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map{ it[1] }
210-
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
211-
} else {
221+
ch_star_index = UNTAR_STAR_INDEX([[:], star_index]).untar.map { it[1] }
222+
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
223+
}
224+
else {
212225
ch_star_index = Channel.value(file(star_index))
213226
}
214-
} else {
215-
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
216-
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
227+
}
228+
else {
229+
ch_star_index = STAR_GENOMEGENERATE(ch_fasta, ch_gtf).index
230+
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
217231
}
218232
}
219233

220234
emit:
221-
fasta = ch_fasta // path: genome.fasta
222-
fai = ch_fai // path: genome.fai
223-
gtf = ch_gtf // path: genome.gtf
224-
gene_bed = ch_gene_bed // path: gene.bed
225-
chrom_sizes = ch_chrom_sizes // path: genome.sizes
226-
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
227-
bwa_index = ch_bwa_index // path: bwa/index/
228-
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
229-
chromap_index = ch_chromap_index // path: genome.index
230-
star_index = ch_star_index // path: star/index/
231-
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
235+
fasta = ch_fasta // path: genome.fasta
236+
fai = ch_fai // path: genome.fai
237+
gtf = ch_gtf // path: genome.gtf
238+
gene_bed = ch_gene_bed // path: gene.bed
239+
chrom_sizes = ch_chrom_sizes // path: genome.sizes
240+
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
241+
bwa_index = ch_bwa_index // path: bwa/index/
242+
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
243+
chromap_index = ch_chromap_index // path: genome.index
244+
star_index = ch_star_index // path: star/index/
245+
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
232246
}

0 commit comments

Comments
 (0)