Skip to content

Commit 88f61f0

Browse files
Merge pull request #810 from drpatelh/updates
Auto-detect whether using AWS iGenome and run appropriate STAR version
2 parents 70d2ad5 + 5169c05 commit 88f61f0

File tree

8 files changed

+80
-31
lines changed

8 files changed

+80
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
- [[#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer
1111
- [[#791](https://github.com/nf-core/rnaseq/issues/791)] - Add outputs for umitools dedup summary stats
12+
- [[#808](https://github.com/nf-core/rnaseq/issues/808)] - Auto-detect usage of Illumina iGenomes reference
1213
- Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2)
1314

1415
### Parameters

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ![nf-core/rnaseq](docs/images/nf-core/rnaseq_logo_light.png#gh-light-mode-only) ![nf-core/rnaseq](docs/images/nf-core/rnaseq_logo_dark.png#gh-dark-mode-only)
1+
# ![nf-core/rnaseq](docs/images/nf-core-rnaseq_logo_light.png#gh-light-mode-only) ![nf-core/rnaseq](docs/images/nf-core-rnaseq_logo_dark.png#gh-dark-mode-only)
22

33
[![GitHub Actions CI Status](https://github.com/nf-core/rnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/rnaseq/actions?query=workflow%3A%22nf-core+CI%22)
44
[![GitHub Actions Linting Status](https://github.com/nf-core/rnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/rnaseq/actions?query=workflow%3A%22nf-core+linting%22)

conf/test.config

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,14 @@ params {
2323
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_test.csv'
2424

2525
// Genome references
26-
fasta = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
26+
fasta = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fasta'
2727
gtf = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gtf.gz'
2828
gff = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genes.gff.gz'
2929
transcript_fasta = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/transcriptome.fasta'
3030
additional_fasta = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/gfp.fa.gz'
3131

3232
bbsplit_fasta_list = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/bbsplit_fasta_list.txt'
3333
hisat2_index = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/hisat2.tar.gz'
34-
star_index = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/star.tar.gz'
3534
salmon_index = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/salmon.tar.gz'
3635
rsem_index = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/rsem.tar.gz'
3736

modules/local/star_align.nf

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,54 @@ process STAR_ALIGN {
22
tag "$meta.id"
33
label 'process_high'
44

5-
// Note: 2.7X indices incompatible with AWS iGenomes.
6-
conda (params.enable_conda ? "bioconda::star=2.6.1d" : null)
5+
conda (params.enable_conda ? conda_str : null)
76
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
8-
'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' :
9-
'quay.io/biocontainers/star:2.6.1d--0' }"
7+
"https://depot.galaxyproject.org/singularity/${container_id}" :
8+
"quay.io/biocontainers/${container_id}" }"
109

1110
input:
1211
tuple val(meta), path(reads)
13-
path index
14-
path gtf
12+
path index
13+
path gtf
14+
val star_ignore_sjdbgtf
15+
val seq_platform
16+
val seq_center
17+
val is_aws_igenome
1518

1619
output:
1720
tuple val(meta), path('*d.out.bam') , emit: bam
1821
tuple val(meta), path('*Log.final.out') , emit: log_final
1922
tuple val(meta), path('*Log.out') , emit: log_out
2023
tuple val(meta), path('*Log.progress.out'), emit: log_progress
21-
path "versions.yml" , emit: versions
24+
path "versions.yml" , emit: versions
2225

2326
tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted
2427
tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript
2528
tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted
2629
tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq
2730
tuple val(meta), path('*.tab') , optional:true, emit: tab
31+
tuple val(meta), path('*.out.junction') , optional:true, emit: junction
32+
tuple val(meta), path('*.out.sam') , optional:true, emit: sam
2833

2934
when:
3035
task.ext.when == null || task.ext.when
3136

3237
script:
3338
def args = task.ext.args ?: ''
3439
def prefix = task.ext.prefix ?: "${meta.id}"
35-
def ignore_gtf = params.star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
36-
def seq_center = params.seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'"
37-
def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
40+
41+
// Note: STAR 2.7.x is incompatible with the indices shipped with AWS iGenomes, so use the older STAR version when an iGenomes reference is detected
42+
conda_str = "bioconda::star=2.7.9a bioconda::samtools=1.13 conda-forge::gawk=5.1.0"
43+
container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:a7908dfb0485a80ca94e4d17b0ac991532e4e989-0'
44+
if (is_aws_igenome) {
45+
conda_str = "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0"
46+
container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0'
47+
}
48+
49+
def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
50+
def seq_platform = seq_platform ? "'PL:$seq_platform'" : ""
51+
def seq_center = seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$seq_center' 'SM:$prefix' $seq_platform " : "--outSAMattrRGline ID:$prefix 'SM:$prefix' $seq_platform "
52+
def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
3853
def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : ''
3954
"""
4055
STAR \\
@@ -61,6 +76,8 @@ process STAR_ALIGN {
6176
cat <<-END_VERSIONS > versions.yml
6277
"${task.process}":
6378
star: \$(STAR --version | sed -e "s/STAR_//g")
79+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
80+
gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
6481
END_VERSIONS
6582
"""
6683
}

modules/local/star_genomegenerate.nf

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@ process STAR_GENOMEGENERATE {
22
tag "$fasta"
33
label 'process_high'
44

5-
// Note: 2.7X indices incompatible with AWS iGenomes.
6-
conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null)
5+
conda (params.enable_conda ? conda_str : null)
76
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
8-
'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' :
9-
'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }"
7+
"https://depot.galaxyproject.org/singularity/${container_id}" :
8+
"quay.io/biocontainers/${container_id}" }"
109

1110
input:
1211
path fasta
1312
path gtf
13+
val is_aws_igenome
1414

1515
output:
1616
path "star" , emit: index
@@ -20,9 +20,19 @@ process STAR_GENOMEGENERATE {
2020
task.ext.when == null || task.ext.when
2121

2222
script:
23-
def args = (task.ext.args ?: '').tokenize()
23+
def args = task.ext.args ?: ''
24+
def args_list = args.tokenize()
25+
26+
// Note: STAR 2.7.x is incompatible with the indices shipped with AWS iGenomes, so use the older STAR version when an iGenomes reference is detected
27+
conda_str = "bioconda::star=2.7.9a bioconda::samtools=1.13 conda-forge::gawk=5.1.0"
28+
container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:a7908dfb0485a80ca94e4d17b0ac991532e4e989-0'
29+
if (is_aws_igenome) {
30+
conda_str = "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0"
31+
container_id = 'mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0'
32+
}
33+
2434
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
25-
if (args.contains('--genomeSAindexNbases')) {
35+
if (args_list.contains('--genomeSAindexNbases')) {
2636
"""
2737
mkdir star
2838
STAR \\
@@ -32,11 +42,13 @@ process STAR_GENOMEGENERATE {
3242
--sjdbGTFfile $gtf \\
3343
--runThreadN $task.cpus \\
3444
$memory \\
35-
${args.join(' ')}
45+
$args
3646
3747
cat <<-END_VERSIONS > versions.yml
3848
"${task.process}":
3949
star: \$(STAR --version | sed -e "s/STAR_//g")
50+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
51+
gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
4052
END_VERSIONS
4153
"""
4254
} else {
@@ -53,11 +65,13 @@ process STAR_GENOMEGENERATE {
5365
--runThreadN $task.cpus \\
5466
--genomeSAindexNbases \$NUM_BASES \\
5567
$memory \\
56-
${args.join(' ')}
68+
$args
5769
5870
cat <<-END_VERSIONS > versions.yml
5971
"${task.process}":
6072
star: \$(STAR --version | sed -e "s/STAR_//g")
73+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
74+
gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
6175
END_VERSIONS
6276
"""
6377
}

subworkflows/local/align_star.nf

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,13 @@ include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools'
77

88
workflow ALIGN_STAR {
99
take:
10-
reads // channel: [ val(meta), [ reads ] ]
11-
index // channel: /path/to/star/index/
12-
gtf // channel: /path/to/genome.gtf
10+
reads // channel: [ val(meta), [ reads ] ]
11+
index // channel: /path/to/star/index/
12+
gtf // channel: /path/to/genome.gtf
13+
star_ignore_sjdbgtf // boolean: when using pre-built STAR indices, skip re-extracting and using splice junctions from the GTF file
14+
seq_platform // string : sequencing platform
15+
seq_center // string : sequencing center
16+
is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
1317

1418
main:
1519

@@ -18,7 +22,7 @@ workflow ALIGN_STAR {
1822
//
1923
// Map reads with STAR
2024
//
21-
STAR_ALIGN ( reads, index, gtf )
25+
STAR_ALIGN ( reads, index, gtf, star_ignore_sjdbgtf, seq_platform, seq_center, is_aws_igenome )
2226
ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first())
2327

2428
//

subworkflows/local/prepare_genome.nf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'
3131

3232
workflow PREPARE_GENOME {
3333
take:
34-
prepare_tool_indices // list : tools to prepare indices for
35-
biotype // string: if additional fasta file is provided
36-
// biotype value to use when appending entries to GTF file
34+
prepare_tool_indices // list : tools to prepare indices for
35+
biotype // string : biotype value to use when appending entries to the GTF file (only relevant if an additional fasta file is provided)
36+
is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
3737

3838
main:
3939

@@ -166,7 +166,7 @@ workflow PREPARE_GENOME {
166166
ch_star_index = file(params.star_index)
167167
}
168168
} else {
169-
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
169+
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf, is_aws_igenome ).index
170170
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
171171
}
172172
}

workflows/rnaseq.nf

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ if (anno_readme && file(anno_readme).exists()) {
6666
// Stage dummy file to be used as an optional input where required
6767
ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)
6868

69+
// Check if an AWS iGenome has been provided to use the appropriate version of STAR
70+
def is_aws_igenome = false
71+
if (params.fasta && params.gtf) {
72+
if ((file(params.fasta).getName() - '.gz' == 'genome.fa') && (file(params.gtf).getName() - '.gz' == 'genes.gtf')) {
73+
is_aws_igenome = true
74+
}
75+
}
76+
6977
/*
7078
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7179
CONFIG FILES
@@ -162,7 +170,9 @@ workflow RNASEQ {
162170
def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
163171
PREPARE_GENOME (
164172
prepareToolIndices,
165-
biotype
173+
biotype,
174+
is_aws_igenome
175+
166176
)
167177
ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
168178

@@ -267,7 +277,11 @@ workflow RNASEQ {
267277
ALIGN_STAR (
268278
ch_filtered_reads,
269279
PREPARE_GENOME.out.star_index,
270-
PREPARE_GENOME.out.gtf
280+
PREPARE_GENOME.out.gtf,
281+
params.star_ignore_sjdbgtf,
282+
'',
283+
params.seq_center ?: '',
284+
is_aws_igenome
271285
)
272286
ch_genome_bam = ALIGN_STAR.out.bam
273287
ch_genome_bam_index = ALIGN_STAR.out.bai

0 commit comments

Comments
 (0)