Skip to content

Commit 99a662b

Browse files
authored
Merge pull request #910 from drpatelh/strand
Add 'auto' option to auto-detect strandedness
2 parents 1160e14 + 09893dc commit 99a662b

File tree

20 files changed

+331
-41
lines changed

20 files changed

+331
-41
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
### Enhancements & fixes
99

1010
- Bump minimum Nextflow version from `21.10.3` -> `22.10.1`
11-
- Updated pipeline template to [nf-core/tools 2.7.1](https://github.com/nf-core/tools/releases/tag/2.7.1)
11+
- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2)
12+
- [[#729](https://github.com/nf-core/rnaseq/issues/729)] - Add 'auto' option to samplesheet to automatically detect strandedness for samples
1213
- [[#891](https://github.com/nf-core/rnaseq/issues/891)] - Skip MarkDuplicates when UMIs are used
1314
- [[#896](https://github.com/nf-core/rnaseq/issues/896)] - Remove `copyTo` call for iGenomes README
1415
- [[#897](https://github.com/nf-core/rnaseq/issues/897)] - Use `--skip_preseq` by default

CITATIONS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
> Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677.
2424
25+
- [fq](https://github.com/stjude-rust-labs/fq)
26+
2527
- [GffRead](https://pubmed.ncbi.nlm.nih.gov/32489650/)
2628

2729
> Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: 10.12688/f1000research.23297.2. eCollection 2020. PubMed PMID: 32489650; PubMed Central PMCID: PMC7222033.

bin/check_samplesheet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def check_samplesheet(file_in, file_out):
9999
)
100100

101101
## Check strandedness
102-
strandednesses = ["unstranded", "forward", "reverse"]
102+
strandednesses = ["unstranded", "forward", "reverse", "auto"]
103103
if strandedness:
104104
if strandedness not in strandednesses:
105105
print_error(

conf/modules.config

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,33 @@ if (!params.skip_bbsplit && params.bbsplit_fasta_list) {
166166
}
167167
}
168168

169+
//
170+
// Read subsampling and strandedness inference options
171+
//
172+
173+
process {
174+
withName: 'FQ_SUBSAMPLE' {
175+
ext.args = '--record-count 1000000 --seed 1'
176+
ext.prefix = { "${meta.id}.subsampled" }
177+
publishDir = [
178+
path: { "${params.outdir}/sample_fastq/fastq" },
179+
mode: params.publish_dir_mode,
180+
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
181+
enabled: false
182+
]
183+
}
184+
185+
withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' {
186+
ext.args = '--skipQuant'
187+
publishDir = [
188+
path: { "${params.outdir}/sample_fastq/salmon" },
189+
mode: params.publish_dir_mode,
190+
saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename },
191+
enabled: false
192+
]
193+
}
194+
}
195+
169196
//
170197
// Read QC and trimming options
171198
//
@@ -546,7 +573,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
546573
publishDir = [
547574
path: { "${params.outdir}/${params.aligner}" },
548575
mode: params.publish_dir_mode,
549-
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
576+
saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename }
550577
]
551578
}
552579

@@ -1051,7 +1078,7 @@ if (params.pseudo_aligner == 'salmon') {
10511078
publishDir = [
10521079
path: { "${params.outdir}/${params.pseudo_aligner}" },
10531080
mode: params.publish_dir_mode,
1054-
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
1081+
saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename }
10551082
]
10561083
}
10571084

conf/test.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ params {
2020
max_time = '6.h'
2121

2222
// Input data
23-
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_test.csv'
23+
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.10/samplesheet_test.csv'
2424

2525
// Genome references
2626
fasta = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fasta'

conf/test_full.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ params {
1515
config_profile_description = 'Full test dataset to check pipeline function'
1616

1717
// Parameters for full-size test
18-
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_full.csv'
18+
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.10/samplesheet_full.csv'
1919
genome = 'GRCh37'
2020
pseudo_aligner = 'salmon'
2121
}

docs/usage.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ You will need to create a samplesheet with information about the samples you wou
1414

1515
### Multiple runs of the same sample
1616

17-
The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
17+
The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once, e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes. If you set the strandedness value to `auto`, the pipeline will sub-sample the input FastQ files to 1 million reads, use Salmon Quant to infer the strandedness automatically, and then propagate this information to the remainder of the pipeline. If the strandedness has been inferred or provided incorrectly, a warning will be shown at the top of the MultiQC report, so please be sure to check for it when looking at the QC for your samples.
1818

1919
```console
2020
sample,fastq_1,fastq_2,strandedness
21-
CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,unstranded
22-
CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,unstranded
23-
CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,unstranded
21+
CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto
22+
CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto
23+
CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto
2424
```
2525

2626
### Full samplesheet
@@ -45,7 +45,7 @@ TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,reverse
4545
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
4646
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
4747
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
48-
| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward` or `reverse`. |
48+
| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward`, `reverse` or `auto`. |
4949

5050
An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
5151

lib/WorkflowRnaseq.groovy

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// This file holds several functions specific to the workflow/rnaseq.nf in the nf-core/rnaseq pipeline
33
//
44

5+
import groovy.json.JsonSlurper
56
import groovy.text.SimpleTemplateEngine
67

78
class WorkflowRnaseq {
@@ -164,6 +165,24 @@ class WorkflowRnaseq {
164165
}
165166
}
166167

168+
//
169+
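// Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness (falls back to 'reverse' when the first 'library_types' entry is empty or not one of U/IU, SF/ISF, SR/ISR)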
170+
//
171+
public static String getSalmonInferredStrandedness(json_file) {
172+
def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
173+
def strandedness = 'reverse'
174+
if (lib_type) {
175+
if (lib_type in ['U', 'IU']) {
176+
strandedness = 'unstranded'
177+
} else if (lib_type in ['SF', 'ISF']) {
178+
strandedness = 'forward'
179+
} else if (lib_type in ['SR', 'ISR']) {
180+
strandedness = 'reverse'
181+
}
182+
}
183+
return strandedness
184+
}
185+
167186
//
168187
// Function that parses TrimGalore log output file to get total number of reads after trimming
169188
//

modules.json

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
3232
"installed_by": ["fastq_fastqc_umitools_trimgalore"]
3333
},
34+
"fq/subsample": {
35+
"branch": "master",
36+
"git_sha": "ad462aa294faf9a8c42688a08daf81a580594f70",
37+
"installed_by": ["modules", "fastq_subsample_fq_salmon"]
38+
},
3439
"gffread": {
3540
"branch": "master",
3641
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
@@ -123,13 +128,13 @@
123128
},
124129
"salmon/index": {
125130
"branch": "master",
126-
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
131+
"git_sha": "94b06f1683ddf893cf06525f6e7f0573ad8fbf83",
127132
"installed_by": ["modules"]
128133
},
129134
"salmon/quant": {
130135
"branch": "master",
131-
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
132-
"installed_by": ["modules"]
136+
"git_sha": "94b06f1683ddf893cf06525f6e7f0573ad8fbf83",
137+
"installed_by": ["modules", "fastq_subsample_fq_salmon"]
133138
},
134139
"samtools/flagstat": {
135140
"branch": "master",
@@ -262,6 +267,11 @@
262267
"branch": "master",
263268
"git_sha": "b51a69e30973c71950225c817ad07a3337d22c40",
264269
"installed_by": ["subworkflows"]
270+
},
271+
"fastq_subsample_fq_salmon": {
272+
"branch": "master",
273+
"git_sha": "0098bc93f6219c6194f443f0feb089ba83717384",
274+
"installed_by": ["subworkflows"]
265275
}
266276
}
267277
}

modules/local/dupradar.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ process DUPRADAR {
22
tag "$meta.id"
33
label 'process_long'
44

5-
conda "bioconda::bioconductor-dupradar=1.18.0"
5+
conda "bioconda::bioconductor-dupradar=1.28.0"
66
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
7-
'https://depot.galaxyproject.org/singularity/bioconductor-dupradar:1.18.0--r40_1' :
8-
'quay.io/biocontainers/bioconductor-dupradar:1.18.0--r40_1' }"
7+
'https://depot.galaxyproject.org/singularity/bioconductor-dupradar:1.28.0--r42hdfd78af_0' :
8+
'quay.io/biocontainers/bioconductor-dupradar:1.28.0--r42hdfd78af_0' }"
99

1010
input:
1111
tuple val(meta), path(bam)

0 commit comments

Comments
 (0)