Skip to content

Commit 96f6988

Browse files
committed
Fix #825
1 parent d5034fa commit 96f6988

File tree

8 files changed

+115
-8
lines changed

8 files changed

+115
-8
lines changed

CHANGELOG.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,25 @@
33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
55

6-
## [Unpublished Version / DEV]
6+
## [[3.8](https://github.com/nf-core/rnaseq/releases/tag/3.8)] - 2022-05-25
7+
8+
### :warning: Major enhancements
9+
10+
Fixed a well-hidden bug in the UMI processing mode of the pipeline when using `--with_umi --aligner star_salmon`, as reported by [Lars Roed Ingerslev](https://github.com/lars-work-sund). Paired-end BAM files were not appropriately name-sorted after `umi_tools dedup`, which ultimately resulted in incorrect reading and quantification with Salmon. If you have used previous versions of the pipeline to analyse paired-end UMI data, it will need to be reprocessed using this version of the pipeline. See [#828](https://github.com/nf-core/rnaseq/issues/828) for more context.
711

812
### Enhancements & fixes
913

1014
- [[#824](https://github.com/nf-core/rnaseq/issues/824)] - Add explicit docs for usage of featureCounts in the pipeline
15+
- [[#825](https://github.com/nf-core/rnaseq/issues/825)] - Pipeline fails due to trimming-related removal of all reads from a sample
1116
- [[#828](https://github.com/nf-core/rnaseq/issues/828)] - Filter BAM output of UMI-tools dedup before passing to Salmon quant
1217
- Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1)
1318

1419
### Parameters
1520

21+
| Old parameter | New parameter |
22+
| ------------- | --------------------- |
23+
| | `--min_trimmed_reads` |
24+
1625
## [[3.7](https://github.com/nf-core/rnaseq/releases/tag/3.7)] - 2022-05-03
1726

1827
### :warning: Major enhancements

assets/multiqc_config.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ run_modules:
2828

2929
# Order of modules
3030
top_modules:
31+
- "fail_trimmed_samples"
3132
- "fail_mapped_samples"
3233
- "fail_strand_check"
3334
- "star_rsem_deseq2_pca"
@@ -140,6 +141,15 @@ sp:
140141

141142
# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml
142143
custom_data:
144+
fail_trimmed_samples:
145+
section_name: "WARNING: Fail Trimming Check"
146+
description: "List of samples that failed the minimum trimmed reads threshold specified via the '--min_trimmed_reads' parameter, and hence were ignored for the downstream processing steps."
147+
plot_type: "table"
148+
pconfig:
149+
id: "fail_trimmed_samples_table"
150+
table_title: "Samples failed trimming threshold"
151+
namespace: "Samples failed trimming threshold"
152+
format: "{:.0f}"
143153
fail_mapped_samples:
144154
section_name: "WARNING: Fail Alignment Check"
145155
description: "List of samples that failed the STAR minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps."

conf/modules.config

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,13 @@ if (!params.skip_trimming) {
204204
]
205205
]
206206
}
207+
208+
withName: 'MULTIQC_TSV_FAIL_TRIMMED' {
209+
publishDir = [
210+
path: { "${params.outdir}/multiqc" },
211+
enabled: false
212+
]
213+
}
207214
}
208215
}
209216

lib/WorkflowRnaseq.groovy

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,23 @@ class WorkflowRnaseq {
152152
}
153153
}
154154

155+
//
156+
// Function that parses TrimGalore log output file to get total number of reads after trimming
157+
//
158+
public static Integer getTrimGaloreReadsAfterFiltering(log_file) {
    // Parameters:
    //   log_file - any object exposing eachLine (e.g. a File/Path or a String)
    //              holding the text of a TrimGalore trimming report.
    // Returns:
    //   total sequences processed minus sequences removed by the length cutoff,
    //   as an Integer; 0 when neither figure is present in the report.
    //
    // Counts are parsed as BigDecimal rather than Float: a Float is only exact
    // up to 2^24 (16,777,216), so larger read counts would be silently rounded.
    def total_reads = 0
    def filtered_reads = 0
    log_file.eachLine { line ->
        def total_reads_matcher = line =~ /([\d\.]+)\ssequences processed in total/
        // Single-end wording: "... shorter than the length cutoff of <N> bp: <count>"
        def se_filtered_reads_matcher = line =~ /shorter than the length cutoff of\s[\d\.]+\sbp:\s([\d\.]+)/
        // Paired-end wording: "... shorter than the length cutoff (<N> bp): <count>"
        def pe_filtered_reads_matcher = line =~ /shorter than the length cutoff\s\([\d\.]+\sbp\):\s([\d\.]+)/
        if (total_reads_matcher) total_reads = total_reads_matcher[0][1].toBigDecimal()
        if (se_filtered_reads_matcher) filtered_reads = se_filtered_reads_matcher[0][1].toBigDecimal()
        if (pe_filtered_reads_matcher) filtered_reads = pe_filtered_reads_matcher[0][1].toBigDecimal()
    }
    // Explicit conversion to the declared return type instead of relying on an
    // implicit Number cast at the return statement.
    return (total_reads - filtered_reads).intValue()
}
171+
155172
//
156173
// Function that parses and returns the alignment rate from the STAR log output
157174
//

modules/local/multiqc.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ process MULTIQC {
1111
path multiqc_custom_config
1212
path software_versions
1313
path workflow_summary
14+
path fail_trimming_summary
1415
path fail_mapping_summary
1516
path fail_strand_check
1617
path ('fastqc/*')

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ params {
3333
save_umi_intermeds = false
3434

3535
// Trimming
36+
min_trimmed_reads = 10000
3637
clip_r1 = null
3738
clip_r2 = null
3839
three_prime_clip_r1 = null

nextflow_schema.json

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
"type": "object",
1111
"fa_icon": "fas fa-terminal",
1212
"description": "Define where the pipeline should find input data and save output data.",
13-
"required": ["outdir"],
13+
"required": [
14+
"outdir"
15+
],
1416
"properties": {
1517
"input": {
1618
"type": "string",
@@ -329,6 +331,12 @@
329331
"help_text": "This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases.",
330332
"fa_icon": "fas fa-cut"
331333
},
334+
"min_trimmed_reads": {
335+
"type": "integer",
336+
"default": 10000,
337+
"fa_icon": "fas fa-hand-paper",
338+
"description": "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low."
339+
},
332340
"skip_trimming": {
333341
"type": "boolean",
334342
"description": "Skip the adapter trimming step.",
@@ -354,13 +362,19 @@
354362
"default": "star_salmon",
355363
"description": "Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.",
356364
"fa_icon": "fas fa-map-signs",
357-
"enum": ["star_salmon", "star_rsem", "hisat2"]
365+
"enum": [
366+
"star_salmon",
367+
"star_rsem",
368+
"hisat2"
369+
]
358370
},
359371
"pseudo_aligner": {
360372
"type": "string",
361373
"description": "Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.",
362374
"fa_icon": "fas fa-hamburger",
363-
"enum": ["salmon"]
375+
"enum": [
376+
"salmon"
377+
]
364378
},
365379
"bam_csi_index": {
366380
"type": "boolean",
@@ -596,7 +610,14 @@
596610
"description": "Method used to save pipeline results to output directory.",
597611
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
598612
"fa_icon": "fas fa-copy",
599-
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
613+
"enum": [
614+
"symlink",
615+
"rellink",
616+
"link",
617+
"copy",
618+
"copyNoFollow",
619+
"move"
620+
],
600621
"hidden": true
601622
},
602623
"email_on_fail": {

workflows/rnaseq.nf

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ include { DUPRADAR } from '../modules/local/dupradar'
106106
include { MULTIQC } from '../modules/local/multiqc'
107107
include { MULTIQC_CUSTOM_BIOTYPE } from '../modules/local/multiqc_custom_biotype'
108108
include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_MAPPED } from '../modules/local/multiqc_tsv_from_list'
109+
include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_TRIMMED } from '../modules/local/multiqc_tsv_from_list'
109110
include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_STRAND_CHECK } from '../modules/local/multiqc_tsv_from_list'
110111

111112
//
@@ -194,8 +195,9 @@ workflow RNASEQ {
194195
.reads
195196
.map {
196197
meta, fastq ->
197-
meta.id = meta.id.split('_')[0..-2].join('_')
198-
[ meta, fastq ]
198+
def meta_clone = meta.clone()
199+
meta_clone.id = meta_clone.id.split('_')[0..-2].join('_')
200+
[ meta_clone, fastq ]
199201
}
200202
.groupTuple(by: [0])
201203
.branch {
@@ -233,9 +235,47 @@ workflow RNASEQ {
233235
ch_versions = ch_versions.mix(FASTQC_UMITOOLS_TRIMGALORE.out.versions)
234236

235237
//
236-
// MODULE: Remove genome contaminant reads
238+
// Filter channels to get samples that passed minimum trimmed read count
237239
//
240+
// Default to an empty channel so a value exists for MultiQC even when trimming is skipped
ch_fail_trimming_multiqc = Channel.empty()
ch_filtered_reads = FASTQC_UMITOOLS_TRIMGALORE.out.reads
if (!params.skip_trimming) {
    // Annotate every sample with the number of reads that survived trimming
    ch_filtered_reads
        .join(FASTQC_UMITOOLS_TRIMGALORE.out.trim_log)
        .map {
            meta, reads, trim_log ->
                // Non-single-end samples provide several logs; the last one is parsed.
                // NOTE(review): presumably the last (R2) report holds the pair-level
                // filtering summary - confirm against the TrimGalore output.
                def log_file  = meta.single_end ? trim_log : trim_log[-1]
                // 'def' keeps the value local to this closure invocation; an undeclared
                // variable would live in the shared script binding and could be
                // overwritten by another sample being processed concurrently.
                def num_reads = WorkflowRnaseq.getTrimGaloreReadsAfterFiltering(log_file)
                [ meta, reads, num_reads ]
        }
        .set { ch_num_trimmed_reads }

    // Samples with at least '--min_trimmed_reads' reads continue downstream;
    // only samples strictly below the minimum are removed.
    ch_num_trimmed_reads
        .filter { meta, reads, num_reads -> num_reads >= params.min_trimmed_reads }
        .map { meta, reads, num_reads -> [ meta, reads ] }
        .set { ch_filtered_reads }

    // Samples below the minimum become "<sample id>\t<read count>" rows for the MultiQC table
    ch_num_trimmed_reads
        .filter { meta, reads, num_reads -> num_reads < params.min_trimmed_reads }
        .map { meta, reads, num_reads -> [ "$meta.id\t$num_reads" ] }
        .set { ch_fail_trimmed_tsv }

    MULTIQC_TSV_FAIL_TRIMMED (
        ch_fail_trimmed_tsv.collect(),
        ["Sample", "Reads after trimming"],
        'fail_trimmed_samples'
    )
    .set { ch_fail_trimming_multiqc }
}
275+
276+
//
277+
// MODULE: Remove genome contaminant reads
278+
//
239279
if (!params.skip_bbsplit) {
240280
BBMAP_BBSPLIT (
241281
ch_filtered_reads,
@@ -726,6 +766,7 @@ workflow RNASEQ {
726766
ch_multiqc_custom_config.collect().ifEmpty([]),
727767
CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(),
728768
ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
769+
ch_fail_trimming_multiqc.ifEmpty([]),
729770
ch_fail_mapping_multiqc.ifEmpty([]),
730771
ch_fail_strand_multiqc.ifEmpty([]),
731772
FASTQC_UMITOOLS_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]),

0 commit comments

Comments
 (0)