Fix merge conflicts

drpatelh · drpatelh · commit c6527795390d · 2022-12-19T22:53:16.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Bump minimum Nextflow version from `21.10.3` -> `22.10.1`
 - Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2)
 - [[#729](https://github.com/nf-core/rnaseq/issues/729)] - Add 'auto' option to samplesheet to automatically detect strandedness for samples
+- [[#891](https://github.com/nf-core/rnaseq/issues/891)] - Skip MarkDuplicates when UMIs are used
 - [[#896](https://github.com/nf-core/rnaseq/issues/896)] - Remove `copyTo` call for iGenomes README
 - [[#897](https://github.com/nf-core/rnaseq/issues/897)] - Use `--skip_preseq` by default
 - [[#900](https://github.com/nf-core/rnaseq/issues/900)] - Add `--recursive` option to `fastq_dir_to_samplesheet.py` script
diff --git a/conf/modules.config b/conf/modules.config
@@ -358,7 +358,7 @@ if (!params.skip_alignment) {
         }
     }
 
-    if (!params.skip_markduplicates) {
+    if (!params.skip_markduplicates && !params.with_umi) {
         process {
             withName: '.*:BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' {
                 ext.args   = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
@@ -419,7 +419,7 @@ if (!params.skip_alignment) {
                         pattern: '*.bam',
                         enabled: (
                             params.save_align_intermeds ||
-                            params.skip_markduplicates ||
+                            params.with_umi ||
                             params.save_umi_intermeds
                         )
                     ]
@@ -435,7 +435,7 @@ if (!params.skip_alignment) {
                     pattern: '*.{bai,csi}',
                     enabled: (
                         params.save_align_intermeds ||
-                        params.skip_markduplicates ||
+                        params.with_umi ||
                         params.save_umi_intermeds
                     )
                 ]
diff --git a/docs/output.md b/docs/output.md
@@ -292,7 +292,7 @@ After extracting the UMI information from the read sequence (see [UMI-tools extr
 
 </details>
 
-Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses [picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) to _mark_ the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. However, for RNA-seq data it is not recommended to physically remove duplicate reads from the alignments (unless you are using UMIs) because you expect a significant level of true biological duplication that arises from the same fragments being sequenced from for example highly expressed genes. You can skip this step via the `--skip_markduplicates` parameter.
+Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses [picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) to _mark_ the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. However, for RNA-seq data it is not recommended to physically remove duplicate reads from the alignments (unless you are using UMIs) because you expect a significant level of true biological duplication that arises from the same fragments being sequenced from, for example, highly expressed genes. This step will be skipped automatically when using the `--with_umi` option or explicitly via the `--skip_markduplicates` parameter.
 
 ![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_markduplicates.png)
 
diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf
@@ -591,7 +591,7 @@ workflow RNASEQ {
     // SUBWORKFLOW: Mark duplicate reads
     //
     ch_markduplicates_multiqc = Channel.empty()
-    if (!params.skip_alignment && !params.skip_markduplicates) {
+    if (!params.skip_alignment && !params.skip_markduplicates && !params.with_umi) {
         BAM_MARKDUPLICATES_PICARD (
             ch_genome_bam,
             PREPARE_GENOME.out.fasta,

Original file line number	Diff line number	Diff line change
`@@ -358,7 +358,7 @@ if (!params.skip_alignment) {`
`358`	`358`	`}`
`359`	`359`	`}`
`360`	`360`
`361`		`- if (!params.skip_markduplicates) {`
	`361`	`+ if (!params.skip_markduplicates && !params.with_umi) {`
`362`	`362`	`process {`
`363`	`363`	`withName: '.*:BAM_MARKDUPLICATES_PICARD:PICARD_MARKDUPLICATES' {`
`364`	`364`	`ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'`
`@@ -419,7 +419,7 @@ if (!params.skip_alignment) {`
`419`	`419`	`pattern: '*.bam',`
`420`	`420`	`enabled: (`
`421`	`421`	`params.save_align_intermeds \|\|`
`422`		`- params.skip_markduplicates \|\|`
	`422`	`+ params.with_umi \|\|`
`423`	`423`	`params.save_umi_intermeds`
`424`	`424`	`)`
`425`	`425`	`]`
`@@ -435,7 +435,7 @@ if (!params.skip_alignment) {`
`435`	`435`	`pattern: '*.{bai,csi}',`
`436`	`436`	`enabled: (`
`437`	`437`	`params.save_align_intermeds \|\|`
`438`		`- params.skip_markduplicates \|\|`
	`438`	`+ params.with_umi \|\|`
`439`	`439`	`params.save_umi_intermeds`
`440`	`440`	`)`
`441`	`441`	`]`