nf-core
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎CITATIONS.md‎
Lines changed: 2 additions & 0 deletions b/‎CITATIONS.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎bin/check_samplesheet.py‎
Lines changed: 1 addition & 1 deletion b/‎bin/check_samplesheet.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎conf/modules.config‎
Lines changed: 29 additions & 2 deletions b/‎conf/modules.config‎
Lines changed: 29 additions & 2 deletions
diff --git a/‎conf/test.config‎
Lines changed: 1 addition & 1 deletion b/‎conf/test.config‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎conf/test_full.config‎
Lines changed: 1 addition & 1 deletion b/‎conf/test_full.config‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/usage.md‎
Lines changed: 5 additions & 5 deletions b/‎docs/usage.md‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎lib/WorkflowRnaseq.groovy‎
Lines changed: 19 additions & 0 deletions b/‎lib/WorkflowRnaseq.groovy‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎modules.json‎
Lines changed: 13 additions & 3 deletions b/‎modules.json‎
Lines changed: 13 additions & 3 deletions
diff --git a/‎modules/local/dupradar.nf‎
Lines changed: 3 additions & 3 deletions b/‎modules/local/dupradar.nf‎
Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Enhancements & fixes
 
 - Bump minimum Nextflow version from `21.10.3` -> `22.10.1`
-- Updated pipeline template to [nf-core/tools 2.7.1](https://github.com/nf-core/tools/releases/tag/2.7.1)
+- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2)
+- [[#729](https://github.com/nf-core/rnaseq/issues/729)] - Add 'auto' option to samplesheet to automatically detect strandedness for samples
 - [[#891](https://github.com/nf-core/rnaseq/issues/891)] - Skip MarkDuplicates when UMIs are used
 - [[#896](https://github.com/nf-core/rnaseq/issues/896)] - Remove `copyTo` call for iGenomes README
 - [[#897](https://github.com/nf-core/rnaseq/issues/897)] - Use `--skip_preseq` by default
 
@@ -22,6 +22,8 @@
 
   > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677.
 
+- [fq](https://github.com/stjude-rust-labs/fq)
+
 - [GffRead](https://pubmed.ncbi.nlm.nih.gov/32489650/)
 
   > Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: 10.12688/f1000research.23297.2. eCollection 2020. PubMed PMID: 32489650; PubMed Central PMCID: PMC7222033.
 
@@ -99,7 +99,7 @@ def check_samplesheet(file_in, file_out):
                             )
 
                 ## Check strandedness
-                strandednesses = ["unstranded", "forward", "reverse"]
+                strandednesses = ["unstranded", "forward", "reverse", "auto"]
                 if strandedness:
                     if strandedness not in strandednesses:
                         print_error(
 
@@ -166,6 +166,33 @@ if (!params.skip_bbsplit && params.bbsplit_fasta_list) {
     }
 }
 
+//
+// Read subsampling and strand inferring options
+//
+
+process {
+    withName: 'FQ_SUBSAMPLE' {
+        ext.args   = '--record-count 1000000 --seed 1'
+        ext.prefix = { "${meta.id}.subsampled" }
+        publishDir = [
+            path: { "${params.outdir}/sample_fastq/fastq" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: false
+        ]
+    }
+
+    withName: '.*:FASTQ_SUBSAMPLE_FQ_SALMON:SALMON_QUANT' {
+        ext.args   = '--skipQuant'
+        publishDir = [
+            path: { "${params.outdir}/sample_fastq/salmon" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename },
+            enabled: false
+        ]
+    }
+}
+
 //
 // Read QC and trimming options
 //
@@ -546,7 +573,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode,
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename }
             ]
         }
 
@@ -1051,7 +1078,7 @@ if (params.pseudo_aligner == 'salmon') {
             publishDir = [
                 path: { "${params.outdir}/${params.pseudo_aligner}" },
                 mode: params.publish_dir_mode,
-                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('_meta_info.json') ? null : filename }
             ]
         }
 
 
@@ -20,7 +20,7 @@ params {
     max_time   = '6.h'
 
     // Input data
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_test.csv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.10/samplesheet_test.csv'
 
     // Genome references
     fasta              = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fasta'
 
@@ -15,7 +15,7 @@ params {
     config_profile_description = 'Full test dataset to check pipeline function'
 
     // Parameters for full-size test
-    input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_full.csv'
+    input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.10/samplesheet_full.csv'
     genome         = 'GRCh37'
     pseudo_aligner = 'salmon'
 }
@@ -14,13 +14,13 @@ You will need to create a samplesheet with information about the samples you wou
 
 ### Multiple runs of the same sample
 
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes. If you set the strandedness value to `auto` the pipeline will sub-sample the input FastQ files to 1 million reads, use Salmon Quant to infer the strandedness automatically and then propagate this information to the remainder of the pipeline. If the strandedness has been inferred or provided incorrectly a warning will be present at the top of the MultiQC report so please be sure to check when looking at the QC for your samples.
 
 ```console
 sample,fastq_1,fastq_2,strandedness
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,unstranded
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,unstranded
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,unstranded
+CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,auto
+CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,auto
+CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,auto
 ```
 
 ### Full samplesheet
@@ -45,7 +45,7 @@ TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,reverse
 | `sample`       | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
 | `fastq_1`      | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
 | `fastq_2`      | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward` or `reverse`.                                                                                                        |
+| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward`, `reverse` or `auto`.                                                                                                |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
 
@@ -2,6 +2,7 @@
 // This file holds several functions specific to the workflow/rnaseq.nf in the nf-core/rnaseq pipeline
 //
 
+import groovy.json.JsonSlurper
 import groovy.text.SimpleTemplateEngine
 
 class WorkflowRnaseq {
@@ -164,6 +165,24 @@ class WorkflowRnaseq {
         }
     }
 
+    //
+    // Function that parses Salmon quant 'meta_info.json' output file to get inferred strandedness
+    //
+    public static String getSalmonInferredStrandedness(json_file) {
+        def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
+        def strandedness = 'reverse'
+        if (lib_type) {
+            if (lib_type in ['U', 'IU']) {
+                strandedness = 'unstranded'
+            } else if (lib_type in ['SF', 'ISF']) {
+                strandedness = 'forward'
+            } else if (lib_type in ['SR', 'ISR']) {
+                strandedness = 'reverse'
+            }
+        }
+        return strandedness
+    }
+
     //
     // Function that parses TrimGalore log output file to get total number of reads after trimming
     //
 
@@ -31,6 +31,11 @@
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                         "installed_by": ["fastq_fastqc_umitools_trimgalore"]
                     },
+                    "fq/subsample": {
+                        "branch": "master",
+                        "git_sha": "ad462aa294faf9a8c42688a08daf81a580594f70",
+                        "installed_by": ["modules", "fastq_subsample_fq_salmon"]
+                    },
                     "gffread": {
                         "branch": "master",
                         "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
@@ -123,13 +128,13 @@
                     },
                     "salmon/index": {
                         "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+                        "git_sha": "94b06f1683ddf893cf06525f6e7f0573ad8fbf83",
                         "installed_by": ["modules"]
                     },
                     "salmon/quant": {
                         "branch": "master",
-                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
-                        "installed_by": ["modules"]
+                        "git_sha": "94b06f1683ddf893cf06525f6e7f0573ad8fbf83",
+                        "installed_by": ["modules", "fastq_subsample_fq_salmon"]
                     },
                     "samtools/flagstat": {
                         "branch": "master",
@@ -262,6 +267,11 @@
                         "branch": "master",
                         "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40",
                         "installed_by": ["subworkflows"]
+                    },
+                    "fastq_subsample_fq_salmon": {
+                        "branch": "master",
+                        "git_sha": "0098bc93f6219c6194f443f0feb089ba83717384",
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }
 
@@ -2,10 +2,10 @@ process DUPRADAR {
     tag "$meta.id"
     label 'process_long'
 
-    conda "bioconda::bioconductor-dupradar=1.18.0"
+    conda "bioconda::bioconductor-dupradar=1.28.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bioconductor-dupradar:1.18.0--r40_1' :
-        'quay.io/biocontainers/bioconductor-dupradar:1.18.0--r40_1' }"
+        'https://depot.galaxyproject.org/singularity/bioconductor-dupradar:1.28.0--r42hdfd78af_0' :
+        'quay.io/biocontainers/bioconductor-dupradar:1.28.0--r42hdfd78af_0' }"
 
     input:
     tuple val(meta), path(bam)
Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,7 @@ def check_samplesheet(file_in, file_out):`
`99`	`99`	`)`
`100`	`100`
`101`	`101`	`## Check strandedness`
`102`		`- strandednesses = ["unstranded", "forward", "reverse"]`
	`102`	`+ strandednesses = ["unstranded", "forward", "reverse", "auto"]`
`103`	`103`	`if strandedness:`
`104`	`104`	`if strandedness not in strandednesses:`
`105`	`105`	`print_error(`
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ params {`
`15`	`15`	`config_profile_description = 'Full test dataset to check pipeline function'`
`16`	`16`
`17`	`17`	`// Parameters for full-size test`
`18`		`- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.4/samplesheet_full.csv'`
	`18`	`+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnaseq/samplesheet/v3.10/samplesheet_full.csv'`
`19`	`19`	`genome = 'GRCh37'`
`20`	`20`	`pseudo_aligner = 'salmon'`
`21`	`21`	`}`