From fcf1b9b4fac7ecb8b1a7c1dbd76c51b467946733 Mon Sep 17 00:00:00 2001
From: Ben Sherman <bentshermann@gmail.com>
Date: Fri, 13 Jun 2025 10:41:36 -0500
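Subject: [PATCH 1/2] Workflow outputs (third preview)

Replace the per-process publishDir directives with a `publish:` section
in the entry workflow and a top-level `output` block, enabled through
nextflow.preview.output.

Inputs are now read from a CSV file (id, fastq_1, fastq_2) instead of a
file-pair glob; data/gut.csv is the default and data/allreads.csv backs
the 'all-reads' profile. FASTQC and QUANT emit (id, path) tuples so that
RNASEQ can join their results per sample. The 's3-data' and 'gs-data'
profiles and the profile-specific reads/transcriptome overrides are
removed.
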
Signed-off-by: Ben Sherman <bentshermann@gmail.com>
---
 bin/fastqc.sh           |  6 ++--
 data/allreads.csv       |  4 +++
 data/gut.csv            |  1 +
 main.nf                 | 63 +++++++++++++++++++++++++++++++----------
 modules/fastqc/main.nf  | 10 +++----
 modules/multiqc/main.nf |  2 --
 modules/quant/main.nf   | 10 +++----
 modules/rnaseq.nf       | 20 ++++++-------
 nextflow.config         | 34 +++++++---------------
 9 files changed, 86 insertions(+), 64 deletions(-)
 create mode 100644 data/allreads.csv
 create mode 100644 data/gut.csv

diff --git a/bin/fastqc.sh b/bin/fastqc.sh
index 93f38b6..55bc33b 100755
--- a/bin/fastqc.sh
+++ b/bin/fastqc.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
-sample_id="$1"
+id="$1"
 reads="$2"
 
-mkdir fastqc_${sample_id}_logs
-fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads}
+mkdir fastqc_${id}_logs
+fastqc -o fastqc_${id}_logs -f fastq -q ${reads}
diff --git a/data/allreads.csv b/data/allreads.csv
new file mode 100644
index 0000000..db40aac
--- /dev/null
+++ b/data/allreads.csv
@@ -0,0 +1,4 @@
+gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq
+liver,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_2.fq
+lung,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_2.fq
+spleen,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_2.fq
\ No newline at end of file
diff --git a/data/gut.csv b/data/gut.csv
new file mode 100644
index 0000000..e9c7353
--- /dev/null
+++ b/data/gut.csv
@@ -0,0 +1 @@
+gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq
\ No newline at end of file
diff --git a/main.nf b/main.nf
index f585d53..7c9a46e 100755
--- a/main.nf
+++ b/main.nf
@@ -4,16 +4,17 @@
  * Proof of concept of a RNAseq pipeline implemented with Nextflow
  */
 
+nextflow.preview.output = true
 
 /*
  * Default pipeline parameters. They can be overriden on the command line eg.
- * given `params.foo` specify on the run command line `--foo some_value`.
+ * given `params.reads` specify on the run command line `--reads some_value`.
  */
 
-params.reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq"
-params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
+params.reads = null
+params.transcriptome = null
 params.outdir = "results"
-params.multiqc = "$baseDir/multiqc"
+params.multiqc = "$projectDir/multiqc"
 
 
 // import modules
@@ -24,16 +25,48 @@ include { MULTIQC } from './modules/multiqc'
  * main script flow
  */
 workflow {
+  main:
+  log.info """\
+      R N A S E Q - N F   P I P E L I N E
+      ===================================
+      transcriptome: ${params.transcriptome}
+      reads        : ${params.reads}
+      outdir       : ${params.outdir}
+    """.stripIndent()
 
-log.info """\
-  R N A S E Q - N F   P I P E L I N E
-  ===================================
-  transcriptome: ${params.transcriptome}
-  reads        : ${params.reads}
-  outdir       : ${params.outdir}
-  """
-
-  read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true ) 
-  RNASEQ( params.transcriptome, read_pairs_ch )
-  MULTIQC( RNASEQ.out, params.multiqc )
+  inputs_ch = channel.fromPath(params.reads)
+    .splitCsv()
+    .map { id, fastq_1, fastq_2 ->
+      tuple(id, file(fastq_1, checkIfExists: true), file(fastq_2, checkIfExists: true))
+    }
+
+  samples_ch = RNASEQ( params.transcriptome, inputs_ch )
+    .map { id, fastqc, quant ->
+      [id: id, fastqc: fastqc, quant: quant]
+    }
+
+  multiqc_files_ch = samples_ch
+    .flatMap { sample -> [sample.fastqc, sample.quant] }
+    .collect()
+  multiqc_report = MULTIQC( multiqc_files_ch, params.multiqc )
+
+  publish:
+  samples = samples_ch
+  multiqc_report = multiqc_report
+}
+
+output {
+  samples {
+    path { sample ->
+      sample.fastqc >> "fastqc/${sample.id}"
+      sample.quant >> "quant/${sample.id}"
+    }
+    index {
+      path 'samples.csv'
+      header true
+    }
+  }
+
+  multiqc_report {
+  }
 }
diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf
index 57c0477..5d013c9 100644
--- a/modules/fastqc/main.nf
+++ b/modules/fastqc/main.nf
@@ -1,18 +1,16 @@
-params.outdir = 'results'
 
 process FASTQC {
-    tag "FASTQC on $sample_id"
+    tag "$id"
     conda 'bioconda::fastqc=0.12.1'
-    publishDir params.outdir, mode:'copy'
 
     input:
-    tuple val(sample_id), path(reads)
+    tuple val(id), path(fastq_1), path(fastq_2)
 
     output:
-    path "fastqc_${sample_id}_logs", emit: logs
+    tuple val(id), path("fastqc_${id}_logs")
 
     script:
     """
-    fastqc.sh "$sample_id" "$reads"
+    fastqc.sh "$id" "$fastq_1 $fastq_2"
     """
 }
diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf
index 43d7450..ac80a9e 100644
--- a/modules/multiqc/main.nf
+++ b/modules/multiqc/main.nf
@@ -1,8 +1,6 @@
-params.outdir = 'results'
 
 process MULTIQC {
     conda 'bioconda::multiqc=1.27.1'
-    publishDir params.outdir, mode:'copy'
 
     input:
     path '*'
diff --git a/modules/quant/main.nf b/modules/quant/main.nf
index 7e7286f..2ab0a3d 100644
--- a/modules/quant/main.nf
+++ b/modules/quant/main.nf
@@ -1,17 +1,17 @@
 
 process QUANT {
-    tag "$pair_id"
+    tag "$id"
     conda 'bioconda::salmon=1.10.3'
 
     input:
-    path index 
-    tuple val(pair_id), path(reads) 
+    path index
+    tuple val(id), path(fastq_1), path(fastq_2)
 
     output:
-    path pair_id 
+    tuple val(id), path("quant_${id}")
 
     script:
     """
-    salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id
+    salmon quant --threads $task.cpus --libType=U -i $index -1 ${fastq_1} -2 ${fastq_2} -o quant_$id
     """
 }
diff --git a/modules/rnaseq.nf b/modules/rnaseq.nf
index 2f607c1..6849a9e 100644
--- a/modules/rnaseq.nf
+++ b/modules/rnaseq.nf
@@ -1,19 +1,19 @@
-params.outdir = 'results'
 
 include { INDEX } from './index'
 include { QUANT } from './quant'
 include { FASTQC } from './fastqc'
 
 workflow RNASEQ {
-  take:
+    take:
     transcriptome
-    read_pairs_ch
- 
-  main: 
-    INDEX(transcriptome)
-    FASTQC(read_pairs_ch)
-    QUANT(INDEX.out, read_pairs_ch)
+    samples_ch
 
-  emit: 
-     QUANT.out | concat(FASTQC.out) | collect
+    main:
+    index = INDEX(transcriptome)
+    fastqc_ch = FASTQC(samples_ch)
+    quant_ch = QUANT(index, samples_ch)
+    samples_ch = fastqc_ch.join(quant_ch)
+
+    emit:
+    samples_ch
 }
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index c07d123..922745e 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -17,16 +17,20 @@ manifest {
 }
 
 /*
- * default params
+ * params for default test data
  */
 
-params.outdir = "results"
-params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq"
-params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
-params.multiqc = "${projectDir}/multiqc"
+params.reads = "${projectDir}/data/gut.csv"
+params.transcriptome = "https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa"
 
 /*
- * defines execution profiles for different environments
+ * publish settings
+ */
+
+workflow.output.mode = 'copy'
+
+/*
+ * execution profiles for different environments
  */
 
 profiles {
@@ -35,7 +39,7 @@ profiles {
   }
 
   'all-reads' {
-    params.reads = "${projectDir}/data/ggal/ggal_*_{1,2}.fq"
+    params.reads = "${projectDir}/data/allreads.csv"
   }
 
   'arm64' {
@@ -84,8 +88,6 @@ profiles {
   }
 
   'batch' {
-    params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
-    params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
     process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
     process.executor = 'awsbatch'
     process.queue = 'nextflow-ci'
@@ -94,15 +96,7 @@ profiles {
     aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws'
   }
 
-  's3-data' {
-    process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
-    params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq'
-    params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa'
-  }
-
   'google-batch' {
-      params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
-      params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
       params.multiqc = 'gs://rnaseq-nf/multiqc'
       process.executor = 'google-batch'
       process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
@@ -113,12 +107,6 @@ profiles {
       google.region  = 'europe-west2'
   }
 
-  'gs-data' {
-      process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
-      params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa'
-      params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq'
-  }
-
   'azure-batch' {
     process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1'
     workDir = 'az://nf-scratch/work'

From 484bfed5224bf9b248b767518c3b6d6eff7d46eb Mon Sep 17 00:00:00 2001
From: Ben Sherman <bentshermann@gmail.com>
Date: Fri, 13 Jun 2025 12:43:07 -0500
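Subject: [PATCH 2/2] Static types

Enable the `operators` and `typeChecking` preview flags and switch the
pipeline to typed declarations: a `params` block in main.nf, process
inputs and outputs rewritten in the typed syntax, typed take/emit
sections in RNASEQ, and new FastqPair and Sample record types.

RNASEQ now takes (reads, transcriptome) and also emits the index, which
is published together with the samples and the MultiQC report.
params.outdir is dropped in favour of workflow.outputDir, and
nextflow_schema.json describes `reads` as an array of
{id, fastq_1, fastq_2} objects.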
---
 main.nf                 | 69 +++++++++++++++++++++++++----------------
 modules/fastqc/main.nf  |  7 +++--
 modules/index/main.nf   |  4 +--
 modules/multiqc/main.nf |  6 ++--
 modules/quant/main.nf   |  9 ++++--
 modules/rnaseq.nf       | 27 +++++++++++-----
 nextflow_schema.json    | 29 ++++++++++++-----
 7 files changed, 100 insertions(+), 51 deletions(-)

diff --git a/main.nf b/main.nf
index 7c9a46e..c9fb0c6 100755
--- a/main.nf
+++ b/main.nf
@@ -4,59 +4,75 @@
  * Proof of concept of a RNAseq pipeline implemented with Nextflow
  */
 
-nextflow.preview.output = true
-
-/*
- * Default pipeline parameters. They can be overriden on the command line eg.
- * given `params.reads` specify on the run command line `--reads some_value`.
- */
-
-params.reads = null
-params.transcriptome = null
-params.outdir = "results"
-params.multiqc = "$projectDir/multiqc"
+// enable v2 operators (required for static type checking)
+nextflow.preview.operators = true
 
+// enable static type checking
+nextflow.preview.typeChecking = true
 
 // import modules
 include { RNASEQ } from './modules/rnaseq'
+include { FastqPair ; Sample } from './modules/rnaseq'
 include { MULTIQC } from './modules/multiqc'
 
+/*
+ * Pipeline parameters. They can be overridden on the command line, e.g.
+ * `params.reads` can be specified as `--reads '...'`.
+ */
+params {
+  // The input read-pair files
+  reads: List<FastqPair>
+
+  // The input transcriptome file
+  transcriptome: Path
+
+  // Directory containing multiqc configuration
+  multiqc: Path = "${projectDir}/multiqc"
+}
+
 /* 
- * main script flow
+ * Entry workflow
  */
 workflow {
   main:
   log.info """\
       R N A S E Q - N F   P I P E L I N E
       ===================================
+      reads        : ${params.reads*.id.join(',')}
       transcriptome: ${params.transcriptome}
-      reads        : ${params.reads}
-      outdir       : ${params.outdir}
+      outdir       : ${workflow.outputDir}
     """.stripIndent()
 
-  inputs_ch = channel.fromPath(params.reads)
-    .splitCsv()
-    .map { id, fastq_1, fastq_2 ->
-      tuple(id, file(fastq_1, checkIfExists: true), file(fastq_2, checkIfExists: true))
-    }
-
-  samples_ch = RNASEQ( params.transcriptome, inputs_ch )
-    .map { id, fastqc, quant ->
-      [id: id, fastqc: fastqc, quant: quant]
-    }
+  (samples_ch, index) = RNASEQ( channel.fromList(params.reads), params.transcriptome )
 
   multiqc_files_ch = samples_ch
     .flatMap { sample -> [sample.fastqc, sample.quant] }
     .collect()
+
   multiqc_report = MULTIQC( multiqc_files_ch, params.multiqc )
 
   publish:
+  index = index
   samples = samples_ch
   multiqc_report = multiqc_report
+
+  onComplete:
+  log.info(
+    workflow.success
+      ? "\nDone! Open the following report in your browser --> ${workflow.outputDir}/multiqc_report.html\n"
+      : "Oops .. something went wrong"
+  )
 }
 
+/*
+ * Pipeline outputs. By default they will be saved to the 'results' directory.
+ */
 output {
-  samples {
+  index: Path {
+    path '.'
+  }
+
+  samples: Channel<Sample> {
     path { sample ->
       sample.fastqc >> "fastqc/${sample.id}"
       sample.quant >> "quant/${sample.id}"
@@ -67,6 +83,7 @@ output {
     }
   }
 
-  multiqc_report {
+  multiqc_report: Path {
+    path '.'
   }
 }
diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf
index 5d013c9..8634f9f 100644
--- a/modules/fastqc/main.nf
+++ b/modules/fastqc/main.nf
@@ -4,10 +4,13 @@ process FASTQC {
     conda 'bioconda::fastqc=0.12.1'
 
     input:
-    tuple val(id), path(fastq_1), path(fastq_2)
+    id      : String
+    fastq_1 : Path
+    fastq_2 : Path
 
     output:
-    tuple val(id), path("fastqc_${id}_logs")
+    id      : String = id
+    fastqc  : Path = file("fastqc_${id}_logs")
 
     script:
     """
diff --git a/modules/index/main.nf b/modules/index/main.nf
index 1d99b1b..5b1e305 100644
--- a/modules/index/main.nf
+++ b/modules/index/main.nf
@@ -4,10 +4,10 @@ process INDEX {
     conda 'bioconda::salmon=1.10.3'
     
     input:
-    path transcriptome 
+    transcriptome   : Path
 
     output:
-    path 'index' 
+    file('index')
 
     script:
     """
diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf
index ac80a9e..7efb9a1 100644
--- a/modules/multiqc/main.nf
+++ b/modules/multiqc/main.nf
@@ -3,11 +3,11 @@ process MULTIQC {
     conda 'bioconda::multiqc=1.27.1'
 
     input:
-    path '*'
-    path config
+    inputs  : Bag<Path>
+    config  : Path
 
     output:
-    path 'multiqc_report.html', emit: report
+    file('multiqc_report.html')
 
     script:
     """
diff --git a/modules/quant/main.nf b/modules/quant/main.nf
index 2ab0a3d..0e59326 100644
--- a/modules/quant/main.nf
+++ b/modules/quant/main.nf
@@ -4,11 +4,14 @@ process QUANT {
     conda 'bioconda::salmon=1.10.3'
 
     input:
-    path index
-    tuple val(id), path(fastq_1), path(fastq_2)
+    id      : String
+    fastq_1 : Path
+    fastq_2 : Path
+    index   : Path
 
     output:
-    tuple val(id), path("quant_${id}")
+    id      : String = id
+    quant   : Path = file("quant_${id}")
 
     script:
     """
diff --git a/modules/rnaseq.nf b/modules/rnaseq.nf
index 6849a9e..9a4d0cc 100644
--- a/modules/rnaseq.nf
+++ b/modules/rnaseq.nf
@@ -5,15 +5,28 @@ include { FASTQC } from './fastqc'
 
 workflow RNASEQ {
     take:
-    transcriptome
-    samples_ch
+    reads         : Channel<FastqPair>
+    transcriptome : Path
 
     main:
     index = INDEX(transcriptome)
-    fastqc_ch = FASTQC(samples_ch)
-    quant_ch = QUANT(index, samples_ch)
-    samples_ch = fastqc_ch.join(quant_ch)
+    fastqc_ch = reads.map(FASTQC)
+    quant_ch = reads.map(QUANT, index: index)
+    samples_ch = fastqc_ch.join(quant_ch, 'id')
 
     emit:
-    samples_ch
-}
\ No newline at end of file
+    samples : Channel<Sample> = samples_ch
+    index   : Path = index
+}
+
+record FastqPair {
+  id      : String
+  fastq_1 : Path
+  fastq_2 : Path
+}
+
+record Sample {
+  id      : String
+  fastqc  : Path
+  quant   : Path
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 823417a..c0cab4f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -11,15 +11,27 @@
       "fa_icon": "fas fa-terminal",
       "description": "Define where the pipeline should find input data and save output data.",
       "properties": {
-        "outdir": {
-          "type": "string",
-          "format": "directory-path",
-          "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
-          "fa_icon": "fas fa-folder-open",
-          "default": "results"
-        },
         "reads": {
-          "type": "string",
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "id": {
+                "type": "string"
+              },
+              "fastq_1": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true
+              },
+              "fastq_2": {
+                "type": "string",
+                "format": "file-path",
+                "exists": true
+              }
+            },
+            "required": ["id", "fastq_1", "fastq_2"]
+          },
           "description": "The input read-pair files",
           "fa_icon": "fas fa-folder-open",
           "default": "${projectDir}/data/ggal/ggal_gut_{1,2}.fq"
@@ -32,6 +44,7 @@
         },
         "multiqc": {
           "type": "string",
+          "description": "Directory containing multiqc configuration",
           "fa_icon": "fas fa-folder-open",
           "default": "${projectDir}/multiqc"
         }