diff --git a/bin/fastqc.sh b/bin/fastqc.sh index 93f38b6..55bc33b 100755 --- a/bin/fastqc.sh +++ b/bin/fastqc.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -sample_id="$1" +id="$1" reads="$2" -mkdir fastqc_${sample_id}_logs -fastqc -o fastqc_${sample_id}_logs -f fastq -q ${reads} +mkdir fastqc_${id}_logs +fastqc -o fastqc_${id}_logs -f fastq -q ${reads} diff --git a/data/allreads.csv b/data/allreads.csv new file mode 100644 index 0000000..db40aac --- /dev/null +++ b/data/allreads.csv @@ -0,0 +1,4 @@ +gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq +liver,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_liver_2.fq +lung,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_lung_2.fq +spleen,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_spleen_2.fq \ No newline at end of file diff --git a/data/gut.csv b/data/gut.csv new file mode 100644 index 0000000..e9c7353 --- /dev/null +++ b/data/gut.csv @@ -0,0 +1 @@ +gut,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_1.fq,https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_gut_2.fq \ No newline at end of file diff --git a/main.nf b/main.nf index f585d53..c9fb0c6 100755 --- a/main.nf +++ b/main.nf @@ -4,36 +4,86 @@ * Proof of concept of a RNAseq pipeline implemented with Nextflow */ +// enable v2 operators (required for static type checking) +nextflow.preview.operators = true -/* - * Default pipeline parameters. They can be overriden on the command line eg. - * given `params.foo` specify on the run command line `--foo some_value`. - */ - -params.reads = "$baseDir/data/ggal/ggal_gut_{1,2}.fq" -params.transcriptome = "$baseDir/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" -params.outdir = "results" -params.multiqc = "$baseDir/multiqc" - +// enable static type checking +nextflow.preview.typeChecking = true // import modules include { RNASEQ } from './modules/rnaseq' +include { FastqPair ; Sample } from './modules/rnaseq' include { MULTIQC } from './modules/multiqc' +/* + * Pipeline parameters. They can be overridden on the command line, e.g. + * `params.reads` can be specified as `--reads '...'`. + */ +params { + // The input read-pair files + reads: List + + // The input transcriptome file + transcriptome: Path + + // Directory containing multiqc configuration + multiqc: Path = "${projectDir}/multiqc" +} + /* - * main script flow + * Entry workflow */ workflow { + main: + log.info """\ + R N A S E Q - N F P I P E L I N E + =================================== + reads : ${params.reads*.id.join(',')} + transcriptome: ${params.transcriptome} + outdir : ${workflow.outputDir} + """.stripIndent() + + (samples_ch, index) = RNASEQ( channel.fromList(params.reads), params.transcriptome ) + + multiqc_files_ch = samples_ch + .flatMap { sample -> [sample.fastqc, sample.quant] } + .collect() + + multiqc_report = MULTIQC( multiqc_files_ch, params.multiqc ) + + publish: + index = index + samples = samples_ch + multiqc_report = multiqc_report + + onComplete: + log.info( + workflow.success + ? "\nDone! Open the following report in your browser --> ${workflow.outputDir}/multiqc_report.html\n" + : "Oops .. something went wrong" + ) +} + +/* + * Pipeline outputs. By default they will be saved to the 'results' directory. + */ +output { + index: Path { + path '.' + } + + samples: Channel { + path { sample -> + sample.fastqc >> "fastqc/${sample.id}" + sample.quant >> "quant/${sample.id}" + } + index { + path 'samples.csv' + header true + } + } -log.info """\ - R N A S E Q - N F P I P E L I N E - =================================== - transcriptome: ${params.transcriptome} - reads : ${params.reads} - outdir : ${params.outdir} - """ - - read_pairs_ch = channel.fromFilePairs( params.reads, checkIfExists: true ) - RNASEQ( params.transcriptome, read_pairs_ch ) - MULTIQC( RNASEQ.out, params.multiqc ) + multiqc_report: Path { + path '.' + } } diff --git a/modules/fastqc/main.nf b/modules/fastqc/main.nf index 57c0477..8634f9f 100644 --- a/modules/fastqc/main.nf +++ b/modules/fastqc/main.nf @@ -1,18 +1,19 @@ -params.outdir = 'results' process FASTQC { - tag "FASTQC on $sample_id" + tag "$id" conda 'bioconda::fastqc=0.12.1' - publishDir params.outdir, mode:'copy' input: - tuple val(sample_id), path(reads) + id : String + fastq_1 : Path + fastq_2 : Path output: - path "fastqc_${sample_id}_logs", emit: logs + id : String = id + fastqc : Path = file("fastqc_${id}_logs") script: """ - fastqc.sh "$sample_id" "$reads" + fastqc.sh "$id" "$fastq_1 $fastq_2" """ } diff --git a/modules/index/main.nf b/modules/index/main.nf index 1d99b1b..5b1e305 100644 --- a/modules/index/main.nf +++ b/modules/index/main.nf @@ -4,10 +4,10 @@ process INDEX { conda 'bioconda::salmon=1.10.3' input: - path transcriptome + transcriptome : Path output: - path 'index' + file('index') script: """ diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 43d7450..7efb9a1 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -1,15 +1,13 @@ -params.outdir = 'results' process MULTIQC { conda 'bioconda::multiqc=1.27.1' - publishDir params.outdir, mode:'copy' input: - path '*' - path config + inputs : Bag + config : Path output: - path 'multiqc_report.html', emit: report + file('multiqc_report.html') script: """ diff --git a/modules/quant/main.nf b/modules/quant/main.nf index 7e7286f..0e59326 100644 --- a/modules/quant/main.nf +++ b/modules/quant/main.nf @@ -1,17 +1,20 @@ process QUANT { - tag "$pair_id" + tag "$id" conda 'bioconda::salmon=1.10.3' input: - path index - tuple val(pair_id), path(reads) + id : String + fastq_1 : Path + fastq_2 : Path + index : Path output: - path pair_id + id : String = id + quant : Path = file("quant_${id}") script: """ - salmon quant --threads $task.cpus --libType=U -i $index -1 ${reads[0]} -2 ${reads[1]} -o $pair_id + salmon quant --threads $task.cpus --libType=U -i $index -1 ${fastq_1} -2 ${fastq_2} -o quant_$id """ } diff --git a/modules/rnaseq.nf b/modules/rnaseq.nf index 2f607c1..9a4d0cc 100644 --- a/modules/rnaseq.nf +++ b/modules/rnaseq.nf @@ -1,19 +1,32 @@ -params.outdir = 'results' include { INDEX } from './index' include { QUANT } from './quant' include { FASTQC } from './fastqc' workflow RNASEQ { - take: - transcriptome - read_pairs_ch - - main: - INDEX(transcriptome) - FASTQC(read_pairs_ch) - QUANT(INDEX.out, read_pairs_ch) + take: + reads : Channel + transcriptome : Path - emit: - QUANT.out | concat(FASTQC.out) | collect -} \ No newline at end of file + main: + index = INDEX(transcriptome) + fastqc_ch = reads.map(FASTQC) + quant_ch = reads.map(QUANT, index: index) + samples_ch = fastqc_ch.join(quant_ch, 'id') + + emit: + samples : Channel = samples_ch + index : Path = index +} + +record FastqPair { + id : String + fastq_1 : Path + fastq_2 : Path +} + +record Sample { + id : String + fastqc : Path + quant : Path +} diff --git a/nextflow.config b/nextflow.config index c07d123..922745e 100755 --- a/nextflow.config +++ b/nextflow.config @@ -17,16 +17,20 @@ manifest { } /* - * default params + * params for default test data */ -params.outdir = "results" -params.reads = "${projectDir}/data/ggal/ggal_gut_{1,2}.fq" -params.transcriptome = "${projectDir}/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" -params.multiqc = "${projectDir}/multiqc" +params.reads = "${projectDir}/data/gut.csv" +params.transcriptome = "https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/refs/heads/master/data/ggal/ggal_1_48850000_49020000.Ggal71.500bpflank.fa" /* - * defines execution profiles for different environments + * publish settings + */ + +workflow.output.mode = 'copy' + +/* + * execution profiles for different environments */ profiles { @@ -35,7 +39,7 @@ profiles { } 'all-reads' { - params.reads = "${projectDir}/data/ggal/ggal_*_{1,2}.fq" + params.reads = "${projectDir}/data/allreads.csv" } 'arm64' { @@ -84,8 +88,6 @@ profiles { } 'batch' { - params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq' - params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa' process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1' process.executor = 'awsbatch' process.queue = 'nextflow-ci' @@ -94,15 +96,7 @@ profiles { aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws' } - 's3-data' { - process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1' - params.reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq' - params.transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa' - } - 'google-batch' { - params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa' - params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq' params.multiqc = 'gs://rnaseq-nf/multiqc' process.executor = 'google-batch' process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1' @@ -113,12 +107,6 @@ profiles { google.region = 'europe-west2' } - 'gs-data' { - process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1' - params.transcriptome = 'gs://rnaseq-nf/data/ggal/transcript.fa' - params.reads = 'gs://rnaseq-nf/data/ggal/gut_{1,2}.fq' - } - 'azure-batch' { process.container = 'docker.io/nextflow/rnaseq-nf:v1.3.1' workDir = 'az://nf-scratch/work' diff --git a/nextflow_schema.json b/nextflow_schema.json index 823417a..c0cab4f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,15 +11,27 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "properties": { - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open", - "default": "results" - }, "reads": { - "type": "string", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "exists": true + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "exists": true + } + }, + "required": ["id", "fastq_1", "fastq_2"] + }, "description": "The input read-pair files", "fa_icon": "fas fa-folder-open", "default": "${projectDir}/data/ggal/ggal_gut_{1,2}.fq" @@ -32,6 +44,7 @@ }, "multiqc": { "type": "string", + "description": "Directory containing multiqc configuration", "fa_icon": "fas fa-folder-open", "default": "${projectDir}/multiqc" }