nextflow-io
diff --git a/‎docs/nf4_science/genomics/04_testing.md‎
Lines changed: 480 additions & 466 deletions b/‎docs/nf4_science/genomics/04_testing.md‎
Lines changed: 480 additions & 466 deletions
diff --git a/‎mkdocs.yml‎
Lines changed: 2 additions & 0 deletions b/‎mkdocs.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/main.nf‎
Lines changed: 30 additions & 0 deletions b/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/main.nf‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/tests/main.nf.test‎
Lines changed: 111 additions & 0 deletions b/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/tests/main.nf.test‎
Lines changed: 111 additions & 0 deletions
diff --git a/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/tests/main.nf.test.snap‎
Lines changed: 25 additions & 0 deletions b/‎nf4-science/genomics/solutions/modules/gatk/haplotypecaller/tests/main.nf.test.snap‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎nf4-science/genomics/solutions/modules/gatk/jointgenotyping/main.nf‎
Lines changed: 36 additions & 0 deletions b/‎nf4-science/genomics/solutions/modules/gatk/jointgenotyping/main.nf‎
Lines changed: 36 additions & 0 deletions
@@ -29,6 +29,7 @@ nav:
       - nf4_science/genomics/01_per_sample_variant_calling.md
       - nf4_science/genomics/02_joint_calling.md
       - nf4_science/genomics/03_modules.md
+      - nf4_science/genomics/04_testing.md
 
   - Nextflow for RNAseq:
       - nf4_science/rnaseq/index.md
@@ -191,6 +192,7 @@ plugins:
         - side_quests/nf-test.md
         - side_quests/workflows_of_workflows.md
         - nf4_science/genomics/03_modules.md
+        - nf4_science/genomics/04_testing.md
 
   - i18n:
       docs_structure: suffix
 
@@ -0,0 +1,30 @@
+/*
+ * Call variants with GATK HaplotypeCaller
+ *
+ * Runs `gatk HaplotypeCaller` in GVCF mode (-ERC GVCF) on a single BAM file,
+ * restricted to the supplied intervals (-L).
+ *
+ * Inputs:
+ *   - tuple (input_bam, input_bam_index): the BAM file and its index
+ *   - ref_fasta, ref_index, ref_dict: reference FASTA and its companion files
+ *   - interval_list: intervals handed to -L
+ *
+ * Outputs (named after the full BAM file name, extension included):
+ *   - vcf: "<input_bam>.g.vcf"
+ *   - idx: "<input_bam>.g.vcf.idx"
+ */
+process GATK_HAPLOTYPECALLER {
+
+    // Container image pinned to GATK 4.5.0.0
+    container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
+
+    // Outputs are symlinked (not copied) into params.outdir
+    publishDir params.outdir, mode: 'symlink'
+
+    input:
+        // input_bam_index, ref_index and ref_dict are never referenced in the
+        // script below; declaring them as `path` inputs stages them in the task
+        // directory next to the files they accompany.
+        // NOTE(review): presumably GATK locates them by file name - confirm.
+        tuple path(input_bam), path(input_bam_index)
+        path ref_fasta
+        path ref_index
+        path ref_dict
+        path interval_list
+
+    output:
+        // Both names must match what -O below makes GATK write
+        path "${input_bam}.g.vcf"     , emit: vcf
+        path "${input_bam}.g.vcf.idx" , emit: idx
+
+    script:
+    """
+    gatk HaplotypeCaller \
+        -R ${ref_fasta} \
+        -I ${input_bam} \
+        -O ${input_bam}.g.vcf \
+        -L ${interval_list} \
+        -ERC GVCF
+    """
+}
@@ -0,0 +1,111 @@
+// nf-test suite for the GATK_HAPLOTYPECALLER process.
+//
+// Each test indexes one sample BAM with SAMTOOLS_INDEX in its `setup` block,
+// feeds the resulting channel to GATK_HAPLOTYPECALLER together with the
+// reference files, and then checks that the produced GVCF contains:
+//   1. the #CHROM header line ending in the expected sample name, and
+//   2. one known record at position 3277 for that sample.
+// The assertions look for specific lines in the GVCF instead of comparing
+// an md5 snapshot of the whole file.
+nextflow_process {
+
+    name "Test Process GATK_HAPLOTYPECALLER"
+    script "../main.nf"
+    process "GATK_HAPLOTYPECALLER"
+
+    test("Should call son's haplotype correctly") {
+
+        setup {
+            // Produce the (bam, bai) tuple the process expects as input[0]
+            run("SAMTOOLS_INDEX") {
+                script "../../../samtools/index/main.nf"
+                process {
+                    """
+                    input[0] = file("${projectDir}/data/bam/reads_son.bam")
+                    """
+                }
+            }
+        }
+
+        when {
+            params {
+                outdir = "tests/results"
+            }
+            process {
+                """
+                input[0] = SAMTOOLS_INDEX.out
+                input[1] = file("${projectDir}/data/ref/ref.fasta")
+                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
+                input[3] = file("${projectDir}/data/ref/ref.dict")
+                input[4] = file("${projectDir}/data/ref/intervals.bed")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            // process.out[0] is the first declared output (`vcf`); read it once
+            def gvcf_lines = path(process.out[0][0]).readLines()
+            assert gvcf_lines.contains('#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_son')
+            assert gvcf_lines.contains('20_10037292_10066351	3277	.	G	<NON_REF>	.	.	END=3282	GT:DP:GQ:MIN_DP:PL	0/0:25:72:24:0,72,719')
+        }
+
+    }
+
+    test("Should call mother's haplotype correctly") {
+
+        setup {
+            // Produce the (bam, bai) tuple the process expects as input[0]
+            run("SAMTOOLS_INDEX") {
+                script "../../../samtools/index/main.nf"
+                process {
+                    """
+                    input[0] = file("${projectDir}/data/bam/reads_mother.bam")
+                    """
+                }
+            }
+        }
+
+        when {
+            params {
+                outdir = "tests/results"
+            }
+            process {
+                """
+                input[0] = SAMTOOLS_INDEX.out
+                input[1] = file("${projectDir}/data/ref/ref.fasta")
+                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
+                input[3] = file("${projectDir}/data/ref/ref.dict")
+                input[4] = file("${projectDir}/data/ref/intervals.bed")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            // process.out[0] is the first declared output (`vcf`); read it once
+            def gvcf_lines = path(process.out[0][0]).readLines()
+            assert gvcf_lines.contains('#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_mother')
+            assert gvcf_lines.contains('20_10037292_10066351	3277	.	G	<NON_REF>	.	.	END=3278	GT:DP:GQ:MIN_DP:PL	0/0:38:99:37:0,102,1530')
+        }
+    }
+
+    test("Should call father's haplotype correctly") {
+
+        setup {
+            // Produce the (bam, bai) tuple the process expects as input[0]
+            run("SAMTOOLS_INDEX") {
+                script "../../../samtools/index/main.nf"
+                process {
+                    """
+                    input[0] = file("${projectDir}/data/bam/reads_father.bam")
+                    """
+                }
+            }
+        }
+
+        when {
+            params {
+                outdir = "tests/results"
+            }
+            process {
+                """
+                input[0] = SAMTOOLS_INDEX.out
+                input[1] = file("${projectDir}/data/ref/ref.fasta")
+                input[2] = file("${projectDir}/data/ref/ref.fasta.fai")
+                input[3] = file("${projectDir}/data/ref/ref.dict")
+                input[4] = file("${projectDir}/data/ref/intervals.bed")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            // process.out[0] is the first declared output (`vcf`); read it once
+            def gvcf_lines = path(process.out[0][0]).readLines()
+            assert gvcf_lines.contains('#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	reads_father')
+            assert gvcf_lines.contains('20_10037292_10066351	3277	.	G	<NON_REF>	.	.	END=3281	GT:DP:GQ:MIN_DP:PL	0/0:44:99:42:0,120,1800')
+        }
+    }
+}
@@ -0,0 +1,25 @@
+{
+    "Should call son's halotype correctly": {
+        "content": [
+            {
+                "0": [
+                    "reads_son.bam.g.vcf:md5,069316cdd4328542ffc6ae247b1dac39"
+                ],
+                "1": [
+                    "reads_son.bam.g.vcf.idx:md5,dc36c18f2afdc546f41e68b2687e9334"
+                ],
+                "idx": [
+                    "reads_son.bam.g.vcf.idx:md5,dc36c18f2afdc546f41e68b2687e9334"
+                ],
+                "vcf": [
+                    "reads_son.bam.g.vcf:md5,069316cdd4328542ffc6ae247b1dac39"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2025-03-04T09:26:58.537420116"
+    }
+}
@@ -0,0 +1,36 @@
+/*
+ * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls
+ *
+ * The script runs two GATK commands in sequence:
+ *   1. GenomicsDBImport  - imports every GVCF into the workspace "<cohort_name>_gdb"
+ *   2. GenotypeGVCFs     - genotypes that workspace into "<cohort_name>.joint.vcf"
+ *
+ * Inputs:
+ *   - all_gvcfs, all_idxs: the per-sample GVCFs and their index files
+ *   - interval_list: intervals handed to -L in both commands
+ *   - cohort_name: value used as prefix for the workspace and output names
+ *   - ref_fasta, ref_index, ref_dict: reference FASTA and its companion files
+ *
+ * Outputs:
+ *   - vcf: "<cohort_name>.joint.vcf"
+ *   - idx: "<cohort_name>.joint.vcf.idx"
+ */
+process GATK_JOINTGENOTYPING {
+
+    // Container image pinned to GATK 4.5.0.0
+    container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
+    // Outputs are copied into params.outdir
+    publishDir params.outdir, mode: 'copy'
+
+    input:
+        // all_idxs, ref_index and ref_dict are never referenced in the script
+        // below; declaring them as `path` inputs stages them in the task
+        // directory next to the files they accompany.
+        // NOTE(review): presumably GATK locates them by file name - confirm.
+        path all_gvcfs
+        path all_idxs
+        path interval_list
+        val cohort_name
+        path ref_fasta
+        path ref_index
+        path ref_dict
+
+    output:
+        path "${cohort_name}.joint.vcf"     , emit: vcf
+        path "${cohort_name}.joint.vcf.idx" , emit: idx
+
+    script:
+    // Build one "-V <file>" argument per GVCF, space-separated
+    def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ')
+    """
+    gatk GenomicsDBImport \
+        ${gvcfs_line} \
+        -L ${interval_list} \
+        --genomicsdb-workspace-path ${cohort_name}_gdb
+
+    gatk GenotypeGVCFs \
+        -R ${ref_fasta} \
+        -V gendb://${cohort_name}_gdb \
+        -L ${interval_list} \
+        -O ${cohort_name}.joint.vcf
+    """
+}