galaxyproject
diff --git a/‎workflows/variant-calling/ploidy-aware-genotype-calling/.dockstore.yml‎
Lines changed: 13 additions & 0 deletions b/‎workflows/variant-calling/ploidy-aware-genotype-calling/.dockstore.yml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎workflows/variant-calling/ploidy-aware-genotype-calling/CHANGELOG.md‎
Lines changed: 5 additions & 0 deletions b/‎workflows/variant-calling/ploidy-aware-genotype-calling/CHANGELOG.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎workflows/variant-calling/ploidy-aware-genotype-calling/README.md‎
Lines changed: 62 additions & 0 deletions b/‎workflows/variant-calling/ploidy-aware-genotype-calling/README.md‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎workflows/variant-calling/ploidy-aware-genotype-calling/generic-genotype+variant-calling-wgs-pe-test.yml‎
Lines changed: 64 additions & 0 deletions b/‎workflows/variant-calling/ploidy-aware-genotype-calling/generic-genotype+variant-calling-wgs-pe-test.yml‎
Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,13 @@
+version: 1.2  # .dockstore.yml schema version
+workflows:
+  - name: main
+    subclass: Galaxy
+    publish: true
+    primaryDescriptorPath: /generic-genotype+variant-calling-wgs-pe.ga
+    testParameterFiles:
+      - /generic-genotype+variant-calling-wgs-pe-test.yml
+    authors:
+      - name: Saim Momin
+        orcid: '0009-0003-9935-828X'
+      - name: Wolfgang Maier
+        orcid: '0000-0002-9464-6640'
@@ -0,0 +1,5 @@
+# Changelog
+
+## [0.1] 2026-02-18
+
+First release.
@@ -0,0 +1,62 @@
+# Paired-End Variant and Ploidy-Aware Genotype Calling
+
+This workflow performs paired-end reads quality control, mapping and germline
+variant and genotype calling for organisms of any given ploidy.
+
+It takes a collection of Illumina paired-end FASTQ files, a reference genome
+in FASTA format, a gene annotation in GTF format, and a ploidy parameter, and
+produces annotated variants both as VCF and as a tab-separated table.
+
+Reads are first quality- and adapter-trimmed with fastp. Trimmed reads
+are then mapped to the reference genome using BWA-MEM. The resulting
+alignments are filtered with Samtools view to retain only properly paired
+reads, and PCR duplicates are removed using Picard MarkDuplicates. QC metrics
+from fastp, Samtools stats, and MarkDuplicates are aggregated into a single
+MultiQC report.
+
+Variant and genotype calling is performed with FreeBayes, which operates in
+haplotype-based mode on the duplicate-free BAM.
+The ploidy assumed for calling is configurable and defaults to 2 (diploid).
+
+The initial VCF output is normalised and left-aligned with bcftools norm,
+splitting multi-allelic sites into individual biallelic records.
+Variants are then functionally annotated using SnpEff, with a custom SnpEff
+database built on-the-fly from the provided reference FASTA and GTF annotation.
+Annotation is restricted to coding and splicing effects (downstream,
+intergenic, intronic, UTR, and upstream effects are excluded). The annotated
+VCF is subsequently parsed with SnpSift Extract Fields into a flat tabular
+format, and per-sample tables are merged into a single file.
+
+## Inputs
+
+Paired Collection: a list:paired dataset collection of Illumina paired-end
+reads in fastqsanger or fastqsanger.gz format.
+
+Reference Genome FASTA: the reference genome sequence to use for mapping
+and variant calling.
+
+Annotation GTF: a GTF gene annotation file corresponding to the reference
+genome, used to build the SnpEff database.
+
+Set Ploidy for FreeBayes Variant Calling: an integer specifying the ploidy
+of the organism (default: 2).
+
+
+## Outputs
+
+Fastp HTML report: per-sample HTML quality control report from fastp.
+
+Preprocessing and mapping MultiQC report: aggregated HTML QC report
+combining fastp, Samtools stats, and Picard MarkDuplicates metrics across
+all samples.
+
+SnpEff annotated variants (VCF): annotated variants in VCF format, tagged VariantsAsVCF.
+
+SnpEff HTML summary report: HTML summary statistics from SnpEff describing the
+distribution of variant effects across functional categories.
+
+Annotated variants table: a merged, tab-separated table of annotated variants
+across all samples, tagged VariantsAsTSV. Columns include CHROM, POS,
+FILTER, REF, ALT, DP, AF, DP4, SB, and per-effect fields for
+impact, functional class, effect type, gene name, codon change, amino acid
+change, and transcript ID.
@@ -0,0 +1,64 @@
+- doc: Test outline for generic-genotype+variant-calling-wgs-pe.ga
+  # Workflow inputs, keyed by input label.
+  job:
+    Paired Collection:
+      class: Collection
+      collection_type: list:paired
+      elements:
+        - class: Collection
+          type: paired
+          identifier: sample1
+          elements:
+            - class: File
+              identifier: forward
+              path: test-data/sample1_R1.fastq.gz
+              filetype: fastqsanger.gz
+            - class: File
+              identifier: reverse
+              path: test-data/sample1_R2.fastq.gz
+              filetype: fastqsanger.gz
+    Reference Genome fasta:
+      class: File
+      path: test-data/reference.fasta
+      filetype: fasta
+    Annotation GTF:
+      class: File
+      path: test-data/annotation.gtf
+      filetype: gtf
+    Set Ploidy for FreeBayes Variant Calling: 2
+  # Expected outputs, keyed by output label.
+  outputs:
+    fastp HTML report:
+      element_tests:
+        sample1:
+          asserts:
+            has_text:
+              text: "<tr><td class='col1'>total reads:</td><td class='col2'>600</td></tr>"
+
+    Preprocessing and mapping reports:
+      asserts:
+        has_text:
+          text: "MultiQC Report"
+
+    SnpEff variants:
+      # List-form assertions: a mapping cannot hold two `has_text` keys
+      # (most YAML loaders silently keep only the last duplicate).
+      asserts:
+        - that: has_text
+          text: "##fileformat=VCFv4.2"
+        - that: has_text
+          text: "##contig=<ID=chr1,length=2000>"
+        - that: has_n_lines
+          n: 71
+
+    SnpEff eff reports:
+      # List form for the same reason: two `has_text` checks are needed.
+      asserts:
+        - that: has_text
+          text: "<td valign=top> <b> Number of variants processed <br> (i.e. after filter and non-variants) </b> </td>"
+        - that: has_text
+          text: "<td> 2 </td>"
+
+    Annotated Variants:
+      asserts:
+        has_n_lines:
+          n: 3
+        has_n_columns:
+          n: 19
+        has_text:
+          text: "chr1\t751\t.\tC\tG\t60\t1.0\t0\t60\t0.0\t4.31318\tLOW\tSILENT\tSYNONYMOUS_CODING\tSynGene1\tccC/ccG\tP184\tTRANS1"