PMCC-BioinformaticsCore
diff --git a/‎janis_bioinformatics/tools/dawson/__init__.py‎
Lines changed: 4 additions & 4 deletions b/‎janis_bioinformatics/tools/dawson/__init__.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎janis_bioinformatics/tools/dawson/workflows/__init__.py‎
Lines changed: 1 addition & 4 deletions b/‎janis_bioinformatics/tools/dawson/workflows/__init__.py‎
Lines changed: 1 addition & 4 deletions
diff --git a/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎…on/workflows/freebayessomaticworkflow.py‎ ‎…/multisample/freebayessomaticworkflow.py‎janis_bioinformatics/tools/dawson/workflows/freebayessomaticworkflow.py renamed to janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/freebayessomaticworkflow.py
Lines changed: 4 additions & 4 deletions b/‎…on/workflows/freebayessomaticworkflow.py‎ ‎…/multisample/freebayessomaticworkflow.py‎janis_bioinformatics/tools/dawson/workflows/freebayessomaticworkflow.py renamed to janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/freebayessomaticworkflow.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/freebayessomaticworkflow_cram.py‎
Lines changed: 195 additions & 0 deletions b/‎janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/freebayessomaticworkflow_cram.py‎
Lines changed: 195 additions & 0 deletions
@@ -1,11 +1,11 @@
+# tool imports
 from .refilterstrelka2calls.latest import RefilterStrelka2CallsLatest
 from .refilterstrelka2calls.refilterstrelka2calls_0_1 import RefilterStrelka2Calls_0_1
 
 from .callsomaticfreebayes.latest import CallSomaticFreeBayesLatest
 from .callsomaticfreebayes.callsomaticfreebayes_0_1 import CallSomaticFreeBayes_0_1
 
-from .workflows.strelka2passworkflow import Strelka2PassWorkflow
-from .workflows.freebayessomaticworkflow import FreeBayesSomaticWorkflow
-from .workflows.mutectjointsomaticworkflow import Mutect2JointSomaticWorkflow
-
 from .createcallregions.base import CreateCallRegions
+
+# workflow imports (kept last: the workflow modules import tools from this package)
+from .workflows import *
@@ -1,4 +1 @@
-# from .strelka2passworkflow import Strelka2PassWorkflow
-
-# from .strelka2passanalysisstep1 import Strelka2PassWorkflowStep1
-# from .strelka2passanalysisstep2 import Strelka2PassWorkflowStep2
+from .variantcalling import *
@@ -0,0 +1 @@
+from .multisample import *
@@ -0,0 +1,8 @@
+from .freebayessomaticworkflow import FreeBayesSomaticWorkflow
+from .freebayessomaticworkflow_cram import FreeBayesSomaticWorkflowCram
+
+from .mutect2jointsomaticworkflow import Mutect2JointSomaticWorkflow
+from .mutect2jointsomaticworkflow_cram import Mutect2JointSomaticWorkflowCram
+
+from .strelka2passworkflow import Strelka2PassWorkflow
+from .strelka2passworkflow_cram import Strelka2PassWorkflowCram
@@ -1,13 +1,13 @@
 from datetime import date
 
-from janis_bioinformatics.data_types import CramCrai, FastaFai
+from janis_bioinformatics.data_types import BamBai, FastaFai
 from janis_bioinformatics.tools import BioinformaticsWorkflow
 from janis_bioinformatics.tools.bcftools import BcfToolsNormLatest as BcfToolsNorm
 from janis_bioinformatics.tools.dawson import (
     CallSomaticFreeBayes_0_1 as CallSomaticFreeBayes,
 )
 from janis_bioinformatics.tools.dawson.createcallregions.base import CreateCallRegions
-from janis_bioinformatics.tools.freebayes.versions import FreeBayesCram_1_3 as FreeBayes
+from janis_bioinformatics.tools.freebayes.versions import FreeBayes_1_3 as FreeBayes
 from janis_bioinformatics.tools.htslib import BGZipLatest as BGZip, TabixLatest as Tabix
 from janis_bioinformatics.tools.vcflib import (
     VcfAllelicPrimitivesLatest as VcfAllelicPrimitives,
@@ -36,7 +36,7 @@ def version(self):
     def bind_metadata(self):
         self.metadata.version = "0.1"
         self.metadata.dateCreated = date(2019, 10, 18)
-        self.metadata.dateUpdated = date(2019, 10, 25)
+        self.metadata.dateUpdated = date(2020, 12, 10)
 
         self.contributors = ["Sebastian Hollizeck"]
         self.metadata.keywords = [
@@ -54,7 +54,7 @@ def bind_metadata(self):
 
     def constructor(self):
 
-        self.input("bams", Array(CramCrai))
+        self.input("bams", Array(BamBai))
 
         self.input("reference", FastaFai)
         self.input("regionSize", int, default=10000000)
 
@@ -0,0 +1,195 @@
+from datetime import date
+
+from janis_bioinformatics.data_types import CramCrai, FastaFai
+from janis_bioinformatics.tools import BioinformaticsWorkflow
+from janis_bioinformatics.tools.bcftools import BcfToolsNormLatest as BcfToolsNorm
+from janis_bioinformatics.tools.dawson import (
+    CallSomaticFreeBayes_0_1 as CallSomaticFreeBayes,
+)
+from janis_bioinformatics.tools.dawson.createcallregions.base import CreateCallRegions
+from janis_bioinformatics.tools.freebayes.versions import FreeBayesCram_1_3 as FreeBayes
+from janis_bioinformatics.tools.htslib import BGZipLatest as BGZip, TabixLatest as Tabix
+from janis_bioinformatics.tools.vcflib import (
+    VcfAllelicPrimitivesLatest as VcfAllelicPrimitives,
+    VcfCombineLatest as VcfCombine,
+    VcfFixUpLatest as VcfFixUp,
+    VcfStreamSortLatest as VcfStreamSort,
+    VcfUniqAllelesLatest as VcfUniqAlleles,
+    VcfUniqLatest as VcfUniq,
+)
+from janis_core import Array, Int, String
+
+
+class FreeBayesSomaticWorkflowCram(BioinformaticsWorkflow):  # FreeBayes somatic workflow whose "bams" input is Array(CramCrai)
+    def id(self):
+        return "FreeBayesSomaticWorkflowCram"
+
+    def friendly_name(self):
+        return "Freebayes somatic workflow (CRAM)"
+
+    def tool_provider(self):
+        return "Dawson Labs"
+
+    def version(self):
+        return "0.1"
+
+    def bind_metadata(self):
+        self.metadata.version = "0.1"
+        self.metadata.dateCreated = date(2019, 10, 18)
+        self.metadata.dateUpdated = date(2020, 12, 10)
+
+        self.contributors = ["Sebastian Hollizeck"]  # NOTE(review): set on self, not self.metadata like the other fields - confirm
+        self.metadata.keywords = [
+            "variants",
+            "freebayes",
+            "variant caller",
+            "multi sample",
+        ]
+        self.metadata.documentation = """
+        This workflow uses the capabilities of freebayes to output all variants independent of the
+        diploid model which then in turn allows us to create a likelihood based difference between
+        the normal sample and an arbitrary amount of samples.
+        This allows a joint somatic genotyping of multiple samples of the same individual.
+                """.strip()
+
+    def constructor(self):
+
+        self.input("bams", Array(CramCrai))
+
+        self.input("reference", FastaFai)
+        self.input("regionSize", int, default=10000000)
+
+        self.input("normalSample", String)
+
+        # this is the maximum coverage per sample that we will analyse. It is automatically
+        # multiplied by the number of input alignment files we get (see skipCov below)
+        self.input("skipCov", Int(optional=True), default=500)
+
+        # minimum coverage; NOTE(review): unlike skipCov, minCov is passed on unscaled below - confirm
+        self.input("minCov", Int(optional=True), default=10)
+
+        # TODO: this should be a conditional (if the call regions are supplied we use them,
+        # otherwise we create them)
+        self.step(
+            "createCallRegions",
+            CreateCallRegions(
+                reference=self.reference, regionSize=self.regionSize, equalize=True
+            ),
+        )
+
+        self.step(
+            "callVariants",
+            FreeBayes(
+                bams=self.bams,
+                reference=self.reference,
+                pooledDiscreteFlag=True,
+                gtQuals=True,
+                strictFlag=True,
+                pooledContinousFlag=True,
+                reportMaxGLFlag=True,
+                noABPriorsFlag=True,
+                maxNumOfAlleles=4,
+                noPartObsFlag=True,
+                region=self.createCallRegions.regions,
+                # here we multiply the skipCov input by the number of inputs that we have
+                skipCov=(self.skipCov * self.bams.length()),
+                # things that are actually default, but janis does not recognize yet
+                useDupFlag=False,
+                minBaseQual=1,
+                minSupMQsum=0,
+                minSupQsum=0,
+                minCov=self.minCov,
+                # now here we are trying to play with the detection limits
+                # we set the fraction to be very low, to include ALL of the sites in a potential analysis
+                minAltFrac=0.01,
+                # and we want at least one sample that has two high quality variants OR multiple
+                # lower quality ones
+                minAltQSum=70,
+                # but we also want to have at least two reads overall with that variant
+                # we do not care if they are between samples or if they are in the same sample, but
+                # 2 is better than one
+                minAltTotal=2,
+            ),
+            scatter="region",
+        )
+        # might actually rewrite this once everything works, to not combine the files here, but do
+        # all of it scattered and then only combine the final output
+        # self.step("combineRegions", VcfCombine(vcf=self.callVariants.out))
+
+        #
+
+        # self.step("compressAll", BGZip(file=self.sortAll.out))
+        # self.step("indexAll", Tabix(file=self.compressAll.out))
+
+        self.step(
+            "callSomatic",
+            CallSomaticFreeBayes(
+                vcf=self.callVariants.out, normalSampleName=self.normalSample
+            ),
+            # scatter over the callVariants outputs (added for parallel processing)
+            scatter="vcf",
+        )
+
+        self.step("combineRegions", VcfCombine(vcf=self.callSomatic.out))
+
+        # should not be necessary here, but just to be safe
+        self.step(
+            "sortSomatic1",
+            VcfStreamSort(vcf=self.combineRegions.out, inMemoryFlag=True),
+        )
+
+        # no need to compress this here if it leads to problems when we don't have an index for the allelicPrimitves step
+        self.step(
+            "normalizeSomatic1",
+            BcfToolsNorm(
+                vcf=self.sortSomatic1.out,
+                reference=self.reference,
+                outputType="v",
+                outputFilename="normalised.vcf",
+            ),
+        )
+
+        self.step(
+            "allelicPrimitves",
+            VcfAllelicPrimitives(
+                vcf=self.normalizeSomatic1.out,
+                tagParsed="DECOMPOSED",
+                keepGenoFlag=True,
+            ),
+        )
+
+        self.step("fixSplitLines", VcfFixUp(vcf=self.allelicPrimitves.out))
+
+        self.step(
+            "sortSomatic2", VcfStreamSort(vcf=self.fixSplitLines.out, inMemoryFlag=True)
+        )
+
+        self.step(
+            "normalizeSomatic2",
+            BcfToolsNorm(
+                vcf=self.sortSomatic2.out,
+                reference=self.reference,
+                outputType="v",
+                outputFilename="normalised.vcf",
+            ),
+        )
+
+        self.step("uniqueAlleles", VcfUniqAlleles(vcf=self.normalizeSomatic2.out))
+
+        self.step(
+            "sortFinal", VcfStreamSort(vcf=self.uniqueAlleles.out, inMemoryFlag=True)
+        )
+
+        self.step("uniqVcf", VcfUniq(vcf=self.sortFinal.out))
+
+        self.step("compressFinal", BGZip(file=self.uniqVcf.out))
+
+        self.step("indexFinal", Tabix(inp=self.compressFinal.out))
+
+        self.output("somaticOutVcf", source=self.indexFinal)
+
+
+if __name__ == "__main__":
+
+    wf = FreeBayesSomaticWorkflow()
+    wdl = wf.translate("wdl", to_console=True, to_disk=False, write_inputs_file=False)