Skip to content

Commit 21fdb59

Browse files
author
SebastianHollizeck
committed
major restructure
1 parent 2c09938 commit 21fdb59

15 files changed

+712
-22
lines changed
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1+
#tools import
12
from .refilterstrelka2calls.latest import RefilterStrelka2CallsLatest
23
from .refilterstrelka2calls.refilterstrelka2calls_0_1 import RefilterStrelka2Calls_0_1
34

45
from .callsomaticfreebayes.latest import CallSomaticFreeBayesLatest
56
from .callsomaticfreebayes.callsomaticfreebayes_0_1 import CallSomaticFreeBayes_0_1
67

7-
from .workflows.strelka2passworkflow import Strelka2PassWorkflow
8-
from .workflows.freebayessomaticworkflow import FreeBayesSomaticWorkflow
9-
from .workflows.mutectjointsomaticworkflow import Mutect2JointSomaticWorkflow
10-
118
from .createcallregions.base import CreateCallRegions
9+
10+
# workflow import
11+
from .workflows import *
Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1 @@
1-
# from .strelka2passworkflow import Strelka2PassWorkflow
2-
3-
# from .strelka2passanalysisstep1 import Strelka2PassWorkflowStep1
4-
# from .strelka2passanalysisstep2 import Strelka2PassWorkflowStep2
1+
from .variantcalling import *
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .multisample import *
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .freebayessomaticworkflow import FreeBayesSomaticWorkflow
2+
from .freebayessomaticworkflow_cram import FreeBayesSomaticWorkflowCram
3+
4+
from .mutect2jointsomaticworkflow import Mutect2JointSomaticWorkflow
5+
from .mutect2jointsomaticworkflow_cram import Mutect2JointSomaticWorkflowCram
6+
7+
from .strelka2passworkflow import Strelka2PassWorkflow
8+
from .strelka2passworkflow_cram import Strelka2PassWorkflowCram

janis_bioinformatics/tools/dawson/workflows/freebayessomaticworkflow.py renamed to janis_bioinformatics/tools/dawson/workflows/variantcalling/multisample/freebayessomaticworkflow.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
from datetime import date
22

3-
from janis_bioinformatics.data_types import CramCrai, FastaFai
3+
from janis_bioinformatics.data_types import BamBai, FastaFai
44
from janis_bioinformatics.tools import BioinformaticsWorkflow
55
from janis_bioinformatics.tools.bcftools import BcfToolsNormLatest as BcfToolsNorm
66
from janis_bioinformatics.tools.dawson import (
77
CallSomaticFreeBayes_0_1 as CallSomaticFreeBayes,
88
)
99
from janis_bioinformatics.tools.dawson.createcallregions.base import CreateCallRegions
10-
from janis_bioinformatics.tools.freebayes.versions import FreeBayesCram_1_3 as FreeBayes
10+
from janis_bioinformatics.tools.freebayes.versions import FreeBayes_1_3 as FreeBayes
1111
from janis_bioinformatics.tools.htslib import BGZipLatest as BGZip, TabixLatest as Tabix
1212
from janis_bioinformatics.tools.vcflib import (
1313
VcfAllelicPrimitivesLatest as VcfAllelicPrimitives,
@@ -36,7 +36,7 @@ def version(self):
3636
def bind_metadata(self):
3737
self.metadata.version = "0.1"
3838
self.metadata.dateCreated = date(2019, 10, 18)
39-
self.metadata.dateUpdated = date(2019, 10, 25)
39+
self.metadata.dateUpdated = date(2020, 12, 10)
4040

4141
self.contributors = ["Sebastian Hollizeck"]
4242
self.metadata.keywords = [
@@ -54,7 +54,7 @@ def bind_metadata(self):
5454

5555
def constructor(self):
5656

57-
self.input("bams", Array(CramCrai))
57+
self.input("bams", Array(BamBai))
5858

5959
self.input("reference", FastaFai)
6060
self.input("regionSize", int, default=10000000)
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
from datetime import date
2+
3+
from janis_bioinformatics.data_types import CramCrai, FastaFai
4+
from janis_bioinformatics.tools import BioinformaticsWorkflow
5+
from janis_bioinformatics.tools.bcftools import BcfToolsNormLatest as BcfToolsNorm
6+
from janis_bioinformatics.tools.dawson import (
7+
CallSomaticFreeBayes_0_1 as CallSomaticFreeBayes,
8+
)
9+
from janis_bioinformatics.tools.dawson.createcallregions.base import CreateCallRegions
10+
from janis_bioinformatics.tools.freebayes.versions import FreeBayesCram_1_3 as FreeBayes
11+
from janis_bioinformatics.tools.htslib import BGZipLatest as BGZip, TabixLatest as Tabix
12+
from janis_bioinformatics.tools.vcflib import (
13+
VcfAllelicPrimitivesLatest as VcfAllelicPrimitives,
14+
VcfCombineLatest as VcfCombine,
15+
VcfFixUpLatest as VcfFixUp,
16+
VcfStreamSortLatest as VcfStreamSort,
17+
VcfUniqAllelesLatest as VcfUniqAlleles,
18+
VcfUniqLatest as VcfUniq,
19+
)
20+
from janis_core import Array, Int, String
21+
22+
23+
class FreeBayesSomaticWorkflowCram(BioinformaticsWorkflow):
    """Multi-sample somatic variant calling with FreeBayes on CRAM input.

    The workflow splits the reference into call regions, runs FreeBayes on
    every region (scattered), performs the somatic call per region against the
    named normal sample, and then combines, sorts, normalises, decomposes and
    de-duplicates the result before compressing and indexing the final VCF.
    """

    def id(self):
        """Return the unique identifier of this workflow."""
        return "FreeBayesSomaticWorkflowCram"

    def friendly_name(self):
        """Return the human-readable name of this workflow."""
        return "Freebayes somatic workflow (CRAM)"

    def tool_provider(self):
        """Return the name of the group providing this workflow."""
        return "Dawson Labs"

    def version(self):
        """Return the workflow version string."""
        return "0.1"

    def bind_metadata(self):
        """Populate ``self.metadata`` (version, dates, contributors, keywords, docs)."""
        self.metadata.version = "0.1"
        self.metadata.dateCreated = date(2019, 10, 18)
        self.metadata.dateUpdated = date(2020, 12, 10)

        # Contributors belong on the metadata object like every other field
        # set here; assigning to ``self.contributors`` only created a stray
        # attribute on the workflow itself.
        self.metadata.contributors = ["Sebastian Hollizeck"]
        self.metadata.keywords = [
            "variants",
            "freebayes",
            "variant caller",
            "multi sample",
        ]
        self.metadata.documentation = """
This workflow uses the capabilities of freebayes to output all variants independent of the
diploid model which then in turn allows us to create a likelihood based difference between
the normal sample and an arbitrary amount of samples.
This allows a joint somatic genotyping of multiple samples of the same individual.
""".strip()

    def constructor(self):
        """Declare the workflow inputs, wire up all steps and expose the output."""

        self.input("bams", Array(CramCrai))

        self.input("reference", FastaFai)
        self.input("regionSize", int, default=10000000)

        self.input("normalSample", String)

        # This is the maximum coverage per sample that will be analysed. It is
        # automatically multiplied by the number of input files (see the
        # ``skipCov`` argument of the ``callVariants`` step below).
        self.input("skipCov", Int(optional=True), default=500)

        # Minimum coverage; passed to FreeBayes as ``minCov``.
        self.input("minCov", Int(optional=True), default=10)

        # This should be a conditional: if call regions are supplied we should
        # use them, otherwise we create them.
        self.step(
            "createCallRegions",
            CreateCallRegions(
                reference=self.reference, regionSize=self.regionSize, equalize=True
            ),
        )

        self.step(
            "callVariants",
            FreeBayes(
                bams=self.bams,
                reference=self.reference,
                pooledDiscreteFlag=True,
                gtQuals=True,
                strictFlag=True,
                pooledContinousFlag=True,
                reportMaxGLFlag=True,
                noABPriorsFlag=True,
                maxNumOfAlleles=4,
                noPartObsFlag=True,
                region=self.createCallRegions.regions,
                # Here we multiply the skipCov input by the number of inputs we have.
                skipCov=(self.skipCov * self.bams.length()),
                # Things that are actually defaults, but janis does not recognise yet.
                useDupFlag=False,
                minBaseQual=1,
                minSupMQsum=0,
                minSupQsum=0,
                minCov=self.minCov,
                # Now we are trying to play with the detection limits: we set
                # the fraction very low, to include ALL of the sites in a
                # potential analysis ...
                minAltFrac=0.01,
                # ... and we want at least one sample that has two high quality
                # variants OR multiple lower quality ones ...
                minAltQSum=70,
                # ... but we also want at least two reads overall with that
                # variant. We do not care whether they are spread between
                # samples or in the same sample, but 2 is better than 1.
                minAltTotal=2,
            ),
            scatter="region",
        )

        # Might rewrite this once everything works: instead of combining the
        # per-region files here, keep everything scattered and only combine
        # the final output.

        self.step(
            "callSomatic",
            CallSomaticFreeBayes(
                vcf=self.callVariants.out, normalSampleName=self.normalSample
            ),
            # Scattered so the per-region VCFs are processed in parallel.
            scatter="vcf",
        )

        self.step("combineRegions", VcfCombine(vcf=self.callSomatic.out))

        # Should not be necessary here, but just to be safe.
        self.step(
            "sortSomatic1",
            VcfStreamSort(vcf=self.combineRegions.out, inMemoryFlag=True),
        )

        # No need to compress here: it would only cause problems because there
        # is no index available for the allelic-primitives step that follows.
        self.step(
            "normalizeSomatic1",
            BcfToolsNorm(
                vcf=self.sortSomatic1.out,
                reference=self.reference,
                outputType="v",
                outputFilename="normalised.vcf",
            ),
        )

        # NOTE: the step id "allelicPrimitves" is misspelled, but it is a
        # runtime identifier and is kept unchanged.
        self.step(
            "allelicPrimitves",
            VcfAllelicPrimitives(
                vcf=self.normalizeSomatic1.out,
                tagParsed="DECOMPOSED",
                keepGenoFlag=True,
            ),
        )

        self.step("fixSplitLines", VcfFixUp(vcf=self.allelicPrimitves.out))

        self.step(
            "sortSomatic2", VcfStreamSort(vcf=self.fixSplitLines.out, inMemoryFlag=True)
        )

        self.step(
            "normalizeSomatic2",
            BcfToolsNorm(
                vcf=self.sortSomatic2.out,
                reference=self.reference,
                outputType="v",
                outputFilename="normalised.vcf",
            ),
        )

        self.step("uniqueAlleles", VcfUniqAlleles(vcf=self.normalizeSomatic2.out))

        self.step(
            "sortFinal", VcfStreamSort(vcf=self.uniqueAlleles.out, inMemoryFlag=True)
        )

        self.step("uniqVcf", VcfUniq(vcf=self.sortFinal.out))

        self.step("compressFinal", BGZip(file=self.uniqVcf.out))

        self.step("indexFinal", Tabix(inp=self.compressFinal.out))

        self.output("somaticOutVcf", source=self.indexFinal)
190+
191+
192+
if __name__ == "__main__":
    # Translate the workflow defined in this module to WDL, with console
    # output enabled and writing to disk / inputs file disabled.
    # The class defined here is FreeBayesSomaticWorkflowCram; the previous
    # reference to FreeBayesSomaticWorkflow is not defined or imported in this
    # module and raised NameError.
    wf = FreeBayesSomaticWorkflowCram()
    wdl = wf.translate("wdl", to_console=True, to_disk=False, write_inputs_file=False)

0 commit comments

Comments
 (0)