Skip to content

Commit e94377a

Browse files
authored
Merge pull request #102 from jiaan-yu/manta-fix
Manta fix
2 parents 0fb3957 + 67424bd commit e94377a

File tree

6 files changed

+195
-13
lines changed

6 files changed

+195
-13
lines changed

janis_bioinformatics/tools/common/splitmultiallele.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import os
22
from datetime import datetime
33
from typing import List, Dict, Any
4-
from janis_core import get_value_for_hints_and_ordered_resource_tuple, ToolMetadata
4+
from janis_core import (
5+
get_value_for_hints_and_ordered_resource_tuple,
6+
ToolMetadata,
7+
UnionType,
8+
)
59
from janis_core.tool.test_classes import TTestCase
610

711
from janis_bioinformatics.data_types import FastaWithDict, CompressedVcf
@@ -77,7 +81,9 @@ def memory(self, hints: Dict[str, Any]):
7781

7882
def inputs(self) -> List[ToolInput]:
7983
return [
80-
ToolInput("vcf", Vcf(), position=1, shell_quote=False),
84+
ToolInput(
85+
"vcf", UnionType(Vcf, CompressedVcf), position=1, shell_quote=False
86+
),
8187
ToolInput(
8288
"reference", FastaWithDict(), prefix="-r", position=4, shell_quote=False
8389
),

janis_bioinformatics/tools/illumina/strelkagermline/base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ def memory(self, hints: Dict[str, Any]):
7373

7474
def inputs(self) -> List[ToolInput]:
7575
return [
76+
ToolInput(
77+
"config",
78+
File(optional=True),
79+
prefix="--config",
80+
position=1,
81+
shell_quote=False,
82+
doc="provide a configuration file to override defaults in \
83+
global config file \
84+
(/opt/strelka/bin/configureStrelkaGermlineWorkflow.py.ini)",
85+
),
7686
ToolInput(
7787
"bam",
7888
BamBai(),

janis_bioinformatics/tools/pmac/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@
1818
from .generatintervalsbychromosome.generateintervalsbychromosome import (
1919
GenerateIntervalsByChromosome,
2020
)
21+
from .generatemantaconfig import GenerateMantaConfig
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
from datetime import datetime
2+
from typing import List, Dict, Any
3+
4+
from janis_core import TOutput, File, OutputDocumentation
5+
from janis_bioinformatics.tools.bioinformaticstoolbase import BioinformaticsPythonTool
6+
7+
8+
class GenerateMantaConfig(BioinformaticsPythonTool):
    """Python tool that writes a fixed, customised Manta configuration file."""

    @staticmethod
    def code_block(output_filename: str = "output.txt") -> Dict[str, Any]:
        """
        Write the custom Manta config to ``output_filename`` and report its path.

        :param output_filename: Filename to output to
        """
        # NOTE(review): this function is presumably lifted out and executed as a
        # standalone script by the python-tool machinery, so it is kept fully
        # self-contained (builtins only, no module-level helpers) -- confirm.
        #
        # Every entry below is one line of the generated file, in order.
        config_lines = (
            "\n",
            "#\n",
            "# This section contains all configuration settings for the top-level manta workflow,\n",
            "#\n",
            "[manta]\n",
            "\n",
            "referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa\n",
            "\n",
            "# Run discovery and candidate reporting for all SVs/indels at or above this size\n",
            "# Separate option (to provide different default) used for runs in RNA-mode\n",
            "minCandidateVariantSize = 8\n",
            "rnaMinCandidateVariantSize = 1000\n",
            "\n",
            "# Remove all edges from the graph unless they're supported by this many 'observations'.\n",
            "# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.\n",
            "minEdgeObservations = 3\n",
            "\n",
            "# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.\n",
            "# Set to 0 to turn this filtration off\n",
            "graphNodeMaxEdgeCount = 10\n",
            "\n",
            "# Run discovery and candidate reporting for all SVs/indels with at least this\n",
            "# many spanning support observations\n",
            "minCandidateSpanningCount = 3\n",
            "\n",
            "# After candidate identification, only score and report SVs/indels at or above this size:\n",
            "minScoredVariantSize = 50\n",
            "\n",
            '# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:\n',
            "minDiploidVariantScore = 10\n",
            "\n",
            '# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:\n',
            "minPassDiploidVariantScore = 20\n",
            "\n",
            "# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:\n",
            "minPassDiploidGTScore = 15\n",
            "\n",
            "# somatic quality scores below this level are not included in the somatic vcf:\n",
            "minSomaticScore = 10\n",
            "\n",
            "# somatic quality scores below this level are filtered in the somatic vcf:\n",
            "minPassSomaticScore = 30\n",
            "\n",
            "# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote\n",
            "# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads\n",
            "# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime\n",
            "# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read\n",
            "# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.\n",
            "# This feature can be enabled/disabled separately for germline and cancer calling below.\n",
            "#\n",
            '# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes\n',
            "# all other calling modes.\n",
            "# custom set-up: https://github.com/Illumina/manta/issues/213\n",
            "enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 0\n",
            "enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0\n",
            "\n",
            "# Set if an overlapping read pair will be considered as evidence\n",
            "# Set to 0 to skip overlapping read pairs\n",
            "useOverlapPairEvidence = 0\n",
            "\n",
        )

        with open(output_filename, "w+") as handle:
            handle.writelines(config_lines)

        return {"out": output_filename}

    def outputs(self) -> List[TOutput]:
        """Declare the single ``out`` output: the generated config file."""
        config_output = TOutput(
            "out",
            File,
            doc=OutputDocumentation(doc="Custom Manta config file"),
        )
        return [config_output]

    def id(self) -> str:
        """Return the tool identifier."""
        return "GenerateMantaConfig"

    def friendly_name(self) -> str:
        """Return the human-readable tool name."""
        return "GenerateMantaConfig"

    def tool_provider(self):
        """Return the name of the organisation providing this tool."""
        return "Peter MacCallum Cancer Centre"

    def version(self):
        """Return the tool version string."""
        return "v0.1.0"

    def bind_metadata(self):
        """Fill in creation/update dates, contributors and documentation."""
        meta = self.metadata
        meta.dateCreated = datetime(2021, 5, 27)
        meta.dateUpdated = datetime(2021, 5, 27)
        meta.contributors = ["Jiaan Yu"]
        meta.documentation = """\
Generate custom manta config file.
"""

janis_bioinformatics/tools/variantcallers/illuminagermline_strelka.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
from datetime import datetime
2-
from janis_core import Boolean, WorkflowMetadata
2+
from janis_core import Boolean, WorkflowMetadata, File
33
from janis_unix.tools import UncompressArchive
44

5-
from janis_bioinformatics.data_types import FastaWithDict, BamBai, BedTabix, Vcf
5+
from janis_bioinformatics.data_types import (
6+
FastaWithDict,
7+
BamBai,
8+
BedTabix,
9+
CompressedVcf,
10+
)
611
from janis_bioinformatics.tools import BioinformaticsWorkflow
712
from janis_bioinformatics.tools.common import SplitMultiAllele
8-
from janis_bioinformatics.tools.htslib import BGZipLatest, TabixLatest
913
from janis_bioinformatics.tools.illumina import StrelkaGermline_2_9_10, Manta_1_5_0
1014
from janis_bioinformatics.tools.vcftools import VcfToolsvcftoolsLatest
1115

@@ -27,8 +31,12 @@ def constructor(self):
2731

2832
self.input("bam", BamBai)
2933
self.input("reference", FastaWithDict)
34+
35+
# optional
3036
self.input("intervals", BedTabix(optional=True))
3137
self.input("is_exome", Boolean(optional=True))
38+
self.input("manta_config", File(optional=True))
39+
self.input("strelka_config", File(optional=True))
3240

3341
self.step(
3442
"manta",
@@ -37,6 +45,7 @@ def constructor(self):
3745
reference=self.reference,
3846
callRegions=self.intervals,
3947
exome=self.is_exome,
48+
config=self.manta_config,
4049
),
4150
)
4251

@@ -45,18 +54,18 @@ def constructor(self):
4554
StrelkaGermline_2_9_10(
4655
bam=self.bam,
4756
reference=self.reference,
48-
indelCandidates=self.manta.candidateSmallIndels,
4957
callRegions=self.intervals,
5058
exome=self.is_exome,
59+
config=self.strelka_config,
5160
),
5261
)
5362

5463
# normalise and filter "PASS" variants
55-
self.step("uncompressvcf", UncompressArchive(file=self.strelka.variants))
5664
self.step(
5765
"splitnormalisevcf",
5866
SplitMultiAllele(
59-
vcf=self.uncompressvcf.out.as_type(Vcf), reference=self.reference
67+
vcf=self.strelka.variants.as_type(CompressedVcf),
68+
reference=self.reference,
6069
),
6170
)
6271

@@ -78,7 +87,7 @@ def bind_metadata(self):
7887
return WorkflowMetadata(
7988
contributors=["Jiaan Yu", "Michael Franklin"],
8089
dateCreated=datetime(2019, 3, 28),
81-
dateUpdated=datetime(2020, 7, 14),
90+
dateUpdated=datetime(2021, 5, 27),
8291
documentation="",
8392
)
8493

janis_bioinformatics/tools/variantcallers/illuminasomatic_strelka.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from datetime import datetime
2-
from janis_core import Boolean, WorkflowMetadata
2+
from janis_core import Boolean, WorkflowMetadata, File
33
from janis_unix.tools import UncompressArchive
44

55
from janis_bioinformatics.data_types import FastaWithDict, BamBai, BedTabix
@@ -31,11 +31,13 @@ def constructor(self):
3131

3232
self.input("normal_bam", BamBai)
3333
self.input("tumor_bam", BamBai)
34-
3534
self.input("reference", FastaWithDict)
36-
self.input("intervals", BedTabix(optional=True))
3735

36+
# optional
37+
self.input("intervals", BedTabix(optional=True))
3838
self.input("is_exome", Boolean(optional=True))
39+
self.input("manta_config", File(optional=True))
40+
self.input("strelka_config", File(optional=True))
3941

4042
self.step(
4143
"manta",
@@ -45,6 +47,7 @@ def constructor(self):
4547
reference=self.reference,
4648
callRegions=self.intervals,
4749
exome=self.is_exome,
50+
config=self.manta_config,
4851
),
4952
)
5053
self.step(
@@ -56,6 +59,7 @@ def constructor(self):
5659
reference=self.reference,
5760
callRegions=self.intervals,
5861
exome=self.is_exome,
62+
config=self.strelka_config,
5963
),
6064
)
6165
self.step(
@@ -93,6 +97,6 @@ def bind_metadata(self):
9397
return WorkflowMetadata(
9498
contributors=["Jiaan Yu", "Michael Franklin"],
9599
dateCreated=datetime(2020, 6, 12),
96-
dateUpdated=datetime(2020, 8, 25),
100+
dateUpdated=datetime(2021, 5, 27),
97101
documentation="",
98102
)

0 commit comments

Comments
 (0)