|
| 1 | +from datetime import datetime |
| 2 | +from typing import List, Dict, Any |
| 3 | + |
| 4 | +from janis_core import TOutput, File, OutputDocumentation |
| 5 | +from janis_bioinformatics.tools.bioinformaticstoolbase import BioinformaticsPythonTool |
| 6 | + |
| 7 | + |
class GenerateMantaConfig(BioinformaticsPythonTool):
    """Python tool that writes a fixed Manta ``[manta]`` configuration file.

    The generated text sets both remote-read-retrieval switches and
    ``useOverlapPairEvidence`` to 0; the file's own comment points at
    https://github.com/Illumina/manta/issues/213 for the custom set-up.
    """

    @staticmethod
    def code_block(output_filename: str = "output.txt") -> Dict[str, Any]:
        """
        Write the Manta configuration text to a file and report its path.

        :param output_filename: Filename to output to
        """
        # NOTE(review): everything is kept local to this function on purpose;
        # presumably Janis runs code_block outside this class -- confirm
        # before moving the text to a module-level constant or helper.
        #
        # One entry per output line, without the trailing newline. Empty
        # strings are the blank separator lines of the config file.
        config_lines = [
            "",
            "#",
            "# This section contains all configuration settings for the top-level manta workflow,",
            "#",
            "[manta]",
            "",
            "referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa",
            "",
            "# Run discovery and candidate reporting for all SVs/indels at or above this size",
            "# Separate option (to provide different default) used for runs in RNA-mode",
            "minCandidateVariantSize = 8",
            "rnaMinCandidateVariantSize = 1000",
            "",
            "# Remove all edges from the graph unless they're supported by this many 'observations'.",
            "# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.",
            "minEdgeObservations = 3",
            "",
            "# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.",
            "# Set to 0 to turn this filtration off",
            "graphNodeMaxEdgeCount = 10",
            "",
            "# Run discovery and candidate reporting for all SVs/indels with at least this",
            "# many spanning support observations",
            "minCandidateSpanningCount = 3",
            "",
            "# After candidate identification, only score and report SVs/indels at or above this size:",
            "minScoredVariantSize = 50",
            "",
            '# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:',
            "minDiploidVariantScore = 10",
            "",
            '# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:',
            "minPassDiploidVariantScore = 20",
            "",
            "# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:",
            "minPassDiploidGTScore = 15",
            "",
            "# somatic quality scores below this level are not included in the somatic vcf:",
            "minSomaticScore = 10",
            "",
            "# somatic quality scores below this level are filtered in the somatic vcf:",
            "minPassSomaticScore = 30",
            "",
            "# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote",
            "# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads",
            "# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime",
            "# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read",
            "# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.",
            "# This feature can be enabled/disabled separately for germline and cancer calling below.",
            "#",
            '# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes',
            "# all other calling modes.",
            "# custom set-up: https://github.com/Illumina/manta/issues/213",
            "enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 0",
            "enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0",
            "",
            "# Set if an overlapping read pair will be considered as evidence",
            "# Set to 0 to skip overlapping read pairs",
            "useOverlapPairEvidence = 0",
            "",
        ]

        with open(output_filename, "w+") as config_file:
            # Every entry, including the blank ones, is terminated by "\n".
            config_file.writelines(text + "\n" for text in config_lines)

        # The key matches the "out" output declared in outputs().
        return {"out": output_filename}

    def outputs(self) -> List[TOutput]:
        """Declare the single ``out`` File output: the written config file."""
        config_output = TOutput(
            "out",
            File,
            doc=OutputDocumentation(doc="Custom Manta config file"),
        )
        return [config_output]

    def id(self) -> str:
        """Return the tool identifier."""
        return "GenerateMantaConfig"

    def friendly_name(self) -> str:
        """Return the display name (same text as the identifier)."""
        return "GenerateMantaConfig"

    def tool_provider(self):
        """Return the name of the organisation providing this tool."""
        return "Peter MacCallum Cancer Centre"

    def version(self):
        """Return the tool version string."""
        return "v0.1.0"

    def bind_metadata(self):
        """Fill in creation/update dates, contributors and documentation."""
        meta = self.metadata
        meta.dateCreated = datetime(2021, 5, 27)
        meta.dateUpdated = datetime(2021, 5, 27)
        meta.contributors = ["Jiaan Yu"]
        meta.documentation = """\
Generate custom manta config file.
        """
0 commit comments