Skip to content

Commit f98b36b

Browse files
committed
Add tests
1 parent 819dab2 commit f98b36b

File tree

2 files changed

+157
-12
lines changed

2 files changed

+157
-12
lines changed

genmod/annotate_models/models/compound_model.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,11 @@
1212

1313
from __future__ import print_function
1414
from genmod.vcf_tools.genotype import Genotype
15-
from intervaltree import IntervalTree
1615
import logging
1716
from typing import Union
1817

19-
# TODO: What is family?
20-
# TODO: Remove intervals
21-
def check_compounds(variant_1: dict, variant_2: dict, family, intervals: IntervalTree, phased: bool) -> bool:
18+
19+
def check_compounds(variant_1: dict, variant_2: dict, family, phased: bool) -> bool:
2220
"""
2321
Check if two variants of a pair follow the compound heterozygous model.
2422
@@ -56,8 +54,12 @@ def check_compounds(variant_1: dict, variant_2: dict, family, intervals: Interva
5654
for parent_id in (individual.mother, individual.father):
5755
if parent_id != "0":
5856
parent = family.individuals[parent_id]
59-
parent_genotypes = [get_genotype(variant, parent_id) for variant in (variant_1, variant_2)]
60-
if parent.healthy and all(genotype.has_variant for genotype in parent_genotypes):
57+
parent_genotypes = [
58+
get_genotype(variant, parent_id) for variant in (variant_1, variant_2)
59+
]
60+
if parent.healthy and all(
61+
genotype.has_variant for genotype in parent_genotypes
62+
):
6163
return False
6264

6365
genotype_1 = get_genotype(variant_1, individual_id)
@@ -69,13 +71,17 @@ def check_compounds(variant_1: dict, variant_2: dict, family, intervals: Interva
6971
return False
7072

7173
# If the individual is affected and phased we can say that the variants need to be on different alleles, otherwise it can not be a compound pair.
72-
if individual.affected and phased and variants_on_same_allele(individual.individual_id, variant_1, variant_2):
74+
if (
75+
individual.affected
76+
and phased
77+
and variants_on_same_allele(individual.individual_id, variant_1, variant_2)
78+
):
7379
return False
7480

7581
# If the individual is affected and not phased we can not say anything about the phase, so we say it is a compound pair, since it could be a compound pair.
7682
return True
7783

78-
# TODO: Write a test for this
84+
7985
def get_genotype(variant: dict, individual_id: str) -> Genotype:
8086
"""
8187
Return the Genotype object for a variants for a given individual.
@@ -89,7 +95,7 @@ def get_genotype(variant: dict, individual_id: str) -> Genotype:
8995
"""
9096
return variant["genotypes"][individual_id]
9197

92-
# TODO: Write a test for this
98+
9399
def get_phase_set(variant_dict: dict, sample_id: str) -> Union[str, None]:
94100
"""
95101
Extracts the PS (phase set) field for a given sample from a VCF-like variant dictionary.
@@ -106,7 +112,7 @@ def get_phase_set(variant_dict: dict, sample_id: str) -> Union[str, None]:
106112
field_map = dict(zip(format_fields, sample_values))
107113
return field_map.get("PS")
108114

109-
# TODO: Write a test for this
115+
110116
def variants_on_same_allele(individual_id: str, variant_1: dict, variant_2: dict) -> bool:
111117
"""
112118
Determine if two phased variants are on the same haplotype for a given individual.
@@ -130,13 +136,13 @@ def variants_on_same_allele(individual_id: str, variant_1: dict, variant_2: dict
130136

131137
same_phase = get_phase_set(variant_1, individual_id) == get_phase_set(variant_2, individual_id)
132138
overlapping_allele = (
133-
genotype_1.allele_1 == genotype_2.allele_1 or
134-
genotype_1.allele_2 == genotype_2.allele_2
139+
genotype_1.allele_1 == genotype_2.allele_1 or genotype_1.allele_2 == genotype_2.allele_2
135140
)
136141

137142
# Variants are on the same allele if they overlap in the same haplotype and are in the same phase set
138143
return same_phase and overlapping_allele
139144

145+
140146
def main():
141147
pass
142148

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
from ped_parser import FamilyParser
2+
3+
from genmod.annotate_models.models.compound_model import (
4+
check_compounds,
5+
get_genotype,
6+
get_phase_set,
7+
variants_on_same_allele,
8+
)
9+
from genmod.vcf_tools import Genotype
10+
11+
12+
def get_family(family_lines):
13+
return FamilyParser(family_lines)
14+
15+
16+
def make_variant(*, genotypes, format_str="GT:PS", sample_fields=None):
17+
variant = {"genotypes": dict(genotypes), "FORMAT": format_str}
18+
if sample_fields:
19+
variant.update(sample_fields)
20+
return variant
21+
22+
23+
def test_get_genotype_returns_expected_object():
24+
genotype = Genotype(**{"GT": "0/1"})
25+
variant = {"genotypes": {"sample": genotype}}
26+
assert get_genotype(variant, "sample") is genotype
27+
28+
29+
def test_get_phase_set_returns_ps_when_present():
30+
variant = {"FORMAT": "GT:AD:PS", "sample": "0/1:10,5:12345"}
31+
assert get_phase_set(variant, "sample") == "12345"
32+
33+
34+
def test_get_phase_set_returns_none_when_ps_missing_in_format():
35+
variant = {"FORMAT": "GT:AD", "sample": "0/1:10,5"}
36+
assert get_phase_set(variant, "sample") is None
37+
38+
39+
def test_get_phase_set_returns_none_when_sample_missing_or_unset():
40+
variant_missing_sample = {"FORMAT": "GT:PS"}
41+
assert get_phase_set(variant_missing_sample, "sample") is None
42+
43+
variant_empty_sample = {"FORMAT": "GT:PS", "sample": ""}
44+
assert get_phase_set(variant_empty_sample, "sample") is None
45+
46+
variant_short_sample = {"FORMAT": "GT:PS", "sample": "0|1"}
47+
assert get_phase_set(variant_short_sample, "sample") is None
48+
49+
50+
def test_variants_on_same_allele_true_same_ps_and_overlap():
51+
variant_1 = make_variant(
52+
genotypes={"sample": Genotype(**{"GT": "0|1"})},
53+
sample_fields={"sample": "0|1:PS1"},
54+
)
55+
variant_2 = make_variant(
56+
genotypes={"sample": Genotype(**{"GT": "0|1"})},
57+
sample_fields={"sample": "0|1:PS1"},
58+
)
59+
assert variants_on_same_allele("sample", variant_1, variant_2) is True
60+
61+
62+
def test_variants_on_same_allele_false_different_phase_set():
63+
variant_1 = make_variant(
64+
genotypes={"sample": Genotype(**{"GT": "0|1"})},
65+
sample_fields={"sample": "0|1:1"},
66+
)
67+
variant_2 = make_variant(
68+
genotypes={"sample": Genotype(**{"GT": "0|1"})},
69+
sample_fields={"sample": "0|1:2"},
70+
)
71+
assert variants_on_same_allele("sample", variant_1, variant_2) is False
72+
73+
74+
def test_variants_on_same_allele_false_same_phase_set_no_overlap():
75+
variant_1 = make_variant(
76+
genotypes={"sample": Genotype(**{"GT": "0|1"})},
77+
sample_fields={"sample": "0|1:1"},
78+
)
79+
variant_2 = make_variant(
80+
genotypes={"sample": Genotype(**{"GT": "1|0"})},
81+
sample_fields={"sample": "1|0:1"},
82+
)
83+
assert variants_on_same_allele("sample", variant_1, variant_2) is False
84+
85+
86+
def test_check_compounds_false_when_affected_phased_same_allele():
87+
family_lines = [
88+
"#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n",
89+
"1\tchild\tfather\tmother\t1\t2\n",
90+
"1\tfather\t0\t0\t1\t1\n",
91+
"1\tmother\t0\t0\t2\t1\n",
92+
]
93+
family = get_family(family_lines)
94+
95+
variant_1 = make_variant(
96+
genotypes={
97+
"child": Genotype(**{"GT": "0|1"}),
98+
"father": Genotype(**{"GT": "0|0"}),
99+
"mother": Genotype(**{"GT": "0|0"}),
100+
},
101+
sample_fields={"child": "0|1:1"},
102+
)
103+
variant_2 = make_variant(
104+
genotypes={
105+
"child": Genotype(**{"GT": "0|1"}),
106+
"father": Genotype(**{"GT": "0|0"}),
107+
"mother": Genotype(**{"GT": "0|0"}),
108+
},
109+
sample_fields={"child": "0|1:1"},
110+
)
111+
112+
assert check_compounds(variant_1, variant_2, family=family, phased=True) is False
113+
114+
115+
def test_check_compounds_false_when_healthy_parent_has_both_variants():
116+
family_lines = [
117+
"#FamilyID\tSampleID\tFather\tMother\tSex\tPhenotype\n",
118+
"1\tchild\tfather\tmother\t1\t2\n",
119+
"1\tfather\t0\t0\t1\t1\n",
120+
"1\tmother\t0\t0\t2\t1\n",
121+
]
122+
family = get_family(family_lines)
123+
124+
variant_1 = make_variant(
125+
genotypes={
126+
"child": Genotype(**{"GT": "0/1"}),
127+
"father": Genotype(**{"GT": "0/1"}),
128+
"mother": Genotype(**{"GT": "0/0"}),
129+
}
130+
)
131+
variant_2 = make_variant(
132+
genotypes={
133+
"child": Genotype(**{"GT": "0/1"}),
134+
"father": Genotype(**{"GT": "0/1"}),
135+
"mother": Genotype(**{"GT": "0/0"}),
136+
}
137+
)
138+
139+
assert check_compounds(variant_1, variant_2, family=family, phased=False) is False

0 commit comments

Comments
 (0)