Skip to content

Commit ed95cac

Browse files
committed
Add tests to check LS HMM of tskit compared to BEAGLE
1 parent b6f9872 commit ed95cac

File tree

1 file changed

+159
-0
lines changed

1 file changed

+159
-0
lines changed

python/tests/test_imputation.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
from io import StringIO
2+
3+
import pandas as pd
4+
5+
6+
"""
7+
A tree sequence containing 3 diploid individuals with 5 sites and 5 mutations
8+
(one per site). The first 2 individuals are used as reference panel,
9+
the last one is the target individual.
10+
"""
11+
_toy_ts_text = """
12+
left right parent child metadata
13+
0.000000 1000000.000000 6 0
14+
0.000000 1000000.000000 6 3
15+
0.000000 1000000.000000 7 2
16+
0.000000 1000000.000000 7 5
17+
0.000000 1000000.000000 8 1
18+
0.000000 1000000.000000 8 4
19+
0.000000 781157.000000 9 6
20+
0.000000 781157.000000 9 7
21+
0.000000 505438.000000 10 8
22+
0.000000 505438.000000 10 9
23+
505438.000000 549484.000000 11 8
24+
505438.000000 549484.000000 11 9
25+
781157.000000 1000000.000000 12 6
26+
781157.000000 1000000.000000 12 7
27+
549484.000000 1000000.000000 13 8
28+
549484.000000 781157.000000 13 9
29+
781157.000000 1000000.000000 13 12
30+
id flags location parents metadata
31+
0 0
32+
1 0
33+
2 0
34+
site node time derived_state parent metadata
35+
0 9 unknown G -1
36+
1 8 unknown A -1
37+
2 9 unknown T -1
38+
3 9 unknown C -1
39+
4 12 unknown C -1
40+
id is_sample time population individual metadata
41+
0 1 0.000000 0 0
42+
1 1 0.000000 0 0
43+
2 1 0.000000 0 1
44+
3 1 0.000000 0 1
45+
4 1 0.000000 0 2
46+
5 1 0.000000 0 2
47+
6 0 0.029768 0 -1
48+
7 0 0.133017 0 -1
49+
8 0 0.223233 0 -1
50+
9 0 0.651586 0 -1
51+
10 0 0.698831 0 -1
52+
11 0 2.114867 0 -1
53+
12 0 4.322031 0 -1
54+
13 0 7.432311 0 -1
55+
position ancestral_state metadata
56+
200000.000000 A
57+
300000.000000 C
58+
520000.000000 G
59+
600000.000000 T
60+
900000.000000 A
61+
"""
62+
63+
"""
64+
BEAGLE 4.1 was run on the toy data set above using default parameters.
65+
The following are the forward probability matrices and backward probability
66+
matrices calculated when imputing into the third individual above.
67+
Note that there are two sets of matrices, one for each haplotype.
68+
"""
69+
_fwd_matrix_text_1 = """
70+
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,fwdVal
71+
0,0,0.000000,1.000000,0.999900,0.000100,1,0,0.000000,1.000000,0.000100,0.000100
72+
0,1,0.000000,1.000000,0.999900,0.000100,0,0,0.000000,1.000000,1.000000,0.999900
73+
0,2,0.000000,1.000000,0.999900,0.000100,1,0,0.000000,1.000000,1.000100,0.000100
74+
0,3,0.000000,1.000000,0.999900,0.000100,1,0,0.000000,1.000000,1.000200,0.000100
75+
1,0,1.000000,0.000000,0.999900,0.000100,0,1,0.250000,0.000000,0.000025,0.000025
76+
1,1,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.250000,0.249975
77+
1,2,1.000000,0.000000,0.999900,0.000100,0,1,0.250000,0.000000,0.250025,0.000025
78+
1,3,1.000000,0.000000,0.999900,0.000100,0,1,0.250000,0.000000,0.250050,0.000025
79+
2,0,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.000025,0.000025
80+
2,1,1.000000,0.000000,0.999900,0.000100,0,0,0.250000,0.000000,0.250000,0.249975
81+
2,2,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.250025,0.000025
82+
2,3,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.250050,0.000025
83+
3,0,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.000025,0.000025
84+
3,1,1.000000,0.000000,0.999900,0.000100,0,0,0.250000,0.000000,0.250000,0.249975
85+
3,2,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.250025,0.000025
86+
3,3,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.250050,0.000025
87+
"""
88+
89+
_bwd_matrix_text_1 = """
90+
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,bwdVal
91+
2,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
92+
2,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
93+
2,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
94+
2,3,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
95+
1,0,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
96+
1,1,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
97+
1,2,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
98+
1,3,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
99+
0,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.250050,0.250000
100+
0,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.250050,0.250000
101+
0,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.250050,0.250000
102+
0,3,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.250050,0.250000
103+
"""
104+
105+
_fwd_matrix_text_2 = """
106+
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,fwdVal
107+
0,0,0.000000,1.000000,0.999900,0.000100,1,1,0.000000,1.000000,0.999900,0.999900
108+
0,1,0.000000,1.000000,0.999900,0.000100,0,1,0.000000,1.000000,1.000000,0.000100
109+
0,2,0.000000,1.000000,0.999900,0.000100,1,1,0.000000,1.000000,1.999900,0.999900
110+
0,3,0.000000,1.000000,0.999900,0.000100,1,1,0.000000,1.000000,2.999800,0.999900
111+
1,0,1.000000,0.000000,0.999900,0.000100,0,0,0.250000,0.000000,0.249975,0.249975
112+
1,1,1.000000,0.000000,0.999900,0.000100,1,0,0.250000,0.000000,0.250000,0.000025
113+
1,2,1.000000,0.000000,0.999900,0.000100,0,0,0.250000,0.000000,0.499975,0.249975
114+
1,3,1.000000,0.000000,0.999900,0.000100,0,0,0.250000,0.000000,0.749950,0.249975
115+
2,0,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.249975,0.249975
116+
2,1,1.000000,0.000000,0.999900,0.000100,0,1,0.250000,0.000000,0.250000,0.000025
117+
2,2,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.499975,0.249975
118+
2,3,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.749950,0.249975
119+
3,0,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.249975,0.249975
120+
3,1,1.000000,0.000000,0.999900,0.000100,0,1,0.250000,0.000000,0.250000,0.000025
121+
3,2,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.499975,0.249975
122+
3,3,1.000000,0.000000,0.999900,0.000100,1,1,0.250000,0.000000,0.749950,0.249975
123+
"""
124+
125+
_bwd_matrix_text_2 = """
126+
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,bwdVal
127+
2,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
128+
2,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
129+
2,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
130+
2,3,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
131+
1,0,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
132+
1,1,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
133+
1,2,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
134+
1,3,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
135+
0,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.749950,0.250000
136+
0,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.749950,0.250000
137+
0,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.749950,0.250000
138+
0,3,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.749950,0.250000
139+
"""
140+
141+
142+
def convert_to_pd_df(matrix_text):
143+
"""
144+
Convert a matrix in text to a Pandas dataframe.
145+
"""
146+
df = pd.read_csv(StringIO(matrix_text))
147+
# Check that switch and non-switch probabilities sum to 1
148+
assert all(df.probRec + df.probNoRec == 1)
149+
# Check that non-mismatch and mismatch probabilities sum to 1
150+
assert all(df.noErrProb + df.errProb == 1)
151+
return df
152+
153+
154+
def get_forward_backward_matrices():
155+
fwd_matrix_1 = convert_to_pd_df(_fwd_matrix_text_1)
156+
bwd_matrix_1 = convert_to_pd_df(_bwd_matrix_text_1)
157+
fwd_matrix_2 = convert_to_pd_df(_fwd_matrix_text_2)
158+
bwd_matrix_2 = convert_to_pd_df(_bwd_matrix_text_2)
159+
return [fwd_matrix_1, bwd_matrix_1, fwd_matrix_2, bwd_matrix_2]

0 commit comments

Comments
 (0)