1
1
from io import StringIO
2
2
3
+ import numpy as np
3
4
import pandas as pd
4
5
5
6
import tskit
@@ -93,8 +94,27 @@ def get_toy_ts():
93
94
"""
94
95
BEAGLE 4.1 was run on the toy data set above using default parameters.
95
96
The following are the forward probability matrices and backward probability
96
- matrices calculated when imputing into the third individual above.
97
- Note that there are two sets of matrices, one for each haplotype.
97
+ matrices calculated when imputing into the third individual above. There are
98
+ two sets of matrices, one for each haplotype.
99
+
100
+ Notes about calculations:
101
+ n = number of haplotypes in ref. panel
102
+ M = number of markers
103
+ m = index of marker (site)
104
+ h = index of haplotype in ref. panel
105
+
106
+ In forward probability matrix,
107
+ fwd[m][h] = emission prob., if m = 0 (first marker)
108
+ fwd[m][h] = emission prob. * (scale * fwd[m - 1][h] + shift), otherwise
109
+ where scale = (1 - switch prob.)/sum of fwd[m - 1],
110
+ and shift = switch prob./n.
111
+
112
+ In backward probability matrix,
113
+ bwd[m][h] = 1, if m = M - 1 (last marker) // DON'T SEE THIS IN BEAGLE
114
+ unadj. bwd[m][h] = emission prob. / n
115
+ bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
116
+ where scale = (1 - switch prob.)/sum of unadj. bwd[m],
117
+ and shift = switch prob./n.
98
118
"""
99
119
_fwd_matrix_text_1 = """
100
120
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
@@ -118,6 +138,10 @@ def get_toy_ts():
118
138
119
139
_bwd_matrix_text_1 = """
120
140
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
141
+ 3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
142
+ 3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
143
+ 3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
144
+ 3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
121
145
2,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
122
146
2,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
123
147
2,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
@@ -154,6 +178,10 @@ def get_toy_ts():
154
178
155
179
_bwd_matrix_text_2 = """
156
180
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
181
+ 3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
182
+ 3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
183
+ 3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
184
+ 3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
157
185
2,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
158
186
2,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
159
187
2,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
@@ -174,10 +202,11 @@ def convert_to_pd_df(matrix_text):
174
202
Converts a matrix in text to a Pandas dataframe and returns it.
175
203
"""
176
204
df = pd .read_csv (StringIO (matrix_text ))
177
- # Check that switch and non-switch probabilities sum to 1
178
- assert all (df .probRec + df .probNoRec == 1 )
179
- # Check that non-mismatch and mismatch probabilities sum to 1
180
- assert all (df .noErrProb + df .errProb == 1 )
205
+ for i in np .arange (df .shape [0 ]):
206
+ # Check that switch and non-switch probabilities sum to 1
207
+ assert df .probRec [i ] + df .probNoRec [i ] == 1 or np .isnan (df .probRec [i ])
208
+ # Check that non-mismatch and mismatch probabilities sum to 1
209
+ assert df .noErrProb [i ] + df .errProb [i ] == 1 or np .isnan (df .noErrProb [i ])
181
210
matrix = df .val .to_numpy ().reshape (
182
211
(
183
212
4 ,
0 commit comments