Skip to content

Commit 7564875

Browse files
committed
Add rightmost column in backward matrix, update conversion function, and add calculation notes
1 parent 5880435 commit 7564875

File tree

1 file changed

+35
-6
lines changed

1 file changed

+35
-6
lines changed

python/tests/test_imputation.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from io import StringIO
22

3+
import numpy as np
34
import pandas as pd
45

56
import tskit
@@ -93,8 +94,27 @@ def get_toy_ts():
9394
"""
9495
BEAGLE 4.1 was run on the toy data set above using default parameters.
9596
The following are the forward probability matrices and backward probability
96-
matrices calculated when imputing into the third individual above.
97-
Note that there are two sets of matrices, one for each haplotype.
97+
matrices calculated when imputing into the third individual above. There are
98+
two sets of matrices, one for each haplotype.
99+
100+
Notes about calculations:
101+
n = number of haplotypes in ref. panel
102+
M = number of markers
103+
m = index of marker (site)
104+
h = index of haplotype in ref. panel
105+
106+
In forward probability matrix,
107+
fwd[m][h] = emission prob., if m = 0 (first marker)
108+
fwd[m][h] = emission prob. * (scale * fwd[m - 1][h] + shift), otherwise
109+
where scale = (1 - switch prob.)/sum of fwd[m - 1],
110+
and shift = switch prob./n.
111+
112+
In backward probability matrix,
113+
bwd[m][h] = 1, if m = M - 1 (last marker) // DON'T SEE THIS IN BEAGLE
114+
unadj. bwd[m][h] = emission prob. / n
115+
bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
116+
where scale = (1 - switch prob.)/sum of unadj. bwd[m],
117+
and shift = switch prob./n.
98118
"""
99119
_fwd_matrix_text_1 = """
100120
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
@@ -118,6 +138,10 @@ def get_toy_ts():
118138

119139
_bwd_matrix_text_1 = """
120140
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
141+
3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
142+
3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
143+
3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
144+
3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
121145
2,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
122146
2,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
123147
2,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
@@ -154,6 +178,10 @@ def get_toy_ts():
154178

155179
_bwd_matrix_text_2 = """
156180
m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
181+
3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
182+
3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
183+
3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
184+
3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
157185
2,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
158186
2,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
159187
2,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
@@ -174,10 +202,11 @@ def convert_to_pd_df(matrix_text):
174202
Converts a matrix in text to a Pandas dataframe and returns it.
175203
"""
176204
df = pd.read_csv(StringIO(matrix_text))
177-
# Check that switch and non-switch probabilities sum to 1
178-
assert all(df.probRec + df.probNoRec == 1)
179-
# Check that non-mismatch and mismatch probabilities sum to 1
180-
assert all(df.noErrProb + df.errProb == 1)
205+
for i in np.arange(df.shape[0]):
206+
# Check that switch and non-switch probabilities sum to 1
207+
assert df.probRec[i] + df.probNoRec[i] == 1 or np.isnan(df.probRec[i])
208+
# Check that non-mismatch and mismatch probabilities sum to 1
209+
assert df.noErrProb[i] + df.errProb[i] == 1 or np.isnan(df.noErrProb[i])
181210
matrix = df.val.to_numpy().reshape(
182211
(
183212
4,

0 commit comments

Comments
 (0)