@@ -278,15 +278,32 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
278
278
assert np .all (np .isin (np .unique (ref_h ), [0 , 1 ]))
279
279
assert np .all (np .isin (np .unique (query_h ), [- 1 , 0 , 1 ]))
280
280
sm = np .zeros ((m , h ), dtype = np .float64 ) # HMM state probability matrix
281
- fwd_hap_probs = np .zeros ((m , 2 ), dtype = np .float64 )
282
- bwd_hap_probs = np .zeros ((m , 2 ), dtype = np .float64 )
281
+
282
+ def _get_ref_hap_seg (a , b ):
283
+ """
284
+ Assuming all biallelic sites, get the index of a reference haplotype segment (i)
285
+ defined by the alleles a and b at two adjacent genotyped markers.
286
+
287
+ #i, a, b
288
+ 0, 0, 0
289
+ 1, 1, 0
290
+ 2, 0, 1
291
+ 3, 1, 1
292
+
293
+ See https://github.com/tskit-dev/tskit/issues/2802#issuecomment-1698767169
294
+ """
295
+ ref_hap_idx = a * 1 + b * 2
296
+ return ref_hap_idx
297
+
298
+ fwd_hap_probs = np .zeros ((m , 4 ), dtype = np .float64 )
299
+ bwd_hap_probs = np .zeros ((m , 4 ), dtype = np .float64 )
283
300
for i in np .arange (m - 1 , 0 , - 1 ):
284
301
for j in np .arange (h ):
285
302
sm [i , j ] = fm [i , j ] * bm [i , j ]
286
- ref_allele_mP1 = ref_h [i + 1 , j ]
287
- ref_allele_m = ref_h [i , j ]
288
- fwd_hap_probs [i , ref_allele_mP1 ] += sm [i , j ]
289
- bwd_hap_probs [i , ref_allele_m ] += sm [i , j ]
303
+ ref_hap_seg_mP1 = _get_ref_hap_seg ( ref_h [i , j ], ref_h [ i + 1 , j ])
304
+ ref_hap_seg_m = _get_ref_hap_seg ( ref_h [i - 1 , j ], ref_h [ i , j ])
305
+ fwd_hap_probs [i , ref_hap_seg_mP1 ] += sm [i , j ]
306
+ bwd_hap_probs [i , ref_hap_seg_m ] += sm [i , j ]
290
307
sum_fwd_hap_probs = np .sum (fwd_hap_probs [i , :])
291
308
fwd_hap_probs [i , :] /= sum_fwd_hap_probs
292
309
bwd_hap_probs [i , :] /= sum_fwd_hap_probs # Sum of forward hap probs
0 commit comments