Skip to content

Commit 4a091ba

Browse files
committed
Implement reference haplotype segment indexing
1 parent 0395bf1 commit 4a091ba

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

python/tests/test_beagle.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,32 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
278278
assert np.all(np.isin(np.unique(ref_h), [0, 1]))
279279
assert np.all(np.isin(np.unique(query_h), [-1, 0, 1]))
280280
sm = np.zeros((m, h), dtype=np.float64) # HMM state probability matrix
281-
fwd_hap_probs = np.zeros((m, 2), dtype=np.float64)
282-
bwd_hap_probs = np.zeros((m, 2), dtype=np.float64)
281+
282+
def _get_ref_hap_seg(a, b):
283+
"""
284+
Assuming all biallelic sites, get the index of a reference haplotype segment (i)
285+
defined by the alleles a and b at two adjacent genotyped markers.
286+
287+
#i, a, b
288+
0, 0, 0
289+
1, 1, 0
290+
2, 0, 1
291+
3, 1, 1
292+
293+
See https://github.com/tskit-dev/tskit/issues/2802#issuecomment-1698767169
294+
"""
295+
ref_hap_idx = a * 1 + b * 2
296+
return ref_hap_idx
297+
298+
fwd_hap_probs = np.zeros((m, 4), dtype=np.float64)
299+
bwd_hap_probs = np.zeros((m, 4), dtype=np.float64)
283300
for i in np.arange(m - 1, 0, -1):
284301
for j in np.arange(h):
285302
sm[i, j] = fm[i, j] * bm[i, j]
286-
ref_allele_mP1 = ref_h[i + 1, j]
287-
ref_allele_m = ref_h[i, j]
288-
fwd_hap_probs[i, ref_allele_mP1] += sm[i, j]
289-
bwd_hap_probs[i, ref_allele_m] += sm[i, j]
303+
ref_hap_seg_mP1 = _get_ref_hap_seg(ref_h[i, j], ref_h[i + 1, j])
304+
ref_hap_seg_m = _get_ref_hap_seg(ref_h[i - 1, j], ref_h[i, j])
305+
fwd_hap_probs[i, ref_hap_seg_mP1] += sm[i, j]
306+
bwd_hap_probs[i, ref_hap_seg_m] += sm[i, j]
290307
sum_fwd_hap_probs = np.sum(fwd_hap_probs[i, :])
291308
fwd_hap_probs[i, :] /= sum_fwd_hap_probs
292309
bwd_hap_probs[i, :] /= sum_fwd_hap_probs # Sum of forward hap probs

0 commit comments

Comments
 (0)