@@ -419,6 +419,9 @@ def interpolate_allele_probabilities_equation1(
419
419
420
420
Assume all biallelic sites.
421
421
422
+ Note that this function takes the full reference haplotypes and query haplotype,
423
+ i.e., not subsetted to genotyped markers.
424
+
422
425
:param numpy.ndarray sm: HMM state probability matrix.
423
426
:param numpy.ndarray ref_h: Reference haplotypes.
424
427
:param numpy.ndarray query_h: One query haplotype.
@@ -433,22 +436,26 @@ def interpolate_allele_probabilities_equation1(
433
436
assert sm .shape == (m , h )
434
437
assert ref_h .shape == (m + x , h )
435
438
assert len (query_h ) == m + x
439
+ genotyped_cm = convert_to_genetic_map_position (genotyped_pos )
440
+ imputed_cm = convert_to_genetic_map_position (imputed_pos )
436
441
weights = get_weights (genotyped_pos , imputed_pos )
437
442
assert len (weights ) == x
438
443
p = np .zeros ((x , 2 ), dtype = np .float64 )
439
444
440
445
def _compute_allele_probabilities (a ):
441
- """Helper function to compute probability of allele a at imputed markers."""
442
- k = 0 # Keep track of imputed marker index
443
- # l = 0 # Keep track of genotyped marker index
444
- for i in np .arange (m + x ):
445
- if query_h [i ] != - 1 :
446
- continue
446
+ """Equation 1 in BB2016."""
447
+ for i in np .arange (x ):
448
+ if imputed_cm [i ] < genotyped_cm [0 ]:
449
+ _m = 0
450
+ elif imputed_cm [i ] > genotyped_cm [- 1 ]:
451
+ _m = - 2
452
+ else :
453
+ _m = np .min (np .where (genotyped_cm > imputed_cm [i ]))
447
454
for j in np .arange (h ):
448
- if ref_h [ i , j ] == a :
449
- p [k , a ] += weights [ i ] * sm [ i , j ]
450
- p [k , a ] += ( 1 - weights [i ]) * sm [i + 1 , j ]
451
- k += 1
455
+ k = 0 # Index wrt reference markers
456
+ if ref_h [k , j ] == a :
457
+ p [i , a ] += weights [i ] * sm [_m , j ]
458
+ p [ i , a ] += ( 1 - weights [ i ]) * sm [ _m + 1 , j ]
452
459
453
460
_compute_allele_probabilities (a = 0 )
454
461
_compute_allele_probabilities (a = 1 )
0 commit comments