@@ -419,6 +419,9 @@ def interpolate_allele_probabilities_equation1(
419419
420420 Assume all biallelic sites.
421421
422+ Note that this function takes the full reference haplotypes and query haplotype,
423+ i.e., not subsetted to genotyped markers.
424+
422425 :param numpy.ndarray sm: HMM state probability matrix.
423426 :param numpy.ndarray ref_h: Reference haplotypes.
424427 :param numpy.ndarray query_h: One query haplotype.
@@ -433,22 +436,26 @@ def interpolate_allele_probabilities_equation1(
433436 assert sm .shape == (m , h )
434437 assert ref_h .shape == (m + x , h )
435438 assert len (query_h ) == m + x
439+ genotyped_cm = convert_to_genetic_map_position (genotyped_pos )
440+ imputed_cm = convert_to_genetic_map_position (imputed_pos )
436441 weights = get_weights (genotyped_pos , imputed_pos )
437442 assert len (weights ) == x
438443 p = np .zeros ((x , 2 ), dtype = np .float64 )
439444
440445 def _compute_allele_probabilities (a ):
441- """Helper function to compute probability of allele a at imputed markers."""
442- k = 0 # Keep track of imputed marker index
443- # l = 0 # Keep track of genotyped marker index
444- for i in np .arange (m + x ):
445- if query_h [i ] != - 1 :
446- continue
446+ """Equation 1 in BB2016."""
447+ for i in np .arange (x ):
448+ if imputed_cm [i ] < genotyped_cm [0 ]:
449+ _m = 0
450+ elif imputed_cm [i ] > genotyped_cm [- 1 ]:
451+ _m = - 2
452+ else :
453+ _m = np .min (np .where (genotyped_cm > imputed_cm [i ]))
447454 for j in np .arange (h ):
448- if ref_h [ i , j ] == a :
449- p [k , a ] += weights [ i ] * sm [ i , j ]
450- p [k , a ] += ( 1 - weights [i ]) * sm [i + 1 , j ]
451- k += 1
455+ k = 0 # Index wrt reference markers
456+ if ref_h [k , j ] == a :
457+ p [i , a ] += weights [i ] * sm [_m , j ]
458+ p [ i , a ] += ( 1 - weights [ i ]) * sm [ _m + 1 , j ]
452459
453460 _compute_allele_probabilities (a = 0 )
454461 _compute_allele_probabilities (a = 1 )
0 commit comments