@@ -317,7 +317,8 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
317317 int ref_len , int left , int right ,
318318 int sample , int type , int biggest_del ,
319319 int * left_shift , int * right_shift ,
320- int * band , int * tcon_len , int * cpos_pos ) {
320+ int * band , int * tcon_len , int * cpos_pos ,
321+ int pos_l , int pos_r ) {
321322 // Map ASCII ACGTN* to 012345
322323 static uint8_t base6 [256 ] = {
323324 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 , 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 , 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 , 4 ,4 ,4 ,4 ,4 ,4 ,4 ,4 ,
@@ -357,6 +358,8 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
357358 //--------------------------------------------------
358359 // Accumulate sequences into cons_base and cons_ins arrays
359360 int local_band_max = 0 ; // maximum absolute deviation from diagonal
361+ int total_span_str = 0 ;
362+ int type_depth = 0 ;
360363 for (i = 0 ; i < n_plp [s ]; i ++ ) {
361364 const bam_pileup1_t * p = plp [s ] + i ;
362365 bam1_t * b = p -> b ;
@@ -425,6 +428,7 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
425428 if (bcf_cgp_append_cons (& cons_ins [x - left ], ins ,
426429 ilen , 1 ) < 0 )
427430 goto err ;
431+ type_depth += (x == pos + 1 );
428432 } else if (x != pos + 1 ){
429433 if (bcf_cgp_append_cons (& ref_ins [x - left ], ins ,
430434 ilen , 1 ) < 0 )
@@ -446,9 +450,10 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
446450 if (x < left ) continue ;
447451 if (x >= right ) break ;
448452 if ((p -> indel == type && !p -> is_del ) || // starts here
449- (p -> indel == 0 && p -> is_del && len == - type )) // left
453+ (p -> indel == 0 && p -> is_del && len == - type )) { // left
450454 cons_base [x - left ][5 ]++ ;
451- else if (x + len <= pos + 1 || (skip_to && x > skip_to ))
455+ type_depth += (x == pos + 1 );
456+ } else if (x + len <= pos + 1 || (skip_to && x > skip_to ))
452457 ref_base [x - left ][5 ]++ ;
453458 else if (x <= pos && x + len > pos + 1 ) {
454459 // we have a deletion which overlaps pos, but
@@ -466,6 +471,9 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
466471 }
467472 }
468473
474+ if (b -> core .pos <= pos_l && x >= pos_r )
475+ total_span_str ++ ;
476+
469477 // Also track the biggest deviation +/- from diagonal. We use
470478 // this band observation in our BAQ alignment step.
471479 if (* band < local_band_max )
@@ -622,10 +630,11 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
622630 }
623631 }
624632
625- #define CONS_CUTOFF .40 // 40% needed for base vs N
626- #define CONS_CUTOFF2 .80 // 80% needed for gap in cons[1]
627- #define CONS_CUTOFF_INC .40 // 40% to include any insertion cons[0]
628- #define CONS_CUTOFF_INC2 .80 // 80% to include any insertion cons[1] HOM
633+ #define CONS_CUTOFF .40 // % needed for base vs N
634+ #define CONS_CUTOFF_DEL .35 // % to include any het del
635+ #define CONS_CUTOFF2 .80 // % needed for gap in cons[1]
636+ #define CONS_CUTOFF_INC .35 // % to include any insertion cons[0]
637+ #define CONS_CUTOFF_INC2 .80 // % to include any insertion cons[1] HOM
629638#define CONS_CUTOFF_INS .60 // and then 60% needed for it to be bases vs N
630639
631640 //--------------------------------------------------
@@ -723,14 +732,18 @@ static char **bcf_cgp_consensus(int n, int *n_plp, bam_pileup1_t **plp,
723732 if (!always_del && cons_base [i ][5 ] >= bca -> min_support ) {
724733 // Candidate HET del.
725734 if (cnum == 0 ) {
726- het_del = cons_base [i ][5 ] >= CONS_CUTOFF * tot ;
735+ int tot2 = tot ;
736+ if (i > pos - left && i <= pos - left - biggest_del )
737+ tot2 = total_span_str - type_depth ;
738+ het_del = cons_base [i ][5 ] >= CONS_CUTOFF_DEL * tot2 ;
739+
727740 if (i < 1024 ) {
728741 if (i > pos - left && i <= pos - left - biggest_del )
729742 hetd [i ] = 0 ;
730743 else
731744 hetd [i ] = het_del
732745 ? 1
733- : (cons_base [i ][5 ] >= .3 * tot ? -1 : 0 );
746+ : (cons_base [i ][5 ] >= .3 * tot2 ? -1 : 0 );
734747 }
735748 } else {
736749 // HET del uncalled on cnum 0
@@ -1391,6 +1404,26 @@ int bcf_edlib_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
13911404 }
13921405 int band = biggest_ins - biggest_del ; // NB del is -ve
13931406
1407+ // Find left & right extents of STR covering pos, from ref
1408+ int pos_l = pos , pos_r = pos ;
1409+ {
1410+ rep_ele * reps , * elt , * tmp ;
1411+ int pstart = MAX (0 , pos - 30 );
1412+ int pmid = pos - pstart ;
1413+ int pend = MIN (ref_len , pos + 30 );
1414+ reps = find_STR ((char * )& ref [pstart ], pend - pstart , 0 );
1415+ DL_FOREACH_SAFE (reps , elt , tmp ) {
1416+ if (elt -> end >= pmid && elt -> start <= pmid ) {
1417+ if (pos_l > pstart + elt -> start )
1418+ pos_l = pstart + elt -> start ;
1419+ if (pos_r < pstart + elt -> end )
1420+ pos_r = pstart + elt -> end ;
1421+ }
1422+ DL_DELETE (reps , elt );
1423+ free (elt );
1424+ }
1425+ }
1426+
13941427 int str_len1 = l_run , str_len2 = l_run /4 ;
13951428 for (t = 0 ; t < n_types ; ++ t ) {
13961429 int l , ir ;
@@ -1420,7 +1453,7 @@ int bcf_edlib_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
14201453 tcons = bcf_cgp_consensus (n , n_plp , plp , pos , bca , ref , ref_len ,
14211454 left , right , s , types [t ], biggest_del ,
14221455 & left_shift , & right_shift , & band ,
1423- tcon_len , & cpos_pos );
1456+ tcon_len , & cpos_pos , pos_l , pos_r );
14241457 // TODO: Consensus for a deletion shouldn't match the
14251458 // consensus for type 0. Eg consider
14261459 // vv vv
0 commit comments