@@ -357,6 +357,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
357357 }
358358 if (target_end != -1 && query_end != -1 ) break ;
359359 }
360+ if (query_end == -1 ) query_end = target_end ;
360361 assert (query_end <= target_end );
361362 aln_str -> aln_len = target_end + 1 ;
362363 aln_str -> target_beg = 0 ; aln_str -> target_end = target_end ;
@@ -379,6 +380,7 @@ void wfa_trim_aln_str(int full_cover, aln_str_t *aln_str, int tlen, int qlen) {
379380 }
380381 if (target_start != -1 && query_start != -1 ) break ;
381382 }
383+ if (query_start == -1 ) query_start = target_start ; // no = operation
382384 assert (query_start >= target_start );
383385 aln_str -> aln_len = aln_str -> aln_len - target_start ;
384386 if (target_start != 0 ) {
@@ -630,73 +632,6 @@ int abpoa_partial_aln_msa_cons(const call_var_opt_t *opt, abpoa_t *ab, int wb, i
630632 return n_cons ;
631633 }
632634
633- // reads are sorted by full-cover and length before calling abPOA
634- // use reads with full_cover == 3 as backbone for poa
635- // skip reads with full_cover == 0
636- int abpoa_partial_aln (const call_var_opt_t * opt , int n_reads , uint8_t * * read_seqs , int * read_lens , int * read_full_cover , char * * names , int max_n_cons , int * cons_lens , uint8_t * * cons_seqs ) {
637- abpoa_t * ab = abpoa_init ();
638- abpoa_para_t * abpt = abpoa_init_para ();
639- // abpt->wb = -1;
640- abpt -> out_msa = 0 ;
641- // abp->cons_algrm = ABPOA_MF;
642- abpt -> inc_path_score = 1 ;
643- abpt -> max_n_cons = max_n_cons ;
644- // msa
645- abpt -> out_cons = 1 ;
646- abpt -> match = opt -> match ; abpt -> mismatch = opt -> mismatch ;
647- abpt -> gap_open1 = opt -> gap_open1 ; abpt -> gap_ext1 = opt -> gap_ext1 ;
648- abpt -> gap_open2 = opt -> gap_open2 ; abpt -> gap_ext2 = opt -> gap_ext2 ;
649- if (LONGCALLD_VERBOSE >= 2 ) abpt -> out_msa = 1 ;
650- abpoa_post_set_para (abpt );
651- ab -> abs -> n_seq = n_reads ;
652- for (int i = 0 ; i < n_reads ; ++ i ) {
653- if (LONGCALLD_VERBOSE >= 2 ) {
654- fprintf (stderr , ">%s %d %d\n" , names [i ], read_lens [i ], read_full_cover [i ]);
655- for (int j = 0 ; j < read_lens [i ]; ++ j ) {
656- fprintf (stderr , "%c" , "ACGTN" [read_seqs [i ][j ]]);
657- } fprintf (stderr , "\n" );
658- }
659- abpoa_res_t res ;
660- res .graph_cigar = 0 , res .n_cigar = 0 ;
661- int exc_beg = 0 , exc_end = 1 , seq_beg_cut = 0 , seq_end_cut = 0 ;
662- if (i != 0 ) {
663- int ref_beg , ref_end , read_beg , read_end , beg_id , end_id ;
664- if (collect_partial_aln_beg_end (opt -> gap_aln , opt -> match , opt -> mismatch , opt -> gap_open1 , opt -> gap_ext1 , opt -> gap_open2 , opt -> gap_ext2 ,
665- read_seqs [0 ], read_lens [0 ], read_full_cover [0 ], read_seqs [i ], read_lens [i ], read_full_cover [i ], & ref_beg , & ref_end , & read_beg , & read_end ) == 0 ) {
666- if (LONGCALLD_VERBOSE >= 2 ) fprintf (stderr , "Skipped in POA: %s\n" , names [i ]);
667- continue ;
668- }
669- beg_id = ref_beg + 1 , end_id = ref_end + 1 ;
670- seq_beg_cut = read_beg - 1 , seq_end_cut = read_lens [i ] - read_end ;
671- abpoa_subgraph_nodes (ab , abpt , beg_id , end_id , & exc_beg , & exc_end );
672- }
673- if (LONGCALLD_VERBOSE >= 2 ) fprintf (stderr , "ExcBeg: %d, ExcEnd: %d, SeqBegCut: %d, SeqEndCut: %d\n" , exc_beg , exc_end , seq_beg_cut , seq_end_cut );
674- abpoa_align_sequence_to_subgraph (ab , abpt , exc_beg , exc_end , read_seqs [i ]+ seq_beg_cut , read_lens [i ]- seq_beg_cut - seq_end_cut , & res );
675- abpoa_add_subgraph_alignment (ab , abpt , exc_beg , exc_end , read_seqs [i ]+ seq_beg_cut , NULL , read_lens [i ]- seq_beg_cut - seq_end_cut , NULL , res , i , n_reads , 0 );
676- if (res .n_cigar ) free (res .graph_cigar );
677- }
678- if (LONGCALLD_VERBOSE >= 2 ) abpoa_output (ab , abpt , stderr );
679- else abpoa_output (ab , abpt , NULL );
680- abpoa_cons_t * abc = ab -> abc ;
681-
682- int n_cons = 0 ;
683- if (abc -> n_cons > 0 ) {
684- for (int i = 0 ; i < abc -> n_cons ; ++ i ) {
685- cons_lens [i ] = abc -> cons_len [i ];
686- cons_seqs [i ] = (uint8_t * )malloc (abc -> cons_len [i ] * sizeof (uint8_t ));
687- for (int j = 0 ; j < abc -> cons_len [i ]; ++ j ) {
688- cons_seqs [i ][j ] = abc -> cons_base [i ][j ];
689- }
690- if (LONGCALLD_VERBOSE >= 2 ) fprintf (stderr , "ConsLen: %d\n" , abc -> cons_len [i ]);
691- }
692- n_cons = abc -> n_cons ;
693- } else {
694- fprintf (stderr , "Unable to call consensus: %s\n" , names [0 ]);
695- }
696- abpoa_free (ab ); abpoa_free_para (abpt );
697- return n_cons ;
698- }
699-
700635 // XXX limit abpoa memory usage, avoid memory allocation failure
701636 int abpoa_aln_msa_cons (const call_var_opt_t * opt , int wb , int n_reads , int * read_ids , uint8_t * * read_seqs , int * read_lens , int max_n_cons ,
702637 int * cons_lens , uint8_t * * cons_seqs ,
@@ -1254,6 +1189,7 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12541189 int reg_read_beg = 0 , reg_read_end = bam_cigar2qlen (chunk -> reads [read_i ]-> core .n_cigar , bam_get_cigar (chunk -> reads [read_i ]))- 1 ;
12551190 (* read_names )[i ] = bam_get_qname (chunk -> reads [read_i ]);
12561191 (* strands )[i ] = bam_is_rev (chunk -> reads [read_i ]);
1192+ int beg_is_del = 0 , end_is_del = 0 ;
12571193 for (int i = 0 ; i < n_digar ; ++ i ) {
12581194 hts_pos_t digar_beg = digars [i ].pos , digar_end ;
12591195 int op = digars [i ].type , len = digars [i ].len , qi = digars [i ].qi ;
@@ -1266,6 +1202,7 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12661202 if (op == BAM_CDEL ) {
12671203 reg_digar_beg = reg_beg ;
12681204 reg_read_beg = qi ;
1205+ beg_is_del = 1 ;
12691206 } else {
12701207 reg_digar_beg = reg_beg ;
12711208 reg_read_beg = qi + (reg_beg - digar_beg );
@@ -1275,16 +1212,25 @@ int collect_noisy_read_info(bam_chunk_t *chunk, hts_pos_t reg_beg, hts_pos_t reg
12751212 if (op == BAM_CDEL ) {
12761213 reg_digar_end = reg_end ;
12771214 reg_read_end = qi - 1 ;
1215+ end_is_del = 1 ;
12781216 } else {
12791217 reg_digar_end = reg_end ;
12801218 reg_read_end = qi + (reg_end - digar_beg );
12811219 }
12821220 }
12831221 }
1284- if (reg_digar_beg == reg_beg && reg_digar_end == reg_end ) (* fully_covers )[i ] = 3 ;
1285- else if (reg_digar_beg == reg_beg ) (* fully_covers )[i ] = 1 ;
1286- else if (reg_digar_end == reg_end ) (* fully_covers )[i ] = 2 ;
1287- else (* fully_covers )[i ] = 0 ;
1222+ if (reg_digar_beg == reg_beg && reg_digar_end == reg_end ) {
1223+ if (beg_is_del == 0 && end_is_del == 0 ) (* fully_covers )[i ] = 3 ;
1224+ else if (beg_is_del == 0 && end_is_del == 1 ) (* fully_covers )[i ] = 1 ;
1225+ else if (beg_is_del == 1 && end_is_del == 0 ) (* fully_covers )[i ] = 2 ;
1226+ else (* fully_covers )[i ] = 0 ;
1227+ } else if (reg_digar_beg == reg_beg ) {
1228+ if (beg_is_del ) (* fully_covers )[i ] = 0 ;
1229+ else (* fully_covers )[i ] = 1 ;
1230+ } else if (reg_digar_end == reg_end ) {
1231+ if (end_is_del ) (* fully_covers )[i ] = 0 ;
1232+ else (* fully_covers )[i ] = 2 ;
1233+ } else (* fully_covers )[i ] = 0 ;
12881234 // if (2*(reg_read_end-reg_read_beg+1) < (reg_end-reg_beg+1)) return 0;
12891235 (* read_seqs )[i ] = (uint8_t * )malloc ((reg_read_end - reg_read_beg + 1 ) * sizeof (uint8_t ));
12901236 (* read_quals )[i ] = (uint8_t * )malloc ((reg_read_end - reg_read_beg + 1 ) * sizeof (uint8_t ));
0 commit comments