@@ -1322,7 +1322,7 @@ int collect_digar_from_ref_seq(bam_chunk_t *chunk, int read_i, const struct call
13221322
13231323int bam_chunk_init0 (bam_chunk_t * chunk , int n_reads , int n_bam ) {
13241324 // input
1325- chunk -> n_reads = 0 ; chunk -> m_reads = n_reads ;
1325+ chunk -> n_reads = 0 ; chunk -> m_reads = n_reads ; chunk -> ordered_read_ids = ( int * ) malloc ( n_reads * sizeof ( int ));
13261326 chunk -> n_bam = n_bam ;
13271327 chunk -> n_up_ovlp_reads = (int * )calloc (n_bam , sizeof (int ));
13281328 chunk -> up_ovlp_read_i = (int * * )malloc (n_bam * sizeof (int * ));
@@ -1364,6 +1364,7 @@ int bam_chunk_init0(bam_chunk_t *chunk, int n_reads, int n_bam) {
13641364int bam_chunk_realloc (bam_chunk_t * chunk ) {
13651365 int m_reads = chunk -> m_reads * 2 ;
13661366 chunk -> reads = (bam1_t * * )realloc (chunk -> reads , m_reads * sizeof (bam1_t * ));
1367+ chunk -> ordered_read_ids = (int * )realloc (chunk -> ordered_read_ids , m_reads * sizeof (int ));
13671368 for (int i = 0 ; i < chunk -> n_bam ; ++ i ) {
13681369 chunk -> up_ovlp_read_i [i ] = (int * )realloc (chunk -> up_ovlp_read_i [i ], m_reads * sizeof (int ));
13691370 chunk -> down_ovlp_read_i [i ] = (int * )realloc (chunk -> down_ovlp_read_i [i ], m_reads * sizeof (int ));
@@ -1408,6 +1409,7 @@ void bam_chunk_free(bam_chunk_t *chunk) {
14081409 cr_destroy (chunk -> digars [i ].noisy_regs );
14091410 }
14101411 }
1412+ free (chunk -> ordered_read_ids );
14111413 if (LONGCALLD_VERBOSE >= 2 ) {
14121414 for (int i = 0 ; i < chunk -> m_reads ; i ++ ) {
14131415 bam_destroy1 (chunk -> reads [i ]);
@@ -1470,6 +1472,7 @@ void bam_chunk_post_free(bam_chunk_t *chunk) {
14701472 }
14711473 free (chunk -> reads );
14721474 }
1475+ free (chunk -> ordered_read_ids );
14731476}
14741477
14751478// free variables that will not be used in stitch & make variants
@@ -1560,6 +1563,48 @@ static int is_ovlp_with_next_region(const struct call_var_pl_t *pl, bam_chunk_t
15601563 }
15611564}
15621565
1566+ typedef struct {
1567+ int read_i ;
1568+ hts_pos_t pos , end ;
1569+ int NM ;
1570+ char * qname ;
1571+ } bam_read_sort_t ;
1572+
1573+ int comp_bam_read_sort (const void * a , const void * b ) {
1574+ const bam_read_sort_t * ra = (const bam_read_sort_t * )a ;
1575+ const bam_read_sort_t * rb = (const bam_read_sort_t * )b ;
1576+ if (ra -> pos != rb -> pos ) return (ra -> pos < rb -> pos ) ? -1 : 1 ;
1577+ if (ra -> end != rb -> end ) return (ra -> end < rb -> end ) ? 1 : -1 ;
1578+ if (ra -> NM != rb -> NM ) return (ra -> NM < rb -> NM ) ? -1 : 1 ;
1579+ return strcmp (ra -> qname , rb -> qname );
1580+ }
1581+
1582+ int bam_get_NM (bam1_t * read ) {
1583+ uint8_t * nm_ptr = bam_aux_get (read , "NM" );
1584+ if (nm_ptr != NULL ) {
1585+ return bam_aux2i (nm_ptr );
1586+ } else {
1587+ return 0 ;
1588+ }
1589+ }
1590+
1591+ void sort_chunk_reads (bam_chunk_t * chunk ) {
1592+ bam_read_sort_t * read_sorts = (bam_read_sort_t * )malloc (chunk -> n_reads * sizeof (bam_read_sort_t ));
1593+ for (int i = 0 ; i < chunk -> n_reads ; ++ i ) {
1594+ read_sorts [i ].read_i = i ;
1595+ read_sorts [i ].pos = chunk -> reads [i ]-> core .pos ;
1596+ read_sorts [i ].end = bam_endpos (chunk -> reads [i ]);
1597+ read_sorts [i ].NM = bam_get_NM (chunk -> reads [i ]);
1598+ read_sorts [i ].qname = bam_get_qname (chunk -> reads [i ]);
1599+ }
1600+ // sort by pos, end, NM, qname
1601+ qsort (read_sorts , chunk -> n_reads , sizeof (bam_read_sort_t ), comp_bam_read_sort );
1602+ for (int i = 0 ; i < chunk -> n_reads ; ++ i ) {
1603+ chunk -> ordered_read_ids [i ] = read_sorts [i ].read_i ;
1604+ }
1605+ free (read_sorts );
1606+ }
1607+
15631608// load ref_seq/read in reg_chunks[reg_chunk_i]->tid/beg/end[reg_i] to chunks
15641609int collect_ref_seq_bam_main (const struct call_var_pl_t * pl , struct call_var_io_aux_t * io_aux , int reg_chunk_i , int reg_i , bam_chunk_t * chunk ) {
15651610 assert (reg_chunk_i < pl -> n_reg_chunks ); assert (reg_i < pl -> reg_chunks [reg_chunk_i ].n_regions );
@@ -1608,6 +1653,7 @@ int collect_ref_seq_bam_main(const struct call_var_pl_t *pl, struct call_var_io_
16081653 if (LONGCALLD_VERBOSE >= 2 ) {
16091654 fprintf (stderr , "CHUNK: tname: %s, tid: %d, beg: %" PRId64 ", end: %" PRId64 ", n_reads: %d\n" , chunk -> tname , chunk -> tid , chunk -> reg_beg , chunk -> reg_end , chunk -> n_reads );
16101655 }
1656+ sort_chunk_reads (chunk );
16111657 return chunk -> n_reads ;
16121658}
16131659
@@ -1928,7 +1974,10 @@ int write_read_to_bam(bam_chunk_t *chunk, const struct call_var_opt_t *opt, cons
19281974 int read_i = 0 , start_to_output = 0 ;
19291975 while (sam_itr_next (in_bam , iter , read ) >= 0 ) {
19301976 if (read -> core .flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FSUPPLEMENTARY ) || read -> core .qual < min_mapq ) {
1931- if (start_to_output == 1 ) write_unprocessed_read_to_bam (chunk , header , opt -> out_aln_fp , read );
1977+ if (start_to_output == 1 ) {
1978+ // write_unprocessed_read_to_bam(chunk, header, opt->out_aln_fp, read);
1979+ n_out_reads ++ ;
1980+ }
19321981 continue ;
19331982 }
19341983 // check if read is overlapping with previous region
0 commit comments