@@ -86,8 +86,8 @@ typedef struct
8686 int32_t * int32_arr ;
8787 int ntmp_arr1 , ntmp_arr2 , nint32_arr ;
8888 kstring_t * tmp_str ;
89- kstring_t * tmp_als , tmp_kstr ;
90- int ntmp_als ;
89+ kstring_t * tmp_als , * tmp_del , tmp_kstr ;
90+ int ntmp_als , ntmp_del ;
9191 rbuf_t rbuf ;
9292 int buf_win ; // maximum distance between two records to consider
9393 int aln_win ; // the realignment window size (maximum repeat size)
@@ -398,10 +398,32 @@ static int realign(args_t *args, bcf1_t *line)
398398
399399 // make a copy of each allele for trimming
400400 hts_expand0 (kstring_t ,line -> n_allele ,args -> ntmp_als ,args -> tmp_als );
401+ hts_expand0 (kstring_t ,line -> n_allele ,args -> ntmp_del ,args -> tmp_del );
401402 kstring_t * als = args -> tmp_als ;
403+ kstring_t * del = args -> tmp_del ;
402404 for (i = 0 ; i < line -> n_allele ; i ++ )
403405 {
404- if ( line -> d .allele [i ][0 ]== '<' ) return ERR_SYMBOLIC ; // symbolic allele
406+ del [i ].l = 0 ;
407+ if ( line -> d .allele [i ][0 ]== '<' )
408+ {
409+ // symbolic allele, only <DEL.*> will be realigned
410+ if ( strncmp ("<DEL" ,line -> d .allele [i ],4 ) ) return ERR_SYMBOLIC ;
411+ if ( nref < line -> rlen )
412+ {
413+ free (ref );
414+ reflen = line -> rlen ;
415+ ref = faidx_fetch_seq (args -> fai , (char * )args -> hdr -> id [BCF_DT_CTG ][line -> rid ].key , line -> pos , line -> pos + reflen - 1 , & nref );
416+ if ( !ref ) error ("faidx_fetch_seq failed at %s:%" PRId64 "\n" , args -> hdr -> id [BCF_DT_CTG ][line -> rid ].key , (int64_t ) line -> pos + 1 );
417+ seq_to_upper (ref ,0 );
418+ replace_iupac_codes (ref ,nref ); // any non-ACGT character in fasta ref is replaced with N
419+ als [0 ].l = 0 ;
420+ kputs (ref , & als [0 ]);
421+ als [i ].l = 0 ;
422+ kputsn (ref ,1 ,& als [i ]);
423+ kputs (line -> d .allele [i ],& del [i ]);
424+ continue ;
425+ }
426+ }
405427 if ( line -> d .allele [i ][0 ]== '*' ) return ERR_SPANNING_DELETION ; // spanning deletion
406428 if ( has_non_acgtn (line -> d .allele [i ],line -> shared .l ) )
407429 {
@@ -493,7 +515,8 @@ static int realign(args_t *args, bcf1_t *line)
493515 for (i = 0 ; i < line -> n_allele ; i ++ )
494516 {
495517 if (i > 0 ) kputc (',' ,& args -> tmp_kstr );
496- kputsn (als [i ].s ,als [i ].l ,& args -> tmp_kstr );
518+ if ( del [i ].l ) kputs (del [i ].s ,& args -> tmp_kstr );
519+ else kputsn (als [i ].s ,als [i ].l ,& args -> tmp_kstr );
497520 }
498521 args -> tmp_kstr .s [ args -> tmp_kstr .l ] = 0 ;
499522 bcf_update_alleles_str (args -> out_hdr ,line ,args -> tmp_kstr .s );
@@ -1939,7 +1962,10 @@ static void destroy_data(args_t *args)
19391962 free (args -> maps [i ].map );
19401963 for (i = 0 ; i < args -> ntmp_als ; i ++ )
19411964 free (args -> tmp_als [i ].s );
1965+ for (i = 0 ; i < args -> ntmp_del ; i ++ )
1966+ free (args -> tmp_del [i ].s );
19421967 free (args -> tmp_als );
1968+ free (args -> tmp_del );
19431969 free (args -> tmp_kstr .s );
19441970 if ( args -> tmp_str )
19451971 {
0 commit comments