@@ -527,15 +527,12 @@ merge_by_byte(const bam1_t *a, const bam1_t *b, bam1_t *c) {
527527
528528static inline void
529529flip_conversion (bam1_t *aln) {
530- if (aln->core .flag & BAM_FREVERSE)
531- aln->core .flag = aln->core .flag & (~BAM_FREVERSE);
532- else
533- aln->core .flag = aln->core .flag | BAM_FREVERSE;
530+ aln->core .flag ^= BAM_FREVERSE; // ADS: flip the "reverse" bit
534531
535532 revcomp_seq_by_byte (aln);
536533
537534 // ADS: don't like *(cv + 1) below, but no HTSlib function for it?
538- uint8_t * cv = bam_aux_get (aln, " CV" );
535+ auto cv = bam_aux_get (aln, " CV" );
539536 if (!cv) throw dnmt_error (" bam_aux_get failed for CV" );
540537 *(cv + 1 ) = ' T' ;
541538}
@@ -812,31 +809,40 @@ standardize_format(const string &input_format, bam1_t *aln) {
812809 if (input_format == " abismal" || input_format == " walt" ) return ;
813810
814811 if (input_format == " bsmap" ) {
815- // A/T rich; get the ZS tag value
816- auto zs_tag = bam_aux_get (aln, " ZS" );
812+ // A/T rich: get the ZS tag value
813+ const auto zs_tag = bam_aux_get (aln, " ZS" );
817814 if (!zs_tag) throw dnmt_error (" bam_aux_get for ZS (invalid bsmap)" );
818815 // ADS: test for errors on the line below
819- const uint8_t cv = string (bam_aux2Z (zs_tag))[1 ] == ' -' ? ' A' : ' T' ;
816+ const auto zs_tag_value = string (bam_aux2Z (zs_tag));
817+ if (zs_tag_value.empty ()) throw dnmt_error (" empty ZS tag in bsmap format" );
818+ if (zs_tag_value[0 ] != ' -' && zs_tag_value[0 ] != ' +' )
819+ throw dnmt_error (" invalid ZS tag in bsmap format" );
820+ const uint8_t cv = zs_tag_value[1 ] == ' -' ? ' A' : ' T' ;
820821 // get the "mismatches" tag
821- auto nm_tag = bam_aux_get (aln, " NM" );
822- if (!nm_tag) throw dnmt_error (" bam_aux_get for NM (invalid bsmap) " );
822+ const auto nm_tag = bam_aux_get (aln, " NM" );
823+ if (!nm_tag) throw dnmt_error (" invalid NM tag in bsmap format " );
823824 const int64_t nm = bam_aux2i (nm_tag);
824825
825- aln->l_data = bam_get_aux (aln) - aln->data ; // del aux (no data resize)
826+ // ADS: this should delete the aux data by truncating the used
827+ // range within the bam1_t while avoiding resizing memory
828+ aln->l_data = bam_get_aux (aln) - aln->data ;
826829
827830 /* add the tags we want */
828831 // "udpate" for "int" because it determines the right size; even
829- // though we just deleted all tags, it will add it back here.
832+ // though we just deleted all tags, it will add it back here
830833 err_code = bam_aux_update_int (aln, " NM" , nm);
831- if (err_code < 0 ) throw dnmt_error (err_code, " bam_aux_update_int" );
834+ if (err_code < 0 )
835+ throw dnmt_error (err_code, " error setting NM in bsmap format" );
836+
832837 // "append" for "char" because there is no corresponding update
833838 err_code = bam_aux_append (aln, " CV" , ' A' , 1 , &cv);
834- if (err_code < 0 ) throw dnmt_error (err_code, " bam_aux_append" );
839+ if (err_code < 0 )
840+ throw dnmt_error (err_code, " error setting conversion in bsmap format" );
835841
836- if ( bam_is_rev (aln))
837- revcomp_seq_by_byte (aln); // reverse complement if needed
842+ // reverse complement if needed
843+ if ( bam_is_rev (aln)) revcomp_seq_by_byte (aln);
838844 }
839- if (input_format == " bismark" ) {
845+ else if (input_format == " bismark" ) {
840846 // ADS: Previously we modified the read names at the first
841847 // underscore. Even if the names are still that way, it should no
842848 // longer be needed since we compare names up to a learned suffix.
@@ -864,6 +870,9 @@ standardize_format(const string &input_format, bam1_t *aln) {
864870 if (bam_is_rev (aln))
865871 revcomp_seq_by_byte (aln); // reverse complement if needed
866872 }
873+ // ADS: the condition below should be checked much earlier, ideally
874+ // before the output file is created
875+ else throw runtime_error (" incorrect format specified: " + input_format);
867876
868877 // Be sure this doesn't depend on mapper! Removes the "qual" part of
869878 // the data in a bam1_t struct but does not change its uncompressed
0 commit comments