Skip to content

Commit 425e8ea

Browse files
bam_record_utils.cpp:standardize_format: Updates for bsmap
1 parent e202b6f commit 425e8ea

File tree

1 file changed

+26
-17
lines changed

1 file changed

+26
-17
lines changed

src/common/bam_record_utils.cpp

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -527,15 +527,12 @@ merge_by_byte(const bam1_t *a, const bam1_t *b, bam1_t *c) {
527527

528528
static inline void
529529
flip_conversion(bam1_t *aln) {
530-
if (aln->core.flag & BAM_FREVERSE)
531-
aln->core.flag = aln->core.flag & (~BAM_FREVERSE);
532-
else
533-
aln->core.flag = aln->core.flag | BAM_FREVERSE;
530+
aln->core.flag ^= BAM_FREVERSE; // ADS: flip the "reverse" bit
534531

535532
revcomp_seq_by_byte(aln);
536533

537534
// ADS: don't like *(cv + 1) below, but no HTSlib function for it?
538-
uint8_t *cv = bam_aux_get(aln, "CV");
535+
auto cv = bam_aux_get(aln, "CV");
539536
if (!cv) throw dnmt_error("bam_aux_get failed for CV");
540537
*(cv + 1) = 'T';
541538
}
@@ -812,31 +809,40 @@ standardize_format(const string &input_format, bam1_t *aln) {
812809
if (input_format == "abismal" || input_format == "walt") return;
813810

814811
if (input_format == "bsmap") {
815-
// A/T rich; get the ZS tag value
816-
auto zs_tag = bam_aux_get(aln, "ZS");
812+
// A/T rich: get the ZS tag value
813+
const auto zs_tag = bam_aux_get(aln, "ZS");
817814
if (!zs_tag) throw dnmt_error("bam_aux_get for ZS (invalid bsmap)");
818815
// ADS: test for errors on the line below
819-
const uint8_t cv = string(bam_aux2Z(zs_tag))[1] == '-' ? 'A' : 'T';
816+
const auto zs_tag_value = string(bam_aux2Z(zs_tag));
817+
if (zs_tag_value.empty()) throw dnmt_error("empty ZS tag in bsmap format");
818+
if (zs_tag_value[0] != '-' && zs_tag_value[0] != '+')
819+
throw dnmt_error("invalid ZS tag in bsmap format");
820+
const uint8_t cv = zs_tag_value[1] == '-' ? 'A' : 'T';
820821
// get the "mismatches" tag
821-
auto nm_tag = bam_aux_get(aln, "NM");
822-
if (!nm_tag) throw dnmt_error("bam_aux_get for NM (invalid bsmap)");
822+
const auto nm_tag = bam_aux_get(aln, "NM");
823+
if (!nm_tag) throw dnmt_error("invalid NM tag in bsmap format");
823824
const int64_t nm = bam_aux2i(nm_tag);
824825

825-
aln->l_data = bam_get_aux(aln) - aln->data; // del aux (no data resize)
826+
// ADS: this should delete the aux data by truncating the used
827+
// range within the bam1_t while avoiding resizing memory
828+
aln->l_data = bam_get_aux(aln) - aln->data;
826829

827830
/* add the tags we want */
828831
// "udpate" for "int" because it determines the right size; even
829-
// though we just deleted all tags, it will add it back here.
832+
// though we just deleted all tags, it will add it back here
830833
err_code = bam_aux_update_int(aln, "NM", nm);
831-
if (err_code < 0) throw dnmt_error(err_code, "bam_aux_update_int");
834+
if (err_code < 0)
835+
throw dnmt_error(err_code, "error setting NM in bsmap format");
836+
832837
// "append" for "char" because there is no corresponding update
833838
err_code = bam_aux_append(aln, "CV", 'A', 1, &cv);
834-
if (err_code < 0) throw dnmt_error(err_code, "bam_aux_append");
839+
if (err_code < 0)
840+
throw dnmt_error(err_code, "error setting conversion in bsmap format");
835841

836-
if (bam_is_rev(aln))
837-
revcomp_seq_by_byte(aln); // reverse complement if needed
842+
// reverse complement if needed
843+
if (bam_is_rev(aln)) revcomp_seq_by_byte(aln);
838844
}
839-
if (input_format == "bismark") {
845+
else if (input_format == "bismark") {
840846
// ADS: Previously we modified the read names at the first
841847
// underscore. Even if the names are still that way, it should no
842848
// longer be needed since we compare names up to a learned suffix.
@@ -864,6 +870,9 @@ standardize_format(const string &input_format, bam1_t *aln) {
864870
if (bam_is_rev(aln))
865871
revcomp_seq_by_byte(aln); // reverse complement if needed
866872
}
873+
// ADS: the condition below should be checked much earlier, ideally
874+
// before the output file is created
875+
else throw runtime_error("incorrect format specified: " + input_format);
867876

868877
// Be sure this doesn't depend on mapper! Removes the "qual" part of
869878
// the data in a bam1_t struct but does not change its uncompressed

0 commit comments

Comments
 (0)