Skip to content

Commit 771c53c

Browse files
committed
Fix CRAM QS values where ambiguity codes are in the reference.
Commit 0aa2af3 changes QS encoding from verbatim copying out of s->qual_blk and into calling codecs[DS_QS]->encode. This was part of the plan to be able to perform the data transforms on QS, included in CRAM 4.0 prototype. Unfortunately the ordering was wrong, with a copy of all quals followed by encoding any explicit QS items produced by the feature codes. The correct ordering is the reversal of this.
1 parent 3294afb commit 771c53c

File tree

5 files changed

+16
-12
lines changed

5 files changed

+16
-12
lines changed

io_lib/cram_encode.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ static int cram_encode_slice_read(cram_fd *fd,
535535
int32_t i32;
536536
int64_t i64;
537537
unsigned char uc;
538+
int explicit_qual = 0;
538539

539540
//fprintf(stderr, "Encode seq %d, %d/%d FN=%d, %s\n", rec, core->byte, core->bit, cr->nfeature, s->name_ds->str + cr->name);
540541

@@ -609,11 +610,6 @@ static int cram_encode_slice_read(cram_fd *fd,
609610
/* Aux tags */
610611
r |= h->codecs[DS_TL]->encode(s, h->codecs[DS_TL], (char *)&cr->TL, 1);
611612

612-
// qual
613-
r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
614-
(char *)BLOCK_DATA(s->qual_blk) + cr->qual,
615-
cr->len);
616-
617613
// features (diffs)
618614
if (!(cr->flags & BAM_FUNMAP)) {
619615
int prev_pos = 0, j;
@@ -686,6 +682,7 @@ static int cram_encode_slice_read(cram_fd *fd,
686682
uc = f->B.qual;
687683
r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
688684
(char *)&uc, 1);
685+
explicit_qual++;
689686
break;
690687

691688
case 'b':
@@ -700,6 +697,7 @@ static int cram_encode_slice_read(cram_fd *fd,
700697
uc = f->Q.qual;
701698
r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
702699
(char *)&uc, 1);
700+
explicit_qual++;
703701
break;
704702

705703
case 'N':
@@ -736,6 +734,11 @@ static int cram_encode_slice_read(cram_fd *fd,
736734
r |= h->codecs[DS_BA]->encode(s, h->codecs[DS_BA], seq, cr->len);
737735
}
738736

737+
// qual
738+
r |= h->codecs[DS_QS]->encode(s, h->codecs[DS_QS],
739+
(char *)BLOCK_DATA(s->qual_blk) + cr->qual
740+
+ explicit_qual, cr->len);
741+
739742
return r ? -1 : 0;
740743
}
741744

tests/data/xx#MD.sam

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
@SQ SN:xx LN:30
22
@CO All MD and NM should match the stored values
33
a 0 xx 6 1 10M * 0 0 AAAAATTTTT * co:Z:no fields
4-
a 0 xx 6 1 10M * 0 0 AAAAGGTTTT *
4+
a 0 xx 6 1 11M * 0 0 AAAAGRTTTTT ABCDEFGHIJK
55
a 0 xx 6 1 10M * 0 0 GAAAATTTTG *
66
i 0 xx 6 1 5M1I5M * 0 0 AAAAAGTTTTT *
77
i 0 xx 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT *
@@ -11,12 +11,12 @@ d 0 xx 6 1 5M10D5M * 0 0 AAAAACCCCC *
1111
d 0 xx 6 1 5M10N5M * 0 0 AAAAACCCCC *
1212
sid 0 xx 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC *
1313
a 0 xx 6 1 10M * 0 0 AAAAATTTTT * MD:Z:10 NM:i:0 co:Z:correct fields
14-
a 0 xx 6 1 10M * 0 0 AAAAGGTTTT * MD:Z:4A0T4 NM:i:2
14+
a 0 xx 6 1 11M * 0 0 AAAAGRTTTTT ABCDEFGHIJK MD:Z:4A0T4Y0 NM:i:3
1515
a 0 xx 6 1 10M * 0 0 GAAAATTTTG * MD:Z:0A8T0 NM:i:2
1616
i 0 xx 6 1 5M1I5M * 0 0 AAAAAGTTTTT * MD:Z:10 NM:i:1
1717
i 0 xx 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * MD:Z:10 NM:i:3
1818
i 0 xx 6 1 10M2I * 0 0 AAAAATTTTTCC * MD:Z:10 NM:i:2
1919
i 0 xx 6 1 10M2P2I * 0 0 AAAAATTTTTCC * MD:Z:10 NM:i:2
20-
d 0 xx 6 1 5M10D5M * 0 0 AAAAACCCCC * MD:Z:5^TTTTTTTTTT5 NM:i:10
20+
d 0 xx 6 1 5M10D5M * 0 0 AAAAACCCCC * MD:Z:5^TTTTTYTTTT5 NM:i:10
2121
d 0 xx 6 1 5M10N5M * 0 0 AAAAACCCCC * MD:Z:10 NM:i:0
22-
sid 0 xx 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * MD:Z:4^ATTTTTTTTT0T3 NM:i:16
22+
sid 0 xx 6 1 1S4M10D5I4M1S * 0 0 AAAAAGGGGGCCCCC * MD:Z:4^ATTTTTYTTT0T3 NM:i:16

tests/data/xx#MD2.sam

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
@SQ SN:xx LN:30
22
@CO All MD and/or NM should differ to the stored values
33
a 0 xx 6 1 10M * 0 0 AAAAATTTTT * MD:Z:9 NM:i:0 co:Z:MD incorrect fields
4-
a 0 xx 6 1 10M * 0 0 AAAAGGTTTT * MD:Z:4A0A4 NM:i:2
4+
a 0 xx 6 1 11M * 0 0 AAAAGGTTTTT * MD:Z:4A0T4Y0 NM:i:2
5+
a 0 xx 6 1 11M * 0 0 AAAAGGTTTTT * MD:Z:4A0T4N0 NM:i:3
56
a 0 xx 6 1 10M * 0 0 GAAAATTTTG * MD:Z:0G8T0 NM:i:2
67
i 0 xx 6 1 5M1I5M * 0 0 AAAAAGTTTTT * MD:Z:11 NM:i:1
78
i 0 xx 6 1 5M3I5M * 0 0 AAAAAGGGTTTTT * MD:Z:1A1 NM:i:3

tests/data/xx#rg.sam

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
@HD VN:1.4 SO:coordinate
2-
@SQ SN:xx LN:30 AS:? SP:? UR:? M5:bbf4de6d8497a119dda6e074521643dc
2+
@SQ SN:xx LN:30 AS:? SP:? UR:? M5:1224b81d8664d77635e1620d7f2c1523
33
@RG ID:x1 SM:x1
44
@RG ID:x2 SM:x2 LB:x PG:foo:bar PI:1111
55
@PG ID:emacs PN:emacs VN:23.1.1

tests/data/xx.fa

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
>xx
2-
AAAAAAAAAATTTTTTTTTTCCCCCCCCCC
2+
AAAAAAAAAATTTTTYTTTTCCCCCCCCCC
33
>yy
44
AAAAAAAAAATTTTTTTTTT
55

0 commit comments

Comments
 (0)