Skip to content

Commit 8b6689c

Browse files
committed
Preserve LAA tag when all referenced alleles are removed
If every allele referenced by the LAA tags in a VCF record is removed, a single MISSING value must be stored for each sample. This stops bcf_update_format_int32() from removing the LAA tag completely, which would invalidate other tags such as LAD and LPL that depend on LAA being present.
1 parent 7d4d067 commit 8b6689c

File tree

2 files changed

+35
-9
lines changed

2 files changed

+35
-9
lines changed

test/test-vcf-api.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -979,15 +979,22 @@ void test_bcf_remove_allele_set(void)
979979
"5\t110350\t.\tT\t<INS>,<INS>\t.\tPASS\tIMPRECISE;SVLEN=100,200;CIEND=-50,50,-25,25;CIPOS=-10,10,-20,20\tGT\t0/1\t0/1\t0/1",
980980
"5\t110500\t.\tT\t<CNV>,<CNV>\t.\tPASS\tIMPRECISE;SVLEN=50,100;CILEN=0,25,-25,25;CN=2,4;CICN=-0.5,1,-1.5,1.5\tGT\t0/1\t0/1\t0/1",
981981
"5\t110700\t.\tA\t<INS:ME>,<INS:ME>\t.\tPASS\tMEINFO=AluY,1,260,+,FLAM_C,1,110,-;METRANS=1,94820,95080,+,1,129678,129788,-\tGT\t0/1\t0/1\t0/1",
982-
"5\t112000\t.\tC\t<CNV:TR>,<CNV:TR>\t.\tPASS\tRN=2,1;RUS=CAG,TTG,CA;RUL=3,3,2;RB=12,6,6;RUC=4,2,3;RUB=3,3,3,3,3,3,2,2,2;SVLEN=18,6"
982+
"5\t112000\t.\tC\t<CNV:TR>,<CNV:TR>\t.\tPASS\tRN=2,1;RUS=CAG,TTG,CA;RUL=3,3,2;RB=12,6,6;RUC=4,2,3;RUB=3,3,3,3,3,3,2,2,2;SVLEN=18,6",
983+
"5\t113000\t.\tT\tC,A\t.\tPASS\tAC=90,1;AD=6,5,6;AF=0.009,0.0001;VL_A_STR_INFO=alt_c,alt_a;VL_R_STR_INFO=ref,alt_c,alt_a\tGT:LAA:LAD:LEC:LPL:VL_LA_STR_FMT:VL_LG_STR_FMT:VL_LR_STR_FMT\t0/0:.:3:.:0:.:gt_00:ref\t0/1:1,2:3,2,0:44,27:114,0,15,35,73,113:alt_c,alt_a:gt_00,gt_01,gt_11,gt_02,gt_12,gt_22:ref,alt_c,alt_a\t1/1:1:0,3:46:110,15,0:alt_c:gt_00,gt_01,gt_11:ref,alt_c",
984+
"5\t114000\t.\tT\tC,A\t.\tPASS\tAC=90,1;AD=6,5,6;AF=0.009,0.0001;VL_A_STR_INFO=alt_c,alt_a;VL_R_STR_INFO=ref,alt_c,alt_a\tGT:LAA:LAD:LEC:LPL:VL_LA_STR_FMT:VL_LG_STR_FMT:VL_LR_STR_FMT\t0/0:.:3:.:0:.:gt_00:ref\t0/1:1,2:3,2,0:44,27:114,0,15,35,73,113:alt_c,alt_a:gt_00,gt_01,gt_11,gt_02,gt_12,gt_22:ref,alt_c,alt_a\t1/1:1:0,3:46:110,15,0:alt_c:gt_00,gt_01,gt_11:ref,alt_c",
983985
};
984986
const char * expected[] = {
987+
// 2nd allele removed
985988
"5\t110285\t.\tT\tC\t.\tPASS\tAC=1;AD=6,5;AF=0.99;VL_A_STR_INFO=alt_c;VL_R_STR_INFO=ref,alt_c\tGT:AD:EC:PL:VL_A_STR_FMT:VL_G_STR_FMT:VL_R_STR_FMT\t.:.:.:.:.:.:.\t0/1:6,5:4:114,0,15:alt_c:gt_00,gt_01,gt_11:ref,alt_c\t.:.:.:.:.:.:.",
986989
"5\t110290\t.\tT\tC\t.\tPASS\tAC=90;AD=6,5;AF=0.009;VL_A_STR_INFO=alt_c;VL_R_STR_INFO=ref,alt_c\tGT:LAA:LAD:LEC:LPL:VL_LA_STR_FMT:VL_LG_STR_FMT:VL_LR_STR_FMT\t0/0:.:3:.:0:.:gt_00:ref\t0/1:1:3,2:44:114,0,15:alt_c:gt_00,gt_01,gt_11:ref,alt_c\t1/1:1:0,3:46:110,15,0:alt_c:gt_00,gt_01,gt_11:ref,alt_c",
987990
"5\t110350\t.\tT\t<INS>\t.\tPASS\tIMPRECISE;SVLEN=100;CIEND=-50,50;CIPOS=-10,10\tGT\t0/1\t0/1\t0/1",
988991
"5\t110500\t.\tT\t<CNV>\t.\tPASS\tIMPRECISE;SVLEN=50;CILEN=0,25;CN=2;CICN=-0.5,1\tGT\t0/1\t0/1\t0/1",
989992
"5\t110700\t.\tA\t<INS:ME>\t.\tPASS\tMEINFO=AluY,1,260,+;METRANS=1,94820,95080,+\tGT\t0/1\t0/1\t0/1",
990-
"5\t112000\t.\tC\t<CNV:TR>\t.\tPASS\tRN=2;RUS=CAG,TTG;RUL=3,3;RB=12,6;RUC=4,2;RUB=3,3,3,3,3,3;SVLEN=18"
993+
"5\t112000\t.\tC\t<CNV:TR>\t.\tPASS\tRN=2;RUS=CAG,TTG;RUL=3,3;RB=12,6;RUC=4,2;RUB=3,3,3,3,3,3;SVLEN=18",
994+
// 1st allele removed
995+
"5\t113000\t.\tT\tA\t.\tPASS\tAC=1;AD=6,6;AF=0.0001;VL_A_STR_INFO=alt_a;VL_R_STR_INFO=ref,alt_a\tGT:LAA:LAD:LEC:LPL:VL_LA_STR_FMT:VL_LG_STR_FMT:VL_LR_STR_FMT\t0/0:.:3:.:0:.:gt_00:ref\t0/.:1:3,0:27:114,35,113:alt_a:gt_00,gt_02,gt_22:ref,alt_a\t./.:.:0:.:110:.:gt_00:ref",
996+
// Both alleles removed
997+
"5\t114000\t.\tT\t.\t.\tPASS\tAD=6;VL_R_STR_INFO=ref\tGT:LAA:LAD:LEC:LPL:VL_LA_STR_FMT:VL_LG_STR_FMT:VL_LR_STR_FMT\t0/0:.:3:.:0:.:gt_00:ref\t0/.:.:3:.:114:.:gt_00:ref\t./.:.:0:.:110:.:gt_00:ref",
991998
};
992999

9931000
kstring_t kstr = KS_INITIALIZE;
@@ -1011,7 +1018,15 @@ void test_bcf_remove_allele_set(void)
10111018
for (i = 0; i < sizeof(inputs)/sizeof(inputs[0]); i++)
10121019
{
10131020
check0(read_vcf_line(inputs[i], hdr, rec, &kstr));
1014-
kbs_insert(rm_set, 2);
1021+
kbs_clear(rm_set);
1022+
if (rec->pos == 113000 - 1) {
1023+
kbs_insert(rm_set, 1);
1024+
} else if (rec->pos == 114000 - 1) {
1025+
kbs_insert(rm_set, 1);
1026+
kbs_insert(rm_set, 2);
1027+
} else {
1028+
kbs_insert(rm_set, 2);
1029+
}
10151030
check0(bcf_remove_allele_set(hdr, rec, rm_set));
10161031
check0(vcf_format(hdr, rec, ks_clear(&kstr)));
10171032
chomp(&kstr);

vcfutils.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,13 +1031,24 @@ int bcf_remove_allele_set(const bcf_hdr_t *header, bcf1_t *line, const struct kb
10311031
if (max_k < num_laa_vals)
10321032
{
10331033
// Max number of items has shrunk, so consolidate.
1034-
for (i = 1; i < line->n_sample; i++)
1035-
{
1036-
memmove(&laa[i * max_k],
1037-
&laa[i * num_laa_vals],
1038-
max_k * sizeof(laa[0]));
1034+
if (max_k > 0) {
1035+
for (i = 1; i < line->n_sample; i++)
1036+
{
1037+
memmove(&laa[i * max_k],
1038+
&laa[i * num_laa_vals],
1039+
max_k * sizeof(laa[0]));
1040+
}
1041+
num_laa = line->n_sample * max_k;
1042+
} else {
1043+
// No values left - all referenced alleles must have been
1044+
// removed. Store MISSING to prevent the LAA tag from
1045+
// also being removed (which would invalidate LAD,
1046+
// LPL etc.)
1047+
assert(num_laa >= line->n_sample);
1048+
for (i = 0; i < line->n_sample; i++)
1049+
laa[i] = bcf_int32_missing;
1050+
num_laa = line->n_sample;
10391051
}
1040-
num_laa = line->n_sample * max_k;
10411052
}
10421053
// Push back new LAA values
10431054
if (bcf_update_format_int32(header, line,

0 commit comments

Comments
 (0)