Skip to content

Commit f4d7c27

Browse files
committed
Make -t F_MISSING work with -S groups.txt. Fixes #2447
1 parent 35ed1f0 commit f4d7c27

File tree

8 files changed

+52
-12
lines changed

8 files changed

+52
-12
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ Changes affecting specific commands:
3131

3232
- Fix header formatting error for INFO/F_MISSING which must be Number=1 (#2442)
3333

34+
- Make `-t 'F_MISSING'` work with `-S groups.txt` (#2447)
35+
3436
* bcftools gtcheck
3537

3638
- The program is now able to process gVCF blocks. Newly, monoallelic sites are excluded only

filter.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,9 @@ static void filters_set_alt_string(filter_t *flt, bcf1_t *line, token_t *tok)
13661366
}
13671367
static void filters_set_nmissing(filter_t *flt, bcf1_t *line, token_t *tok)
13681368
{
1369+
if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", tok->tag);
1370+
assert(tok->usmpl);
1371+
13691372
bcf_unpack(line, BCF_UN_FMT);
13701373
if ( !line->n_sample )
13711374
{
@@ -1384,11 +1387,13 @@ static void filters_set_nmissing(filter_t *flt, bcf1_t *line, token_t *tok)
13841387
return;
13851388
}
13861389

1387-
int j,nmissing = 0;
1390+
int j,nsmpl = 0, nmissing = 0;
13881391
#define BRANCH(type_t, convert, is_vector_end) { \
13891392
for (i=0; i<line->n_sample; i++) \
13901393
{ \
1394+
if ( !tok->usmpl[i] ) continue; \
13911395
uint8_t *ptr = fmt->p + i*fmt->size; \
1396+
nsmpl++; \
13921397
for (j=0; j<fmt->n; j++) \
13931398
{ \
13941399
type_t val = convert(&ptr[j * sizeof(type_t)]); \
@@ -1405,7 +1410,7 @@ static void filters_set_nmissing(filter_t *flt, bcf1_t *line, token_t *tok)
14051410
}
14061411
#undef BRANCH
14071412
tok->nvalues = 1;
1408-
tok->values[0] = tok->tag[0]=='N' ? nmissing : (double)nmissing / line->n_sample;
1413+
tok->values[0] = tok->tag[0]=='N' ? nmissing : (nsmpl ? (double)nmissing / nsmpl : 0);
14091414
}
14101415
static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
14111416
{
@@ -1414,16 +1419,17 @@ static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stac
14141419
if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", rtok->tag);
14151420
assert(tok->usmpl);
14161421

1417-
int i, npass = 0;
1422+
int i, nsmpl = 0, npass = 0;
14181423
for (i=0; i<tok->nsamples; i++)
14191424
{
14201425
if ( !tok->usmpl[i] ) continue;
1426+
nsmpl++;
14211427
if ( tok->pass_samples[i] ) npass++;
14221428
}
14231429
hts_expand(double,1,rtok->mvalues,rtok->values);
14241430
rtok->nsamples = 0;
14251431
rtok->nvalues = 1;
1426-
rtok->values[0] = rtok->tag[0]=='N' ? npass : (line->n_sample ? 1.0*npass/line->n_sample : 0);
1432+
rtok->values[0] = rtok->tag[0]=='N' ? npass : (nsmpl ? 1.0*npass/nsmpl : 0);
14271433

14281434
return 1;
14291435
}
@@ -3144,6 +3150,14 @@ static int filters_init1_ext(filter_t *filter, char *str, int len, token_t *tok)
31443150
filter->ext[filter->n_ext-1] = tok->ht_type;
31453151
return 0;
31463152
}
3153+
static void init_usmpl(filter_t *flt, token_t *tok)
3154+
{
3155+
if ( tok->nsamples ) return;
3156+
int i;
3157+
tok->nsamples = bcf_hdr_nsamples(flt->hdr);
3158+
tok->usmpl = (uint8_t*) malloc(tok->nsamples);
3159+
for (i=0; i<tok->nsamples; i++) tok->usmpl[i] = 1;
3160+
}
31473161
static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
31483162
{
31493163
tok->vl_len = BCF_VL_FIXED;
@@ -3295,6 +3309,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
32953309
tok->setter = &filters_set_nmissing;
32963310
tok->tag = strdup("N_MISSING");
32973311
tok->ht_type = BCF_HT_INT;
3312+
init_usmpl(filter,tok);
32983313
return 0;
32993314
}
33003315
else if ( !strncasecmp(str,"F_MISSING",len) )
@@ -3303,6 +3318,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
33033318
tok->setter = &filters_set_nmissing;
33043319
tok->tag = strdup("F_MISSING");
33053320
tok->ht_type = BCF_HT_REAL;
3321+
init_usmpl(filter,tok);
33063322
return 0;
33073323
}
33083324
}
@@ -3335,13 +3351,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
33353351
if ( tok->idx==-3 && bcf_hdr_id2length(filter->hdr,BCF_HL_FMT,tok->hdr_id)!=BCF_VL_R )
33363352
error("Error: GT subscripts can be used only with Number=R tags\n");
33373353
}
3338-
else if ( is_fmt && !tok->nsamples )
3339-
{
3340-
int i;
3341-
tok->nsamples = bcf_hdr_nsamples(filter->hdr);
3342-
tok->usmpl = (uint8_t*) malloc(tok->nsamples);
3343-
for (i=0; i<tok->nsamples; i++) tok->usmpl[i] = 1;
3344-
}
3354+
else if ( is_fmt ) init_usmpl(filter,tok);
33453355

33463356
tok->hl_type = is_fmt ? BCF_HL_FMT : BCF_HL_INFO;
33473357
if ( tok->hdr_id >= 0 ) tok->vl_len = bcf_hdr_id2length(filter->hdr,tok->hl_type,tok->hdr_id);

plugins/fill-tags.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ void list_tags(void)
532532
"INFO/AN Number:1 Type:Integer .. Total number of alleles in called genotypes\n"
533533
"INFO/ExcHet Number:A Type:Float .. Test excess heterozygosity; 1=good, 0=bad\n"
534534
"INFO/END Number:1 Type:Integer .. End position of the variant\n"
535-
"INFO/F_MISSING Number:1 Type:Float .. Fraction of missing genotypes (all samples, experimental)\n"
535+
"INFO/F_MISSING Number:1 Type:Float .. Fraction of missing genotypes, synonymous with 'F_MISSING=F_PASS(GT=\"mis\")'\n"
536536
"INFO/HWE Number:A Type:Float .. HWE test (PMID:15789306); 1=good, 0=bad\n"
537537
"INFO/MAF Number:1 Type:Float .. Frequency of the second most common allele\n"
538538
"INFO/NS Number:1 Type:Integer .. Number of samples with data\n"

test/fmissing.1.out

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
chr1 1 . C T 30 PASS DP=2470;F_MISSING_AAA=0.5;F_MISSING_BBB=0.375;F_MISSING=0.4 GT:PL:DP:AD:GQ ./.:0,18,181:6:6,0:21 0/0:0,30,255:10:10,0:33 ./.:70,0,141:9:6,3:66 0/1:244,0,180:18:8,10:127 ./.:0,18,152:6:6,0:21 0/0:0,48,255:16:16,0:127 0/0:0,30,255:11:10,0:127 ./.:0,24,233:8:8,0:127 0/0:0,57,255:19:19,0:127 0/0:0,84,255:28:28,0:127
2+
chr1 2 . G A 30 PASS DP=2450;F_MISSING_AAA=0.5;F_MISSING_BBB=0.25;F_MISSING=0.3 GT:PL:DP:AD:GQ ./.:184,21,0:7:0,7:25 1/1:255,33,0:11:0,11:37 0/1:169,0,69:10:3,7:64 0/1:206,0,244:18:9,9:127 ./.:166,18,0:6:0,6:22 0/0:0,45,255:15:15,0:127 0/0:0,36,255:12:12,0:127 ./.:0,27,235:9:9,0:127 0/0:0,57,255:19:19,0:127 0/0:0,24,255:28:27,1:127

test/fmissing.2.out

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
chr1 1 . C T 30 PASS DP=2470;N_MISSING_AAA=1;N_MISSING_BBB=3;N_MISSING=4 GT:PL:DP:AD:GQ ./.:0,18,181:6:6,0:21 0/0:0,30,255:10:10,0:33 ./.:70,0,141:9:6,3:66 0/1:244,0,180:18:8,10:127 ./.:0,18,152:6:6,0:21 0/0:0,48,255:16:16,0:127 0/0:0,30,255:11:10,0:127 ./.:0,24,233:8:8,0:127 0/0:0,57,255:19:19,0:127 0/0:0,84,255:28:28,0:127
2+
chr1 2 . G A 30 PASS DP=2450;N_MISSING_AAA=1;N_MISSING_BBB=2;N_MISSING=3 GT:PL:DP:AD:GQ ./.:184,21,0:7:0,7:25 1/1:255,33,0:11:0,11:37 0/1:169,0,69:10:3,7:64 0/1:206,0,244:18:9,9:127 ./.:166,18,0:6:0,6:22 0/0:0,45,255:15:15,0:127 0/0:0,36,255:12:12,0:127 ./.:0,27,235:9:9,0:127 0/0:0,57,255:19:19,0:127 0/0:0,24,255:28:27,1:127

test/fmissing.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
AAA1 AAA
2+
AAA2 AAA
3+
BBB1 BBB
4+
BBB2 BBB
5+
BBB3 BBB
6+
BBB4 BBB
7+
BBB5 BBB
8+
BBB6 BBB
9+
BBB7 BBB
10+
BBB8 BBB

test/fmissing.vcf

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=chr1,length=2>
3+
##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">
4+
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
5+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">
6+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths (high-quality bases)">
7+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
8+
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled Genotype Quality">
9+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAA1 AAA2 BBB1 BBB2 BBB3 BBB4 BBB5 BBB6 BBB7 BBB8
10+
chr1 1 . C T 30 PASS DP=2470 GT:PL:DP:AD:GQ ./.:0,18,181:6:6,0:21 0/0:0,30,255:10:10,0:33 ./.:70,0,141:9:6,3:66 0/1:244,0,180:18:8,10:127 ./.:0,18,152:6:6,0:21 0/0:0,48,255:16:16,0:127 0/0:0,30,255:11:10,0:127 ./.:0,24,233:8:8,0:127 0/0:0,57,255:19:19,0:127 0/0:0,84,255:28:28,0:127
11+
chr1 2 . G A 30 PASS DP=2450 GT:PL:DP:AD:GQ ./.:184,21,0:7:0,7:25 1/1:255,33,0:11:0,11:37 0/1:169,0,69:10:3,7:64 0/1:206,0,244:18:9,9:127 ./.:166,18,0:6:0,6:22 0/0:0,45,255:15:15,0:127 0/0:0,36,255:12:12,0:127 ./.:0,27,235:9:9,0:127 0/0:0,57,255:19:19,0:127 0/0:0,24,255:28:27,1:127

test/test.pl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,9 @@
682682
run_test(\&test_vcf_plugin,$opts,in=>'query.variantkey',out=>'query.add-variantkey.vcf',cmd=>'+add-variantkey',args=>'');
683683
run_test(\&test_vcf_plugin,$opts,in=>'query.variantkey',out=>'variantkey-hex.out',cmd=>'+variantkey-hex',args=>'test/');
684684
run_test(\&test_vcf_plugin,$opts,in=>'query.nucleotide',out=>'query.allele-length.tsv',cmd=>'+allele-length',args=>'');
685+
run_test(\&test_vcf_plugin,$opts,in=>'fmissing',out=>'fmissing.1.out',cmd=>'+fill-tags --no-version',args=>q[-- -S {PATH}/fmissing.txt -t 'F_MISSING' | grep -v ^#]);
686+
run_test(\&test_vcf_plugin,$opts,in=>'fmissing',out=>'fmissing.1.out',cmd=>'+fill-tags --no-version',args=>q[-- -S {PATH}/fmissing.txt -t 'F_MISSING:1=F_PASS(GT="mis")' | grep -v ^#]);
687+
run_test(\&test_vcf_plugin,$opts,in=>'fmissing',out=>'fmissing.2.out',cmd=>'+fill-tags --no-version',args=>q[-- -S {PATH}/fmissing.txt -t 'N_MISSING:1=int(N_PASS(GT="mis"))' | grep -v ^#]);
685688
run_test(\&test_vcf_plugin,$opts,in=>'fisher',out=>'fisher.1.out',cmd=>'+fill-tags --no-version',args=>q[-- -t 'FT:1=phred(fisher(INFO/DP4))']);
686689
run_test(\&test_vcf_plugin,$opts,in=>'fisher',out=>'fisher.2.out',cmd=>'+fill-tags --no-version',args=>q[-- -t 'FT:1=phred(fisher(INFO/ADF[0,2],INFO/ADR[0,2]))']);
687690
run_test(\&test_vcf_plugin,$opts,in=>'fisher',out=>'fisher.3.out',cmd=>'+fill-tags --no-version',args=>q[-- -t 'FT:1=phred(fisher(INFO/ADF,INFO/ADR))']);

0 commit comments

Comments
 (0)