Skip to content

Commit d6cf97b

Browse files
committed
Exclude features outside transcript boundaries
Resolves #2323
1 parent 3351124 commit d6cf97b

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ Changes affecting specific commands:
1616

1717
- The command `convert --gvcf2vcf` was not filling the REF allele when BCF was output (#243)
1818

19+
* bcftools csq
20+
21+
- Check the input GFF for features outside transcript boundaries; such features are now excluded with a warning (#2323)
22+
1923
* bcftools query
2024

2125
- The functions used in -i/-e filtering expressions (such as SUM, MEDIAN, etc) can be

gff.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* The MIT License
22
3-
Copyright (c) 2023 Genome Research Ltd.
3+
Copyright (c) 2023-2024 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -122,7 +122,7 @@ struct gff_t_
122122

123123
struct {
124124
int unknown_chr,unknown_tscript_biotype,unknown_strand,unknown_phase,duplicate_id;
125-
int unknown_cds_phase,incomplete_cds,wrong_phase,overlapping_cds;
125+
int unknown_cds_phase,incomplete_cds,wrong_phase,overlapping_cds,ftr_out_of_bounds;
126126
} warned;
127127
};
128128

@@ -1010,7 +1010,16 @@ int gff_parse(gff_t *gff)
10101010
khint_t k = kh_get(int2tscript, aux->id2tr, (int)ftr->trid);
10111011
if ( k==kh_end(aux->id2tr) ) continue; // no corresponding transcript registered, must be an unsupported biotype
10121012

1013+
// check that the feature lies within the transcript's beg,end coordinates; warn and skip it otherwise
10131014
gf_tscript_t *tr = kh_val(aux->id2tr,k);
1015+
if ( ftr->beg < tr->beg || ftr->end > tr->end )
1016+
{
1017+
if ( !gff->warned.ftr_out_of_bounds || gff->verbosity > 1 )
1018+
fprintf(stderr,"Warning: The GFF contains features outside the transcript boundaries .. %s\n",gff_id2string(gff,transcript,tr->id));
1019+
gff->warned.ftr_out_of_bounds++;
1020+
continue;
1021+
}
1022+
10141023
tr->used = 1;
10151024
tr->gene->used = 1;
10161025

0 commit comments

Comments
 (0)