|
1 | 1 | /* The MIT License |
2 | 2 |
|
3 | | - Copyright (c) 2023 Genome Research Ltd. |
| 3 | + Copyright (c) 2023-2024 Genome Research Ltd. |
4 | 4 |
|
5 | 5 | Author: Petr Danecek <[email protected]> |
6 | 6 |
|
@@ -122,7 +122,7 @@ struct gff_t_ |
122 | 122 |
|
123 | 123 | struct { |
124 | 124 | int unknown_chr,unknown_tscript_biotype,unknown_strand,unknown_phase,duplicate_id; |
125 | | - int unknown_cds_phase,incomplete_cds,wrong_phase,overlapping_cds; |
| 125 | + int unknown_cds_phase,incomplete_cds,wrong_phase,overlapping_cds,ftr_out_of_bounds; |
126 | 126 | } warned; |
127 | 127 | }; |
128 | 128 |
|
@@ -1010,7 +1010,16 @@ int gff_parse(gff_t *gff) |
1010 | 1010 | khint_t k = kh_get(int2tscript, aux->id2tr, (int)ftr->trid); |
1011 | 1011 | if ( k==kh_end(aux->id2tr) ) continue; // no corresponding transcript registered, must be an unsupported biotype |
1012 | 1012 |
|
| 1013 | + // check whether the feature respects transcript's beg,end coordinates |
1013 | 1014 | gf_tscript_t *tr = kh_val(aux->id2tr,k); |
| 1015 | + if ( ftr->beg < tr->beg || ftr->end > tr->end ) |
| 1016 | + { |
| 1017 | + if ( !gff->warned.ftr_out_of_bounds || gff->verbosity > 1 ) |
| 1018 | + fprintf(stderr,"Warning: The GFF contains features outside the transcript boundaries .. %s\n",gff_id2string(gff,transcript,tr->id)); |
| 1019 | + gff->warned.ftr_out_of_bounds++; |
| 1020 | + continue; |
| 1021 | + } |
| 1022 | + |
1014 | 1023 | tr->used = 1; |
1015 | 1024 | tr->gene->used = 1; |
1016 | 1025 |
|
|
0 commit comments