Skip to content

Commit 8920652

Browse files
author
jaudoux
committed
Fix bug in GFF annotation loading caused by UTR features having unusual ID<->Parent relationships
1 parent 17559b5 commit 8920652

File tree

3 files changed

+57
-5
lines changed

3 files changed

+57
-5
lines changed

dist.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ license = MIT
44
copyright_holder = Jérôme Audoux
55
copyright_year = 2018
66

7-
version = 0.004
7+
version = 0.005
88

99
[@Basic]
1010
[PkgVersion]

lib/DEkupl/Annotations.pm

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ sub loadFromGFF {
7979
$parent = DEkupl::Utils::getAtomicGeneID($parent) if defined $parent;
8080

8181
# Add the id to parent relationship
82-
if(defined $parent && defined $id) {
83-
# TODO: we should only store that information for transcript_id to gene_id
82+
if($annot->{feature} eq 'transcript' && defined $parent && defined $id) {
8483
$id_to_parents{$id} = $parent;
8584
}
8685
# next if !defined $id;

t/DEkupl-Analyzer-Annotations.t

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
use strict;
22
use warnings;
33

4-
use Test::More tests => 12;
5-
4+
use Test::More tests => 15;
5+
use DEkupl::IntervalQuery;
66
use DEkupl::GenomicInterval;
7+
use DEkupl::Annotations;
78
use DEkupl::Annotations::Exon;
89
use DEkupl::Annotations::Gene;
910
use DEkupl::Analyzer::Annotations;
11+
use Inline::Files 0.68;
12+
use File::Temp;
13+
use Test::Exception;
1014

1115
my $query = DEkupl::GenomicInterval->new(
1216
'chr' => '12',
@@ -104,3 +108,52 @@ my $geneC_exon1 = DEkupl::Annotations::Exon->new(
104108
is($candidate->{is_exonic}, 1);
105109
is($candidate->{is_intronic}, 0);
106110
}
111+
112+
# Test on real GFF file
113+
{
114+
my ($fh, $gff_file) = File::Temp::tempfile( SUFFIX => '.gff', UNLINK => 0);
115+
while(<GFF>) {print $fh $_;}
116+
$fh->close;
117+
118+
my $annotations = DEkupl::Annotations->new();
119+
$annotations->loadFromGFF($gff_file);
120+
121+
my $interval_query = DEkupl::IntervalQuery->new();
122+
$interval_query->loadAnnotations($annotations);
123+
124+
# chr11:5,225,504-5,225,560
125+
my $query = DEkupl::GenomicInterval->new(
126+
'chr' => 'chr11',
127+
'start' => 5225504,
128+
'end' => 5225560,
129+
'strand' => '-',
130+
);
131+
132+
my $results = $interval_query->fetchByRegion($query);
133+
my $candidate = DEkupl::Analyzer::Annotations::_selectBestCandidate($results, $query);
134+
135+
is($candidate->{gene}->id, 'ENSG00000244734');
136+
is($candidate->{is_exonic}, 1);
137+
is($candidate->{is_intronic}, 0);
138+
}
139+
140+
__GFF__
141+
##gff-version 3
142+
#description: evidence-based annotation of the human genome (GRCh38), version 31 (Ensembl 97)
143+
#provider: GENCODE
144+
#contact: gencode-help@ebi.ac.uk
145+
#format: gff3
146+
#date: 2019-06-27
147+
##sequence-region chr1 1 248956422
148+
chr11 HAVANA gene 5225464 5229395 . - . ID=ENSG00000244734.4;gene_id=ENSG00000244734.4;gene_type=protein_coding;gene_name=HBB;level=2;hgnc_id=HGNC:4827;havana_gene=OTTHUMG00000066678.8
149+
chr11 HAVANA transcript 5225464 5227071 . - . ID=ENST00000335295.4;Parent=ENSG00000244734.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
150+
chr11 HAVANA exon 5226930 5227071 . - . ID=exon:ENST00000335295.4:1;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=1;exon_id=ENSE00001829867.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
151+
chr11 HAVANA CDS 5226930 5227021 . - 0 ID=CDS:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=1;exon_id=ENSE00001829867.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
152+
chr11 HAVANA start_codon 5227019 5227021 . - 0 ID=start_codon:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=1;exon_id=ENSE00001829867.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
153+
chr11 HAVANA exon 5226577 5226799 . - . ID=exon:ENST00000335295.4:2;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=2;exon_id=ENSE00001057381.1;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
154+
chr11 HAVANA CDS 5226577 5226799 . - 1 ID=CDS:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=2;exon_id=ENSE00001057381.1;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
155+
chr11 HAVANA exon 5225464 5225726 . - . ID=exon:ENST00000335295.4:3;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=3;exon_id=ENSE00001600613.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
156+
chr11 HAVANA CDS 5225598 5225726 . - 0 ID=CDS:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=3;exon_id=ENSE00001600613.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
157+
chr11 HAVANA stop_codon 5225598 5225600 . - 0 ID=stop_codon:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=3;exon_id=ENSE00001600613.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
158+
chr11 HAVANA five_prime_UTR 5227022 5227071 . - . ID=UTR5:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=1;exon_id=ENSE00001829867.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2
159+
chr11 HAVANA three_prime_UTR 5225464 5225597 . - . ID=UTR3:ENST00000335295.4;Parent=ENST00000335295.4;gene_id=ENSG00000244734.4;transcript_id=ENST00000335295.4;gene_type=protein_coding;gene_name=HBB;transcript_type=protein_coding;transcript_name=HBB-201;exon_number=3;exon_id=ENSE00001600613.2;level=2;protein_id=ENSP00000333994.3;transcript_support_level=1;hgnc_id=HGNC:4827;tag=CAGE_supported_TSS,basic,MANE_Select,appris_principal_1,CCDS;ccdsid=CCDS7753.1;havana_gene=OTTHUMG00000066678.8;havana_transcript=OTTHUMT00000495006.2

0 commit comments

Comments
 (0)