Skip to content

Commit 10f0233

Browse files
committed
fixes #19
Added some unit tests too
1 parent 4066114 commit 10f0233

File tree

2 files changed

+121
-12
lines changed

2 files changed

+121
-12
lines changed

agfusion/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,7 +1152,7 @@ def _fetch_transcript_cds(self):
11521152
elif self.gene3prime.junction <= cds[0]:
11531153
break
11541154
else:
1155-
self.transcript_cds_junction_3prime += (self.gene3prime.junction - cds[0] + 1)
1155+
self.transcript_cds_junction_3prime += (self.gene3prime.junction - cds[0])
11561156
else:
11571157
for cds in self.transcript2.coding_sequence_position_ranges:
11581158
if self.gene3prime.junction <= cds[0]:

test/test.py

Lines changed: 120 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,116 @@
33
import agfusion
44
from agfusion import utils
55
import pyensembl
6-
from Bio import SeqIO
6+
from Bio import SeqIO, Seq, Alphabet
77

88
data = pyensembl.EnsemblRelease(84,'mouse')
99
db = agfusion.AGFusionDB(abspath(join(curdir,'agfusion.mus_musculus.84.db')))
1010
db.build = 'mus_musculus_84'
1111

12+
data_human = pyensembl.EnsemblRelease(75,'human')
13+
db_human = agfusion.AGFusionDB(abspath(join(curdir,'agfusion.homo_sapiens.75.db')))
14+
db_human.build = 'homo_sapiens_75'
15+
16+
17+
class TestSequencePrediction_human(unittest.TestCase):
18+
def test_1(self):
19+
"""
20+
test CDS and protein correct for junction that is on exon boundaries and
21+
produces an out-of-frame protein.
22+
"""
23+
24+
#test the dna and protein coding sequences are correct by comparing
25+
#with manually generated sequences
26+
27+
fusion = agfusion.Fusion(
28+
gene5prime="TMEM87B",
29+
gene5primejunction=112843681,
30+
gene3prime="MERTK",
31+
gene3primejunction=112722768,
32+
db=db_human,
33+
pyensembl_data=data_human,
34+
protein_databases=['pfam', 'tmhmm'],
35+
noncanonical=False
36+
)
37+
38+
fusion.save_transcript_cdna('TMEM87B-MERTK-case0')
39+
fusion.save_transcript_cds('TMEM87B-MERTK-case0')
40+
fusion.save_proteins('TMEM87B-MERTK-case0')
41+
#fusion.save_images('DLG1-BRAF_mouse')
42+
43+
test_cds = open('./data/test-human-case-0.txt','r').read()
44+
test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
45+
test_protein = test_protein[0:test_protein.find('*')]
46+
47+
trans=fusion.transcripts['ENST00000283206-ENST00000295408']
48+
49+
assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 0)"
50+
assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 0)"
51+
52+
def test_2(self):
53+
"""
54+
"""
55+
56+
#test the dna and protein coding sequences are correct by comparing
57+
#with manually generated sequences
58+
59+
fusion = agfusion.Fusion(
60+
gene5prime="TMEM87B",
61+
gene5primejunction=112843681,
62+
gene3prime="MERTK",
63+
gene3primejunction=112722769,
64+
db=db_human,
65+
pyensembl_data=data_human,
66+
protein_databases=['pfam', 'tmhmm'],
67+
noncanonical=False
68+
)
69+
70+
fusion.save_transcript_cdna('TMEM87B-MERTK-case2')
71+
fusion.save_transcript_cds('TMEM87B-MERTK-case2')
72+
fusion.save_proteins('TMEM87B-MERTK-case2')
73+
#fusion.save_images('DLG1-BRAF_mouse')
74+
75+
test_cds = open('./data/test-human-case-2.txt','r').read()
76+
test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
77+
test_protein = test_protein[0:test_protein.find('*')]
78+
79+
trans=fusion.transcripts['ENST00000283206-ENST00000295408']
80+
81+
assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 2)"
82+
assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 2)"
83+
84+
def test_3(self):
85+
"""
86+
"""
87+
88+
#test the dna and protein coding sequences are correct by comparing
89+
#with manually generated sequences
90+
91+
fusion = agfusion.Fusion(
92+
gene5prime="TMEM87B",
93+
gene5primejunction=112843681,
94+
gene3prime="MERTK",
95+
gene3primejunction=112722771,
96+
db=db_human,
97+
pyensembl_data=data_human,
98+
protein_databases=['pfam', 'tmhmm'],
99+
noncanonical=False
100+
)
101+
102+
fusion.save_transcript_cdna('TMEM87B-MERTK-case3')
103+
fusion.save_transcript_cds('TMEM87B-MERTK-case3')
104+
fusion.save_proteins('TMEM87B-MERTK-case3')
105+
#fusion.save_images('DLG1-BRAF_mouse')
106+
107+
test_cds = open('./data/test-human-case-3.txt','r').read()
108+
test_protein = Seq.Seq(test_cds,alphabet=Alphabet.generic_dna).translate()
109+
test_protein = test_protein[0:test_protein.find('*')]
110+
111+
trans=fusion.transcripts['ENST00000283206-ENST00000295408']
112+
113+
assert test_cds==trans.cds.seq, "cds is wrongly predicted for human fusion (case 3)"
114+
assert test_protein==trans.protein.seq, "protein is wrongly predicted for human fusion (case 3)"
115+
12116
class TestSequencePrediction(unittest.TestCase):
13117
def test_1(self):
14118
"""
@@ -273,13 +377,17 @@ def test_1(self):
273377

274378
class TestFusionCatcher(unittest.TestCase):
275379
def test_1(self):
380+
381+
agfusion_db = agfusion.AGFusionDB("agfusion.homo_sapiens.84.db", debug=False)
382+
383+
276384
all_fusions = ['Adamts9-Ano2','Trp53-Sat2','1700112E06Rik-Runx1','Runx1-1700112E06Rik','Rell1-Lhfpl3','Phc1-Smarca2','Lrrc8d-Gbp11','C920009B18Rik-H60b']
277-
for fusion in agfusion.parsers['fusioncatcher']('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt'):
385+
for fusion in agfusion.parsers['fusioncatcher']('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt',db.logger):
278386
fusion = agfusion.Fusion(
279-
gene5prime=fusion['ensembl_5prime'],
280-
gene5primejunction=fusion['junction_5prime'],
281-
gene3prime=fusion['ensembl_3prime'],
282-
gene3primejunction=fusion['junction_3prime'],
387+
gene5prime=fusion['gene5prime'],
388+
gene5primejunction=fusion['gene5prime_junction'],
389+
gene3prime=fusion['gene3prime'],
390+
gene3primejunction=fusion['gene3prime_junction'],
283391
db=db,
284392
pyensembl_data=data,
285393
protein_databases=['pfam'],
@@ -289,13 +397,14 @@ def test_1(self):
289397

290398
class TestSTARFusion(unittest.TestCase):
291399
def test_1(self):
400+
292401
all_fusions = ['Adamts9-Ano2','Trp53-Sat2','1700112E06Rik-Runx1','Runx1-1700112E06Rik','Rell1-Lhfpl3','Phc1-Smarca2','Lrrc8d-Gbp11','C920009B18Rik-H60b']
293-
for fusion in agfusion.parsers['fusioncatcher']('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt'):
402+
for fusion in agfusion.parsers['fusioncatcher']('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt',db.logger):
294403
fusion = agfusion.Fusion(
295-
gene5prime=fusion['ensembl_5prime'],
296-
gene5primejunction=fusion['junction_5prime'],
297-
gene3prime=fusion['ensembl_3prime'],
298-
gene3primejunction=fusion['junction_3prime'],
404+
gene5prime=fusion['gene5prime'],
405+
gene5primejunction=fusion['gene5prime_junction'],
406+
gene3prime=fusion['gene3prime'],
407+
gene3primejunction=fusion['gene3prime_junction'],
299408
db=db,
300409
pyensembl_data=data,
301410
protein_databases=['pfam'],

0 commit comments

Comments
 (0)