33import agfusion
44from agfusion import utils
55import pyensembl
6- from Bio import SeqIO
6+ from Bio import SeqIO , Seq , Alphabet
77
88data = pyensembl .EnsemblRelease (84 ,'mouse' )
99db = agfusion .AGFusionDB (abspath (join (curdir ,'agfusion.mus_musculus.84.db' )))
1010db .build = 'mus_musculus_84'
1111
12+ data_human = pyensembl .EnsemblRelease (75 ,'human' )
13+ db_human = agfusion .AGFusionDB (abspath (join (curdir ,'agfusion.homo_sapiens.75.db' )))
14+ db_human .build = 'homo_sapiens_75'
15+
16+
class TestSequencePrediction_human(unittest.TestCase):
    """Check predicted CDS and protein sequences for human TMEM87B-MERTK fusions.

    Every test builds the same fusion (fixed 5' junction in TMEM87B) with a
    different 3' junction in MERTK, then compares the prediction for
    transcript pair ENST00000283206-ENST00000295408 against a manually
    generated CDS stored under ./data/.
    """

    def _check_tmem87b_mertk(self, gene3primejunction, case):
        """Build the TMEM87B-MERTK fusion and compare it with a fixture.

        Args:
            gene3primejunction: genomic position of the junction in MERTK.
            case: case number; selects the fixture
                ./data/test-human-case-<case>.txt and the output prefix
                TMEM87B-MERTK-case<case>.
        """

        fusion = agfusion.Fusion(
            gene5prime="TMEM87B",
            gene5primejunction=112843681,
            gene3prime="MERTK",
            gene3primejunction=gene3primejunction,
            db=db_human,
            pyensembl_data=data_human,
            protein_databases=['pfam', 'tmhmm'],
            noncanonical=False
        )

        output_prefix = 'TMEM87B-MERTK-case{0}'.format(case)
        fusion.save_transcript_cdna(output_prefix)
        fusion.save_transcript_cds(output_prefix)
        fusion.save_proteins(output_prefix)

        # Use a context manager so the fixture handle is closed even when a
        # later step raises.
        with open('./data/test-human-case-{0}.txt'.format(case), 'r') as handle:
            test_cds = handle.read()

        # The expected protein is the translation of the expected CDS up to,
        # but excluding, the first stop codon.
        # NOTE(review): find() returns -1 when there is no '*', which would
        # drop the last residue -- the fixtures presumably contain a stop
        # codon; confirm.
        test_protein = Seq.Seq(test_cds, alphabet=Alphabet.generic_dna).translate()
        test_protein = test_protein[0:test_protein.find('*')]

        trans = fusion.transcripts['ENST00000283206-ENST00000295408']

        # assertEqual is used instead of a bare assert so the checks are not
        # stripped when Python runs with -O.
        self.assertEqual(
            test_cds,
            trans.cds.seq,
            "cds is wrongly predicted for human fusion (case {0})".format(case)
        )
        self.assertEqual(
            test_protein,
            trans.protein.seq,
            "protein is wrongly predicted for human fusion (case {0})".format(case)
        )

    def test_1(self):
        """
        test CDS and protein correct for junction that is on exon boundaries and
        produces an out-of-frame protein.
        """

        self._check_tmem87b_mertk(gene3primejunction=112722768, case=0)

    def test_2(self):
        """
        test CDS and protein correct when the 3' junction is 112722769
        (fixture case 2).
        """

        self._check_tmem87b_mertk(gene3primejunction=112722769, case=2)

    def test_3(self):
        """
        test CDS and protein correct when the 3' junction is 112722771
        (fixture case 3).
        """

        self._check_tmem87b_mertk(gene3primejunction=112722771, case=3)

12116class TestSequencePrediction (unittest .TestCase ):
13117 def test_1 (self ):
14118 """
@@ -273,13 +377,17 @@ def test_1(self):
273377
274378class TestFusionCatcher (unittest .TestCase ):
275379 def test_1 (self ):
380+
381+ agfusion_db = agfusion .AGFusionDB ("agfusion.homo_sapiens.84.db" , debug = False )
382+
383+
276384 all_fusions = ['Adamts9-Ano2' ,'Trp53-Sat2' ,'1700112E06Rik-Runx1' ,'Runx1-1700112E06Rik' ,'Rell1-Lhfpl3' ,'Phc1-Smarca2' ,'Lrrc8d-Gbp11' ,'C920009B18Rik-H60b' ]
277- for fusion in agfusion .parsers ['fusioncatcher' ]('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt' ):
385+ for fusion in agfusion .parsers ['fusioncatcher' ]('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt' , db . logger ):
278386 fusion = agfusion .Fusion (
279- gene5prime = fusion ['ensembl_5prime ' ],
280- gene5primejunction = fusion ['junction_5prime ' ],
281- gene3prime = fusion ['ensembl_3prime ' ],
282- gene3primejunction = fusion ['junction_3prime ' ],
387+ gene5prime = fusion ['gene5prime ' ],
388+ gene5primejunction = fusion ['gene5prime_junction ' ],
389+ gene3prime = fusion ['gene3prime ' ],
390+ gene3primejunction = fusion ['gene3prime_junction ' ],
283391 db = db ,
284392 pyensembl_data = data ,
285393 protein_databases = ['pfam' ],
@@ -289,13 +397,14 @@ def test_1(self):
289397
290398class TestSTARFusion (unittest .TestCase ):
291399 def test_1 (self ):
400+
292401 all_fusions = ['Adamts9-Ano2' ,'Trp53-Sat2' ,'1700112E06Rik-Runx1' ,'Runx1-1700112E06Rik' ,'Rell1-Lhfpl3' ,'Phc1-Smarca2' ,'Lrrc8d-Gbp11' ,'C920009B18Rik-H60b' ]
293- for fusion in agfusion .parsers ['fusioncatcher' ]('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt' ):
402+ for fusion in agfusion .parsers ['fusioncatcher' ]('./data/FusionsFindingAlgorithms/FusionCatcher/final-list_candidate-fusion-genes.txt' , db . logger ):
294403 fusion = agfusion .Fusion (
295- gene5prime = fusion ['ensembl_5prime ' ],
296- gene5primejunction = fusion ['junction_5prime ' ],
297- gene3prime = fusion ['ensembl_3prime ' ],
298- gene3primejunction = fusion ['junction_3prime ' ],
404+ gene5prime = fusion ['gene5prime ' ],
405+ gene5primejunction = fusion ['gene5prime_junction ' ],
406+ gene3prime = fusion ['gene3prime ' ],
407+ gene3primejunction = fusion ['gene3prime_junction ' ],
299408 db = db ,
300409 pyensembl_data = data ,
301410 protein_databases = ['pfam' ],
0 commit comments