11using GPCRAnalysis
22using GPCRAnalysis: three2char
33# MSA interface functions
4- using GPCRAnalysis: sequenceindexes, isgap, isunknown, sequencekeys, msasequence, residuematrix,
4+ using GPCRAnalysis: sequenceindexes, columnindexes, isgap, isunknown, sequencekeys, msasequence, residuematrix,
55 subseqs, subseqs!, percent_similarity
66using MIToS: MSA, Pfam
77using MIToS. MSA: coverage, GappedAlphabet, nsequences
8- nsequences, sequencenames, getsequencemapping, getcolumnmapping
8+ using BioStockholm : BioStockholm
99using BioStructures
1010using FASTX
1111using GaussianMixtureAlignment
@@ -16,8 +16,6 @@ using JuMP, HiGHS
1616using ColorTypes
1717using Test
1818
19- columnindexes (msa:: MSA.AbstractMultipleSequenceAlignment ) = MSA. getcolumnmapping (msa)
20-
2119# skip the network-hitting components by setting `skip_download = true` in the global namespace
2220
2321@testset " GPCRAnalysis.jl" begin
@@ -66,6 +64,7 @@ columnindexes(msa::MSA.AbstractMultipleSequenceAlignment) = MSA.getcolumnmapping
6664 @testset " MSA" begin
6765 # The test file is copied from MIToS/test/data, with gratitude
6866 pf09645_sto = " PF09645_full.stockholm"
67+ # # First in MIToS format
6968 msa = MSA. read_file (pf09645_sto, Pfam. Stockholm)
7069 @test MSA. nsequences (filter_species! (deepcopy (msa), " ATV" )) == 1
7170 @test MSA. nsequences (filter_long! (deepcopy (msa), 70 )) == 3
@@ -91,15 +90,43 @@ columnindexes(msa::MSA.AbstractMultipleSequenceAlignment) = MSA.getcolumnmapping
9190 @test AccessionCode (msa, MSACode (" Y070_ATV/2-70" )) == AccessionCode (" Q3V4T1" )
9291 @test MSACode (msa, AccessionCode (" Q3V4T1" )) == MSACode (" Y070_ATV/2-70" )
9392 @test msa[MSACode (" Y070_ATV/2-70" )][8 ] == msa[AccessionCode (" Q3V4T1" )][8 ] == MSA. Residue (' V' )
93+
94+ # # Now in BioStockholm format
95+ msa = read (pf09645_sto, BioStockholm. MSA)
96+ @test length (sequencekeys (filter_species! (deepcopy (msa), " ATV" ))) == 1
97+ @test length (sequencekeys (filter_long! (deepcopy (msa), 70 ))) == 3
98+
99+ idx = SequenceMapping ([0 , 4 , 5 , 0 ])
100+ seqvals = fill (NaN , 9 )
101+ seqvals[idx] = [0.1 , 0.2 , 0.3 , 0.4 ]
102+ @test seqvals[4 ] == 0.2
103+ @test seqvals[5 ] == 0.3
104+ @test all (isnan, seqvals[1 : 3 ])
105+ @test all (isnan, seqvals[6 : end ])
106+
107+ # analyze
108+ e = columnwise_entropy (msa)
109+ @test length (e) == size (residuematrix (msa), 2 ) && e[9 ] == 0
110+ e2 = columnwise_entropy (identity, msa)
111+ @test all (e2 .>= e)
112+ @test ! all (e2 .== e)
113+
114+ @test size (project_sequences (msa)) == (3 , 4 )
115+ @test size (project_sequences (msa; fracvar= 0.5 )) == (1 , 4 )
116+
117+ @test AccessionCode (msa, MSACode (" Y070_ATV/2-70" )) == AccessionCode (" Q3V4T1" )
118+ @test MSACode (msa, AccessionCode (" Q3V4T1" )) == MSACode (" Y070_ATV/2-70" )
119+ @test msa[MSACode (" Y070_ATV/2-70" )][8 ] == msa[AccessionCode (" Q3V4T1" )][8 ] == ' V'
94120 end
95121 @testset " Properties" begin
96122 pf09645_sto = " PF09645_full.stockholm"
97- msa = MSA. read_file (pf09645_sto, Pfam. Stockholm)
98- X = aa_properties_matrix (msa)
99- ΔX = X .- mean (X, dims= 2 )
100- i = findfirst (== (14 ), columnindexes (msa))
101- @test all (iszero, ΔX[i, :])
102- @test ! all (iszero, ΔX[i- 1 , :])
123+ for msa in (MSA. read_file (pf09645_sto, Pfam. Stockholm), read (pf09645_sto, BioStockholm. MSA))
124+ X = aa_properties_matrix (msa)
125+ ΔX = X .- mean (X, dims= 2 )
126+ i = findfirst (== (14 ), columnindexes (msa))
127+ @test all (iszero, ΔX[i, :])
128+ @test ! all (iszero, ΔX[i- 1 , :])
129+ end
103130 seqs = FASTAReader (open (" test.fasta" )) do io
104131 collect (io)
105132 end
0 commit comments