@@ -5,7 +5,7 @@ using GPCRAnalysis: sequenceindexes, isgap, isunknown, sequencekeys, msasequence
55 subseqs, subseqs!, percent_similarity
66using MIToS: MSA, Pfam
77using MIToS. MSA: coverage, GappedAlphabet, nsequences
8- nsequences, sequencenames, getsequencemapping, getcolumnmapping
8+ using BioStockholm : BioStockholm
99using BioStructures
1010using FASTX
1111using GaussianMixtureAlignment
@@ -17,6 +17,7 @@ using ColorTypes
1717using Test
1818
# Column indexes of a MIToS alignment, obtained by delegating to `MSA.getcolumnmapping`.
1919columnindexes (msa:: MSA.AbstractMultipleSequenceAlignment ) = MSA. getcolumnmapping (msa)
# Column indexes of a BioStockholm alignment: the index range of the `second` element of
# the first entry in `msa.seq` (presumably that entry's aligned sequence string — confirm).
20+ columnindexes (msa:: BioStockholm.MSA ) = eachindex (first (msa. seq). second)
2021
2122# skip the network-hitting components by setting `skip_download = true` in the global namespace
2223
@@ -66,6 +67,7 @@ columnindexes(msa::MSA.AbstractMultipleSequenceAlignment) = MSA.getcolumnmapping
# Runs the alignment helpers on one Stockholm file loaded two ways: first through MIToS
# (`MSA.read_file`), then through BioStockholm (`read(..., BioStockholm.MSA)`).
6667 @testset " MSA" begin
6768 # The test file is copied from MIToS/test/data, with gratitude
6869 pf09645_sto = " PF09645_full.stockholm"
70+ # # First in MIToS format
6971 msa = MSA. read_file (pf09645_sto, Pfam. Stockholm)
# The filters are applied to deep copies; `msa` itself is reused by the tests below.
7072 @test MSA. nsequences (filter_species! (deepcopy (msa), " ATV" )) == 1
7173 @test MSA. nsequences (filter_long! (deepcopy (msa), 70 )) == 3
@@ -91,15 +93,43 @@ columnindexes(msa::MSA.AbstractMultipleSequenceAlignment) = MSA.getcolumnmapping
# Round-trip between an alignment row name (MSACode) and its accession (AccessionCode).
9193 @test AccessionCode (msa, MSACode (" Y070_ATV/2-70" )) == AccessionCode (" Q3V4T1" )
9294 @test MSACode (msa, AccessionCode (" Q3V4T1" )) == MSACode (" Y070_ATV/2-70" )
# Indexing by either code is expected to give the same sequence, with residue V at position 8.
9395 @test msa[MSACode (" Y070_ATV/2-70" )][8 ] == msa[AccessionCode (" Q3V4T1" )][8 ] == MSA. Residue (' V' )
96+
97+ # # Now in BioStockholm format
98+ msa = read (pf09645_sto, BioStockholm. MSA)
# Same filter expectations as above, counted via `sequencekeys` instead of `MSA.nsequences`.
99+ @test length (sequencekeys (filter_species! (deepcopy (msa), " ATV" ))) == 1
100+ @test length (sequencekeys (filter_long! (deepcopy (msa), 70 ))) == 3
101+
# Behaviour pinned here: assigning through a SequenceMapping writes only at its nonzero
# entries (4 and 5), taking the 2nd and 3rd right-hand values; all other slots stay NaN.
102+ idx = SequenceMapping ([0 , 4 , 5 , 0 ])
103+ seqvals = fill (NaN , 9 )
104+ seqvals[idx] = [0.1 , 0.2 , 0.3 , 0.4 ]
105+ @test seqvals[4 ] == 0.2
106+ @test seqvals[5 ] == 0.3
107+ @test all (isnan, seqvals[1 : 3 ])
108+ @test all (isnan, seqvals[6 : end ])
109+
110+ # analyze
# One entropy value per alignment column; column 9 is expected to have zero entropy.
111+ e = columnwise_entropy (msa)
112+ @test length (e) == size (msa, 2 ) && e[9 ] == 0
# With `identity` as the first argument the entropies are expected to be >= the default
# ones everywhere and to differ in at least one column.
113+ e2 = columnwise_entropy (identity, msa)
114+ @test all (e2 .>= e)
115+ @test ! all (e2 .== e)
116+
# With `fracvar=0.5` the first dimension of the projection is expected to drop from 3 to 1.
117+ @test size (project_sequences (msa)) == (3 , 4 )
118+ @test size (project_sequences (msa; fracvar= 0.5 )) == (1 , 4 )
119+
# Same MSACode <-> AccessionCode round-trip as in the MIToS section.
120+ @test AccessionCode (msa, MSACode (" Y070_ATV/2-70" )) == AccessionCode (" Q3V4T1" )
121+ @test MSACode (msa, AccessionCode (" Q3V4T1" )) == MSACode (" Y070_ATV/2-70" )
# NOTE(review): `msa` is a BioStockholm.MSA here, yet the expected value is still a MIToS
# `MSA.Residue` — confirm the element type returned by indexing and that `==` holds for it.
122+ @test msa[MSACode (" Y070_ATV/2-70" )][8 ] == msa[AccessionCode (" Q3V4T1" )][8 ] == MSA. Residue (' V' )
94123 end
95124 @testset " Properties" begin
96125 pf09645_sto = " PF09645_full.stockholm"
97- msa = MSA. read_file (pf09645_sto, Pfam. Stockholm)
98- X = aa_properties_matrix (msa)
99- ΔX = X .- mean (X, dims= 2 )
100- i = findfirst (== (14 ), columnindexes (msa))
101- @test all (iszero, ΔX[i, :])
102- @test ! all (iszero, ΔX[i- 1 , :])
126+ for msa in (MSA. read_file (pf09645_sto, Pfam. Stockholm), read (pf09645_sto, BioStockholm. MSA))
127+ X = aa_properties_matrix (msa)
128+ ΔX = X .- mean (X, dims= 2 )
129+ i = findfirst (== (14 ), columnindexes (msa))
130+ @test all (iszero, ΔX[i, :])
131+ @test ! all (iszero, ΔX[i- 1 , :])
132+ end
103133 seqs = FASTAReader (open (" test.fasta" )) do io
104134 collect (io)
105135 end
0 commit comments