Skip to content

Commit 49414ff

Browse files
authored
Add aa_properties_matrix (#37)
This is useful for covariance analyses.
1 parent 60394e9 commit 49414ff

File tree

5 files changed

+54
-5
lines changed

5 files changed

+54
-5
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
name = "GPCRAnalysis"
22
uuid = "c1d73f9e-d42a-418a-8d5b-c7b00ec0358f"
3-
authors = ["Tim Holy <[email protected]> and contributors"]
43
version = "0.5.1"
4+
authors = ["Tim Holy <[email protected]> and contributors"]
55

66
[deps]
77
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
88
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
99
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
1010
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
11+
FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
1112
FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
1213
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
1314
GaussianMixtureAlignment = "f2431ed1-b9c2-4fdb-af1b-a74d6c93b3b3"
@@ -38,6 +39,7 @@ BioStructures = "4.2"
3839
ColorTypes = "0.11, 0.12"
3940
Distances = "0.10"
4041
Downloads = "1"
42+
FASTX = "2"
4143
FixedPointNumbers = "0.8"
4244
GZip = "0.6"
4345
GaussianMixtureAlignment = "0.2.2, 0.3"

src/GPCRAnalysis.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ using Statistics
55
using LinearAlgebra
66

77
using BioStructures
8+
using FASTX
89

910
using MIToS: MIToS, Pfam, MSA
1011
using MIToS.MSA: AbstractMultipleSequenceAlignment, AnnotatedAlignedSequence, AnnotatedMultipleSequenceAlignment,
@@ -51,7 +52,7 @@ export project_sequences, columnwise_entropy, align, residue_centroid, residue_c
5152
alphacarbon_coordinates_matrix, chargelocations, positive_locations, negative_locations
5253
export StructAlign, residueindex, ismapped, skipnothing
5354
export BWScheme, lookupbw
54-
export aa_properties, aa_properties_zscored
55+
export aa_properties, aa_properties_zscored, aa_properties_matrix
5556
export sidechaincentroid, scvector, inward_tm_residues, inward_ecl_residues
5657
export features_from_structure
5758
export forcecomponents, optimize_weights, forcedict

src/properties.jl

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
1+
# Biophysical properties of amino acids
2+
13
struct AAProperties <: AbstractVector{Float64}
24
charge::Float64
35
hydropathy::Float64 # from https://www.sciencedirect.com/science/article/pii/0022283682905150?via%3Dihub & https://en.wikipedia.org/wiki/Amino_acid
46
volume::Float64 # from http://proteinsandproteomics.org/content/free/tables_1/table08.pdf (van der Waals volume in ų)
57
end
6-
Base.size(v::AAProperties) = (3,)
8+
Base.size(::AAProperties) = (3,)
79
Base.getindex(v::AAProperties, i::Int) = i == 1 ? v.charge :
810
i == 2 ? v.hydropathy :
911
i == 3 ? v.volume : Base.throw_boundserror(v, i)
12+
13+
StaticArrays.SVector(v::AAProperties) = SVector{3}(v.charge, v.hydropathy, v.volume)
14+
15+
Base.:(+)(v::AAProperties, w::AAProperties) = AAProperties(v.charge + w.charge, v.hydropathy + w.hydropathy, v.volume + w.volume)
16+
Base.:(-)(v::AAProperties, w::AAProperties) = AAProperties(v.charge - w.charge, v.hydropathy - w.hydropathy, v.volume - w.volume)
17+
Base.:(/)(v::AAProperties, r::Real) = AAProperties(v.charge / r, v.hydropathy / r, v.volume / r)
18+
Base.:(*)(u::AAProperties, v::Adjoint{Float64, GPCRAnalysis.AAProperties}) = SVector(u) * SVector(v')'
19+
Base.:(\)(L::LowerTriangular{T, StaticArraysCore.SMatrix{3, 3, T, 9}}, v::AAProperties) where T<:Real = L \ SVector(v)
20+
1021
const aa_properties = Dict(
1122
MSA.Residue('A') => AAProperties(0, 1.8, 67),
1223
MSA.Residue('R') => AAProperties(1, -4.5, 148),
@@ -35,4 +46,18 @@ const featμ = sum(p for (r, p) in aa_properties)/length(aa_properties)
3546
const featC = sum((Δp = p - featμ; Δp*Δp') for (r, p) in aa_properties)/(length(aa_properties)-1)
3647
const featChol = cholesky(featC)
3748

38-
const aa_properties_zscored = Dict(r => SVector{3}(featChol.L \ (p - featμ)) for (r, p) in aa_properties)
49+
const aa_properties_zscored = Dict(r => (featChol.L \ (p - featμ)) for (r, p) in aa_properties)
50+
51+
function aa_properties_matrix(msa::AbstractMultipleSequenceAlignment)
52+
props = copy(aa_properties_zscored)
53+
props[GAP] = zero(valtype(props))
54+
props[MSA.Residue('X')] = zero(valtype(props))
55+
return [props[residue] for residue in permutedims(msa)]
56+
end
57+
58+
function aa_properties_matrix(seqs::AbstractVector{FASTX.FASTA.Record})
59+
props = Dict(Char(r) => v for (r, v) in aa_properties_zscored)
60+
props['-'] = zero(valtype(props))
61+
props['X'] = zero(valtype(props))
62+
return reduce(hcat, [[props[r] for r in sequence(rec)] for rec in seqs])
63+
end

test/runtests.jl

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
using GPCRAnalysis
22
using MIToS: MSA, Pfam
33
using MIToS.MSA: @res_str, three2residue, coverage, GappedAlphabet, filtersequences!, percentsimilarity,
4-
nsequences, sequencenames, getsequencemapping
4+
nsequences, sequencenames, getsequencemapping, getcolumnmapping
55
using BioStructures
6+
using FASTX
67
using GaussianMixtureAlignment
78
using InvertedIndices
89
using Statistics
@@ -85,6 +86,22 @@ using Test
8586
@test MSACode(msa, AccessionCode("Q3V4T1")) == MSACode("Y070_ATV/2-70")
8687
@test msa[MSACode("Y070_ATV/2-70")][8] == msa[AccessionCode("Q3V4T1")][8] == MSA.Residue('V')
8788
end
89+
@testset "Properties" begin
90+
pf09645_sto = "PF09645_full.stockholm"
91+
msa = MSA.read_file(pf09645_sto, Pfam.Stockholm)
92+
X = aa_properties_matrix(msa)
93+
ΔX = X .- mean(X, dims=2)
94+
i = findfirst(==(14), getcolumnmapping(msa))
95+
@test all(iszero, ΔX[i, :])
96+
@test !all(iszero, ΔX[i-1, :])
97+
seqs = FASTAReader(open("test.fasta")) do io
98+
collect(io)
99+
end
100+
X = aa_properties_matrix(seqs)
101+
ΔX = X .- mean(X, dims=2)
102+
@test all(iszero, ΔX[1, :])
103+
@test !all(iszero, ΔX[3, :])
104+
end
88105
@testset "ChimeraX" begin
89106
tmpfile = tempname() * ".cxc"
90107
chimerax_script(tmpfile, ["ABCD.pdb", "EFGH.pdb"], [[5, 10, 15], [7, 11]])

test/test.fasta

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
> Seq:1
2+
ACA
3+
> Seq:2
4+
ACC

0 commit comments

Comments
 (0)