|
# Biophysical properties of amino acids
| 2 | + |
"""
    AAProperties(charge, hydropathy, volume)

Biophysical feature triple for a single amino acid.

The three fields are stored as `Float64` (constructor arguments are converted), and the
type is declared as an `AbstractVector{Float64}` so that a value can be used wherever a
3-element numeric vector is expected.
"""
struct AAProperties <: AbstractVector{Float64}
    charge::Float64
    # Hydropathy, from
    # https://www.sciencedirect.com/science/article/pii/0022283682905150?via%3Dihub
    # and https://en.wikipedia.org/wiki/Amino_acid
    hydropathy::Float64
    # van der Waals volume in ų, from
    # http://proteinsandproteomics.org/content/free/tables_1/table08.pdf
    volume::Float64
end
# Array interface: an `AAProperties` always has exactly three elements.
Base.size(::AAProperties) = (3,)

# Indices 1, 2 and 3 select `charge`, `hydropathy` and `volume` respectively; any other
# index raises a bounds error for `v` at `i`.
function Base.getindex(v::AAProperties, i::Int)
    i == 1 && return v.charge
    i == 2 && return v.hydropathy
    i == 3 && return v.volume
    return Base.throw_boundserror(v, i)
end
| 12 | + |
# Convert to a static 3-vector whose entries are, in order, `charge`, `hydropathy` and
# `volume`.
function StaticArrays.SVector(v::AAProperties)
    return SVector{3}(v.charge, v.hydropathy, v.volume)
end
| 14 | + |
# Field-wise sum; the result is again an `AAProperties`.
function Base.:(+)(v::AAProperties, w::AAProperties)
    return AAProperties(
        v.charge + w.charge, v.hydropathy + w.hydropathy, v.volume + w.volume
    )
end

# Field-wise difference; the result is again an `AAProperties`.
function Base.:(-)(v::AAProperties, w::AAProperties)
    return AAProperties(
        v.charge - w.charge, v.hydropathy - w.hydropathy, v.volume - w.volume
    )
end

# Divide every field by the real scalar `r`.
function Base.:(/)(v::AAProperties, r::Real)
    return AAProperties(v.charge / r, v.hydropathy / r, v.volume / r)
end

# Outer product `u * v'`: both operands are converted to `SVector` first, so the product
# is evaluated on the static-array forms rather than on `AAProperties` itself.
function Base.:(*)(u::AAProperties, v::Adjoint{Float64,GPCRAnalysis.AAProperties})
    return SVector(u) * SVector(v')'
end

# Solve `L \ v` for a static 3×3 lower-triangular `L`; the right-hand side is converted
# to an `SVector` before the solve.
function Base.:(\)(
    L::LowerTriangular{T,StaticArraysCore.SMatrix{3,3,T,9}}, v::AAProperties
) where {T<:Real}
    return L \ SVector(v)
end
| 20 | + |
10 | 21 | const aa_properties = Dict( |
11 | 22 | MSA.Residue('A') => AAProperties(0, 1.8, 67), |
12 | 23 | MSA.Residue('R') => AAProperties(1, -4.5, 148), |
@@ -35,4 +46,18 @@ const featμ = sum(p for (r, p) in aa_properties)/length(aa_properties) |
# Sample covariance of the raw property vectors: the sum of outer products of each
# vector's deviation from `featμ`, divided by (number of residues - 1).
const featC =
    sum((Δp = p - featμ; Δp * Δp') for p in values(aa_properties)) /
    (length(aa_properties) - 1)

# Cholesky factorization of the covariance; its lower factor `featChol.L` is used to
# rescale the centered property vectors.
const featChol = cholesky(featC)
37 | 48 |
|
# Normalized property vector for every residue in `aa_properties`: center on `featμ`,
# then solve against the lower Cholesky factor of the feature covariance. The value type
# is whatever `featChol.L \ ::AAProperties` returns.
const aa_properties_zscored = Dict(
    r => featChol.L \ (p - featμ) for (r, p) in aa_properties
)
| 50 | + |
"""
    aa_properties_matrix(msa::AbstractMultipleSequenceAlignment)

Return an array holding the normalized property vector (from `aa_properties_zscored`)
of every residue in `msa`.

The lookup is applied to `permutedims(msa)`, so the result has the transposed layout of
`msa`. Gap residues (`GAP`) and `MSA.Residue('X')` map to the zero vector. A residue with
no entry in the lookup table raises a `KeyError`.
"""
function aa_properties_matrix(msa::AbstractMultipleSequenceAlignment)
    # Work on a copy so the module-level table is never mutated.
    lookup = copy(aa_properties_zscored)
    blank = zero(valtype(lookup))
    lookup[GAP] = blank
    lookup[MSA.Residue('X')] = blank
    return [lookup[res] for res in permutedims(msa)]
end
| 57 | + |
"""
    aa_properties_matrix(seqs::AbstractVector{FASTX.FASTA.Record})

Return a matrix holding the normalized property vector (from `aa_properties_zscored`)
of every character of every record in `seqs`.

Each record contributes one column (the per-record vectors are joined with `hcat`), so
all sequences must have the same length. The characters `'-'` and `'X'` map to the zero
vector. A character with no entry in the lookup table raises a `KeyError`.
"""
function aa_properties_matrix(seqs::AbstractVector{FASTX.FASTA.Record})
    # Re-key the residue table by `Char` so it can be indexed with sequence characters.
    lookup = Dict(Char(r) => v for (r, v) in aa_properties_zscored)
    blank = zero(valtype(lookup))
    lookup['-'] = blank
    lookup['X'] = blank
    columns = [[lookup[c] for c in sequence(rec)] for rec in seqs]
    return reduce(hcat, columns)
end
0 commit comments