|
| 1 | +module GPCRAnalysisMIToSExt |
| 2 | + |
| 3 | +using GPCRAnalysis |
| 4 | +using Downloads |
| 5 | +using BioStructures |
| 6 | +using ProgressMeter |
| 7 | + |
| 8 | +using GPCRAnalysis: ChainLike, ResidueLike, StructureLike, _entropy, validate_seq_residues, rex_alphafold_pdbs |
| 9 | + |
| 10 | +using MIToS: MIToS, Pfam, MSA |
| 11 | +using MIToS.MSA: AbstractMultipleSequenceAlignment, AnnotatedAlignedSequence, AnnotatedMultipleSequenceAlignment, |
| 12 | + ReducedAlphabet, ResidueAlphabet, GAP, XAA |
| 13 | +using MIToS.MSA: getsequence, getannotsequence, getsequencemapping, getresidues, three2residue, sequencenames, |
| 14 | + filtersequences, filtersequences!, percentsimilarity, getcolumnmapping |
| 15 | + |
| 16 | + |
# Low-level API implementation
# Each GPCRAnalysis primitive below forwards to the function imported from MIToS.MSA.

# Residue predicates: compare against the MIToS sentinel residues.
GPCRAnalysis.isgap(r::MSA.Residue) = r == GAP
GPCRAnalysis.isunknown(r::MSA.Residue) = r == XAA

# Index mappings for a single aligned sequence, row `i` of an alignment, and the columns.
function GPCRAnalysis.sequenceindexes(seq::AnnotatedAlignedSequence)
    return getsequencemapping(seq)
end
function GPCRAnalysis.sequenceindexes(msa::AbstractMultipleSequenceAlignment, i::Int)
    return getsequencemapping(msa, i)
end
function GPCRAnalysis.columnindexes(msa::AbstractMultipleSequenceAlignment)
    return getcolumnmapping(msa)
end

# Sequence names, single-sequence lookup, and the residue matrix.
function GPCRAnalysis.sequencekeys(msa::AbstractMultipleSequenceAlignment)
    return sequencenames(msa)
end
function GPCRAnalysis.msasequence(msa::AbstractMultipleSequenceAlignment, key)
    return getsequence(msa, key)
end
function GPCRAnalysis.residuematrix(msa::AbstractMultipleSequenceAlignment)
    return getresidues(msa)
end

# Row filtering (non-mutating and mutating variants) and pairwise similarity.
function GPCRAnalysis.subseqs(msa::AbstractMultipleSequenceAlignment, rowmask)
    return filtersequences(msa, rowmask)
end
function GPCRAnalysis.subseqs!(msa::AbstractMultipleSequenceAlignment, rowmask)
    return filtersequences!(msa, rowmask)
end
function GPCRAnalysis.percent_similarity(msa::AbstractMultipleSequenceAlignment)
    return percentsimilarity(msa)
end
| 29 | + |
# Index an alignment by a sequence identifier; both methods end in `getsequence`.
function Base.getindex(msa::AbstractMultipleSequenceAlignment, seqname::MSACode)
    return getsequence(msa, seqname.name)
end
function Base.getindex(msa::AbstractMultipleSequenceAlignment, seqname::AccessionCode)
    # Convert the accession to the alignment's own sequence name before the lookup.
    rowname = MSACode(msa, seqname).name
    return getsequence(msa, rowname)
end
| 32 | + |
# Build an `AccessionCode` for the sequence called `seqname` in `msa`.
# The "AC" sequence annotation is requested with `seqname` itself as the final argument
# (presumably the fallback when the annotation is absent; confirm against MIToS), and the
# result is passed through `uniprotX`.
function GPCRAnalysis.AccessionCode(msa::AnnotatedMultipleSequenceAlignment, seqname::AbstractString)
    annotation = getannotsequence(msa, seqname, "AC", seqname)
    return AccessionCode(uniprotX(annotation))
end

# An `MSACode` is unwrapped to its name and handled by the string method above.
function GPCRAnalysis.AccessionCode(msa::AnnotatedMultipleSequenceAlignment, seqname::MSACode)
    return AccessionCode(msa, seqname.name)
end

# Already an `AccessionCode`: returned unchanged, the alignment is not consulted.
function GPCRAnalysis.AccessionCode(::AnnotatedMultipleSequenceAlignment, seqname::AccessionCode)
    return seqname
end
| 38 | + |
# Find the `MSACode` of the first sequence in `msa` whose accession code equals `accession`.
# Sequence names are scanned in `sequencenames(msa)` order; each name is converted with
# `AccessionCode(msa, name)` and its `.name` compared to `accession`.
# Throws `ArgumentError` when no sequence matches.
function GPCRAnalysis.MSACode(msa::AnnotatedMultipleSequenceAlignment, accession::AbstractString)
    seqnames = sequencenames(msa)
    idx = findfirst(name -> AccessionCode(msa, name).name == accession, seqnames)
    # `findfirst` yields `nothing` on no match; indexing with it would raise an opaque
    # "invalid index: nothing" error, so report the missing accession explicitly.
    if idx === nothing
        throw(ArgumentError("no sequence with accession code \"$accession\" found in the alignment"))
    end
    return MSACode(seqnames[idx])
end

# An `AccessionCode` is unwrapped to its name and handled by the string method above.
GPCRAnalysis.MSACode(msa::AnnotatedMultipleSequenceAlignment, accession::AccessionCode) = MSACode(msa, accession.name)

# Already an `MSACode`: returned unchanged, the alignment is not consulted.
GPCRAnalysis.MSACode(::AnnotatedMultipleSequenceAlignment, accession::MSACode) = accession
| 45 | + |
# Wrap the sequence mapping of an aligned sequence in a `SequenceMapping`.
function GPCRAnalysis.SequenceMapping(seq::AnnotatedAlignedSequence)
    mapping = getsequencemapping(seq)
    return SequenceMapping(mapping)
end
| 47 | + |
# Move this to MIToS?
# These methods are only defined when the loaded MIToS has no single-argument
# `getsequencemapping(::AnnotatedAlignedSequence)` method.
if !hasmethod(getsequencemapping, Tuple{AnnotatedAlignedSequence})
    # Mapping of a lone aligned sequence: use the first (only) sequence name it carries.
    function MIToS.MSA.getsequencemapping(seq::AnnotatedAlignedSequence)
        return getsequencemapping(seq, first(sequencenames(seq)))
    end

    # Mapping by sequence name: parse the "SeqMap" annotation stored for `seq_id`.
    function MIToS.MSA.getsequencemapping(msa::Union{AnnotatedAlignedSequence,AnnotatedMultipleSequenceAlignment}, seq_id::String)
        return MIToS.MSA._str2int_mapping(getannotsequence(msa, seq_id, "SeqMap"))
    end

    # Mapping by pattern: use the first sequence whose name matches `seqid`.
    # Throws `ArgumentError` when no sequence name matches.
    function MIToS.MSA.getsequencemapping(msa::AnnotatedMultipleSequenceAlignment, seqid::Regex)
        id = findfirst(str -> occursin(seqid, str), sequencenames(msa))
        # Without this guard a failed match would forward `nothing` and surface as an
        # unrelated MethodError.
        if id === nothing
            throw(ArgumentError("no sequence name in the alignment matches $(repr(seqid))"))
        end
        # `id` is a row index; the integer method is not defined in this file
        # (presumably supplied by MIToS; confirm).
        return getsequencemapping(msa, id)
    end
end
| 61 | + |
const reduced_code = ReducedAlphabet("(AILMV)(NQST)(RHK)(DE)(FWY)CGP")

"""
    columnwise_entropy(msa, aacode = reduced_code)

Compute `columnwise_entropy` for `msa`, translating every residue `r` to `aacode[r]`
first.

`aacode` defaults to `ReducedAlphabet("(AILMV)(NQST)(RHK)(DE)(FWY)CGP")`, a reduced
alphabet that sorts residues into hydrophobic, polar, charged, aromatic, and "special"
categories.
"""
function GPCRAnalysis.columnwise_entropy(
    msa::AbstractMultipleSequenceAlignment, aacode::ResidueAlphabet=reduced_code
)
    # The `do` block is passed as the first (function) argument of the generic method.
    return GPCRAnalysis.columnwise_entropy(msa) do r
        aacode[r]
    end
end
| 75 | + |
| 76 | +end |
0 commit comments