2929Dong-Sheng Cao, Nan Xiao, Qing-Song Xu, and Alex F. Chen for ``Rcpi``.
3030""" .strip ()
3131
32+ # --- Constants --------------------------------------------------------------
33+
34+ # fmt: off
35+ _CODE1 = [
36+ "A" , "R" , "N" , "D" , "C" , "Q" , "E" , "G" , "H" , "I" ,
37+ "L" , "K" , "M" , "F" , "P" , "S" , "T" , "W" , "Y" , "V" ,
38+ "O" , "U" , "B" , "Z" , "J" , "X"
39+ ]
40+
41+ # fmt: off
42+ _CODE3 = [
43+ "Ala" , "Arg" , "Asn" , "Asp" , "Cys" , "Gln" , "Glu" , "Gly" , "His" , "Ile" ,
44+ "Leu" , "Lys" , "Met" , "Phe" , "Pro" , "Ser" , "Thr" , "Trp" , "Tyr" , "Val" ,
45+ "Pyl" , "Sec" , "Asx" , "Glx" , "Xle" , "Xaa"
46+ ]
47+
48+ _TRANS = bytes (
49+ _CODE1 .index (chr (x ).upper ()) if chr (x ).upper () in _CODE1 else _CODE1 .index ("X" )
50+ for x in range (256 )
51+ )
52+
53+
54+ # --- Helper classes ---------------------------------------------------------
3255
3356class BLOSUMIndices (typing .NamedTuple ):
3457 """The BLOSUM62-derived indices of a peptide.
@@ -543,6 +566,8 @@ class ZScales(typing.NamedTuple):
543566 z5 : float
544567
545568
569+ # --- Peptide class ----------------------------------------------------------
570+
546571class Peptide (typing .Sequence [str ]):
547572 """A sequence of amino acids.
548573
@@ -552,22 +577,6 @@ class Peptide(typing.Sequence[str]):
552577
553578 """
554579
555- # --- Class constants ----------------------------------------------------
556-
557- # fmt: off
558- _CODE1 = [
559- "A" , "R" , "N" , "D" , "C" , "Q" , "E" , "G" , "H" , "I" ,
560- "L" , "K" , "M" , "F" , "P" , "S" , "T" , "W" , "Y" , "V" ,
561- "O" , "U" , "B" , "Z" , "J" , "X"
562- ]
563-
564- # fmt: off
565- _CODE3 = [
566- "Ala" , "Arg" , "Asn" , "Asp" , "Cys" , "Gln" , "Glu" , "Gly" , "His" , "Ile" ,
567- "Leu" , "Lys" , "Met" , "Phe" , "Pro" , "Ser" , "Thr" , "Trp" , "Tyr" , "Val" ,
568- "Pyl" , "Sec" , "Asx" , "Glx" , "Xle" , "Xaa"
569- ]
570-
571580 # --- Class methods ------------------------------------------------------
572581
573582 @classmethod
@@ -641,11 +650,10 @@ def __init__(self, sequence: str) -> None:
641650 """
642651 # store the sequence in text format
643652 self .sequence : str = sequence
644- # store an encoded version of the sequence as an array of indices
645- encoder = {aa :i for i ,aa in enumerate (self ._CODE1 )}
646- self .encoded = array .array ('B' )
647- for i , aa in enumerate (sequence ):
648- self .encoded .append (encoder .get (aa , encoder ["X" ]))
653+ # encode the sequence as ASCII bytes
654+ binary = sequence .encode ('ascii' )
655+ # encode the peptide with the given encoding
656+ self .encoded = array .array ('B' , binary .translate (_TRANS ))
649657
650658 def __len__ (self ) -> int :
651659 return len (self .sequence )
@@ -709,7 +717,7 @@ def auto_correlation(
709717 sigma = statistics .stdev (table .values ())
710718 table = {k : (v - mu ) / sigma for k , v in table .items ()}
711719 # build look up table
712- lut = [table .get (aa , 0.0 ) for aa in self . _CODE1 ]
720+ lut = [table .get (aa , 0.0 ) for aa in _CODE1 ]
713721 # compute using Cruciani formula
714722 if numpy is None :
715723 s1 = s2 = 0.0
@@ -743,7 +751,7 @@ def auto_covariance(
743751 sigma = statistics .stdev (table .values ())
744752 table = {k : (v - mu ) / sigma for k , v in table .items ()}
745753 # build the lookup table
746- lut = [table .get (aa , 0.0 ) for aa in self . _CODE1 ]
754+ lut = [table .get (aa , 0.0 ) for aa in _CODE1 ]
747755 # compute correlation using Cruciani formula
748756 if numpy is None :
749757 s = 0.0
@@ -784,8 +792,8 @@ def cross_covariance(
784792 table2 = {k : (v - mu2 ) / sigma2 for k , v in table2 .items ()}
785793
786794 # build the lookup table
787- lut1 = [table1 .get (aa , 0.0 ) for aa in self . _CODE1 ]
788- lut2 = [table2 .get (aa , 0.0 ) for aa in self . _CODE1 ]
795+ lut1 = [table1 .get (aa , 0.0 ) for aa in _CODE1 ]
796+ lut2 = [table2 .get (aa , 0.0 ) for aa in _CODE1 ]
789797
790798 # compute using Cruciani formula
791799 if numpy is None :
@@ -837,7 +845,7 @@ def profile(
837845 # number of residues in the peptide sequence
838846 if len (self ) >= window :
839847 # build a look-up table and index values
840- lut = [table .get (aa , default ) for aa in self . _CODE1 ]
848+ lut = [table .get (aa , default ) for aa in _CODE1 ]
841849 if numpy is None :
842850 values = [lut [i ] for i in self .encoded ]
843851 else :
@@ -882,7 +890,7 @@ def counts(self) -> typing.Dict[str, int]:
882890 # some overhead.
883891 return {
884892 aa : self .sequence .count (aa )
885- for aa in self . _CODE1
893+ for aa in _CODE1
886894 }
887895
888896 def frequencies (self ) -> typing .Dict [str , float ]:
@@ -1067,8 +1075,8 @@ def charge(self, pH: float = 7, pKscale: str = "Lehninger") -> float:
10671075 scale_sign = tables .CHARGE ["sign" ]
10681076
10691077 # build a look-up table for the pKa scale and the charge sign
1070- lut_pKa = [scale_pKa .get (aa , 0.0 ) for aa in self . _CODE1 ]
1071- lut_sign = [scale_sign .get (aa , 0.0 ) for aa in self . _CODE1 ]
1078+ lut_pKa = [scale_pKa .get (aa , 0.0 ) for aa in _CODE1 ]
1079+ lut_sign = [scale_sign .get (aa , 0.0 ) for aa in _CODE1 ]
10721080
10731081 # compute charge of each amino-acid, and sum
10741082 if numpy is None :
@@ -1130,7 +1138,7 @@ def hydrophobic_moment(self, window: int = 11, angle: int = 100) -> float:
11301138 """
11311139 window = min (window , len (self ))
11321140 scale = tables .HYDROPHOBICITY ["Eisenberg" ]
1133- lut = [scale .get (aa , 0.0 ) for aa in self . _CODE1 ]
1141+ lut = [scale .get (aa , 0.0 ) for aa in _CODE1 ]
11341142 angles = [(angle * i ) % 360 for i in range (window )]
11351143
11361144 if numpy is None :
@@ -1970,7 +1978,7 @@ def structural_class(
19701978 s = sum ((pep_frequencies [x ]- table [x ])** 2 for x in table )
19711979 distances [name ] = math .sqrt (s )
19721980 elif distance == "mahalanobis" or distance == "discriminant" :
1973- x = [pep_frequencies [aa ] for aa in sorted (self . _CODE1 [:20 ])]
1981+ x = [pep_frequencies [aa ] for aa in sorted (_CODE1 [:20 ])]
19741982 for name ,tables in dataset .items ():
19751983 if name == "all" :
19761984 continue
@@ -1982,7 +1990,7 @@ def structural_class(
19821990 continue
19831991 # compute Mahalanobis distance using the components of
19841992 # the eigendecomposition
1985- xm = [mean [aa ] for aa in sorted (self . _CODE1 [:20 ])]
1993+ xm = [mean [aa ] for aa in sorted (_CODE1 [:20 ])]
19861994 y = [
19871995 sum (eivecs [i ][j ] * (x [j ] - xm [j ]) for j in range (20 ))
19881996 for i in range (20 )
0 commit comments