1
+ from pymol import cmd
2
+ from io import StringIO
3
+
4
try:
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    from Bio import SeqIO
    from Bio.Seq import Seq
except ModuleNotFoundError:
    # The Bio package might be missing from a PyMOL 2 installation.
    # Fall back to the equivalent of running "pip install biopython" with the
    # interpreter that is executing this script.
    import importlib
    import subprocess
    import sys

    print("Biopython is missing! Installing...")
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", 'biopython'])
    except subprocess.CalledProcessError as e:
        # Best effort: report the failure and keep loading the script. The Bio
        # names stay undefined in this case.
        print(f"Failed to install biopython: {e}")
    else:
        print("Successfully installed biopython")
        # The package was written to disk after the interpreter started, so
        # the import system's finder caches may not know about it yet.
        importlib.invalidate_caches()
        # Import the libs right after the first installation, otherwise
        # protparam fails to run.
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        from Bio import SeqIO
        from Bio.Seq import Seq
22
+
23
@cmd.extend
def protparam(selection='enabled', bychain=0):
    '''
    DESCRIPTION:
        Given selection, calculates common protein properties, like Mw, pI,
        length and aminoacid content.
        By default, combines all chains of each object into the single sequence.

    USAGE:
        protparam selection, [bychain]

    ARGUMENTS:
        selection = PyMOL selection expression; every object it touches is
                    reported separately (default: enabled)
        bychain   = 0 or 1; when 1, every FASTA record returned for an object
                    is analysed on its own instead of being concatenated
                    (default: 0)

    DEPENDENCIES:
        biopython
    '''
    # Arguments typed on the PyMOL command line arrive as strings, and the
    # string "0" is truthy, so convert explicitly before testing the flag.
    bychain = int(bychain)
    # TODO: add pretty output suitable for copy-pasting
    for entry in cmd.get_object_list(selection):
        # Parenthesise the user selection so that the restriction to the
        # current object applies to all of it, however "and"/"or" inside the
        # selection associate.
        fasta_text = cmd.get_fastastr(f"({selection}) and {entry}")
        chains = [record.seq for record in SeqIO.parse(StringIO(fasta_text), "fasta")]
        if bychain:
            sequences = chains
        else:
            # combine all chains into the single one
            sequences = [Seq('').join(chains)]
        for sequence in sequences:
            # '?' characters in the FASTA text are dropped before analysis.
            sequence = str(sequence).replace('?', '').strip()
            if not sequence:
                # Nothing left to analyse (e.g. the object contributed no
                # residues); skip it instead of reporting properties of an
                # empty sequence.
                print(f"Protein name: {entry}")
                print("No protein sequence found, skipping.\n\n")
                continue
            analysis = ProteinAnalysis(sequence)
            counts_aa = analysis.count_amino_acids()
            print(f"Protein name: {entry}")
            print(f"Sequence: {sequence}")
            print(f"\nProtein length: {analysis.length} aa")
            print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
            print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
            print(f"Count of aminoacids: {counts_aa}\n\n")
56
+
57
+
58
def test_protparam():
    '''
    Smoke test: start from a clean session, build a short peptide object
    and run the protparam command on it through the PyMOL command parser.
    '''
    peptide = 'ACDEFG?HIKLMN'
    object_name = 'm1'
    cmd.reinitialize()
    cmd.fab(peptide, object_name)
    cmd.do('protparam m1')
0 commit comments