Skip to content

Commit 12cd006

Browse files
author
Evgenii Osipov
committed
Added protparam script to calculate protein length, Mw, pI and aa content
1 parent ab965cc commit 12cd006

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

scripts/protparam.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from pymol import cmd
2+
from io import StringIO
3+
4+
try:
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    from Bio import SeqIO
    from Bio.Seq import Seq
except ModuleNotFoundError:
    # The Bio package might be missing from a PyMOL 2 installation, so try to
    # install it with pip into the interpreter that is running this script
    # (manual equivalent: run "pip install biopython").
    import importlib
    import subprocess
    import sys

    print("Biopython is missing! Installing...")
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", 'biopython'])
    except subprocess.CalledProcessError as e:
        # Installation failed: the Bio names stay undefined, so protparam
        # cannot run until biopython is installed manually.
        print(f"Failed to install biopython: {e}")
    else:
        print("Successfully installed biopython")
        # A package installed while the interpreter is running may not be
        # visible to the import system until its finder caches are cleared.
        importlib.invalidate_caches()
        # The libraries must be imported after the first installation,
        # otherwise protparam fails to run.
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        from Bio import SeqIO
        from Bio.Seq import Seq
22+
23+
@cmd.extend
def protparam(selection='enabled', bychain=0):
    '''
DESCRIPTION:
    Given selection, calculates common protein properties, like Mw, pI,
    length and amino acid content, and prints them for every object that
    is part of the selection.
    By default, combines all chains of each object into a single sequence.

USAGE:
    protparam selection, [bychain]

ARGUMENTS:
    selection = string: atom selection to analyse {default: enabled}
    bychain = 0/1: if non-zero, report every FASTA record of an object
              separately instead of joining them {default: 0}

DEPENDENCIES:
    biopython
    '''
    # Arguments typed on the PyMOL command line arrive as strings, and the
    # string "0" is truthy, so convert explicitly before testing the flag.
    bychain = int(bychain)

    # TODO: add pretty output suitable for copy-pasting
    for entry in cmd.get_object_list(selection):
        # Parenthesize the user selection so that operators inside it
        # (e.g. "or") cannot escape the restriction to the current object.
        fasta_text = cmd.get_fastastr(f"({selection}) and {entry}")
        records = list(SeqIO.parse(StringIO(fasta_text), "fasta"))
        if bychain:
            sequences = [record.seq for record in records]
        else:
            # Combine all chains into a single sequence.
            sequences = [Seq('').join([record.seq for record in records])]

        for sequence in sequences:
            # '?' placeholders in the FASTA text are dropped before analysis.
            sequence = str(sequence).replace('?', '').strip()
            if not sequence:
                # Nothing to analyse: say so instead of printing statistics
                # for an empty sequence.
                print(f"Protein name: {entry}")
                print("No amino acid sequence found in selection\n\n")
                continue
            analysis = ProteinAnalysis(sequence)
            counts_aa = analysis.count_amino_acids()
            print(f"Protein name: {entry}")
            print(f"Sequence: {sequence}")
            print(f"\nProtein length: {analysis.length} aa")
            print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
            print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
            print(f"Count of aminoacids: {counts_aa}\n\n")
56+
57+
58+
def test_protparam():
    '''
    Smoke test: build a short peptide object and run protparam on it.

    Makes no assertions; it only checks that the command can be invoked.
    NOTE(review): the '?' in the peptide presumably exercises the
    unknown-residue handling of protparam -- confirm.
    '''
    peptide = 'ACDEFG?HIKLMN'
    # Start from a clean PyMOL session before creating the test object.
    cmd.reinitialize()
    cmd.fab(peptide, 'm1')
    cmd.do('protparam m1')

0 commit comments

Comments
 (0)