Skip to content

Commit ea55193

Browse files
committed
use Model instead of Structure
1 parent 1c53958 commit ea55193

File tree

5 files changed

+175
-134
lines changed

5 files changed

+175
-134
lines changed

src/prodigy_prot/cli.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
from argparse import RawTextHelpFormatter
99
from pathlib import Path
1010

11-
from Bio.PDB.Structure import Structure
11+
from Bio.PDB.Model import Model
1212

13-
from prodigy_prot.modules.parsers import (get_parser, parse_structure,
14-
validate_structure)
13+
from prodigy_prot.modules.parsers import parse_structure
1514
from prodigy_prot.modules.prodigy import Prodigy
1615

1716
# setup logging
@@ -100,27 +99,28 @@ def main():
10099
sys.exit(1)
101100

102101
for input_f in input_list:
103-
structure, n_chains, n_res = parse_structure(str(input_f))
102+
models: list[Model]
103+
n_chains: int
104+
n_res: int
105+
models, n_chains, n_res = parse_structure(str(input_f))
106+
identifier = Path(input_f).stem
104107

105-
if len(input_list) > 1:
108+
for model in models:
106109
log.info("#" * 42)
107-
108-
log.info(
109-
"[+] Parsed structure file {0} ({1} chains, {2} residues)".format(
110-
structure.id, n_chains, n_res
110+
log.info(
111+
f"[+] Processing structure {identifier}_model{model.id} ({n_chains} chains, {n_res} residues)"
112+
)
113+
prodigy = Prodigy(model, args.selection, args.temperature)
114+
prodigy.predict(
115+
distance_cutoff=args.distance_cutoff, acc_threshold=args.acc_threshold
111116
)
112-
)
113-
prodigy = Prodigy(structure, args.selection, args.temperature)
114-
prodigy.predict(
115-
distance_cutoff=args.distance_cutoff, acc_threshold=args.acc_threshold
116-
)
117-
prodigy.print_prediction(quiet=args.quiet)
118-
119-
if args.contact_list:
120-
prodigy.print_contacts(outfile=str(struct_path.with_suffix(".ic")))
121-
122-
if args.pymol_selection:
123-
prodigy.print_pymol_script(outfile=str(struct_path.with_suffix(".pml")))
117+
prodigy.print_prediction(quiet=args.quiet)
118+
119+
if args.contact_list:
120+
prodigy.print_contacts(outfile=str(struct_path.with_suffix(".ic")))
121+
122+
if args.pymol_selection:
123+
prodigy.print_pymol_script(outfile=str(struct_path.with_suffix(".pml")))
124124

125125

126126
if __name__ == "__main__":

src/prodigy_prot/modules/freesasa_tools.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66

77
import freesasa
8+
from Bio.PDB.Model import Model
89
from Bio.PDB.Structure import Structure
910
from freesasa import Classifier, calc, structureFromBioPDB
1011

@@ -14,7 +15,7 @@
1415
freesasa.setVerbosity(freesasa.nowarnings)
1516

1617

17-
def execute_freesasa_api(structure: Structure) -> tuple[dict, dict]:
18+
def execute_freesasa_api(model: Model) -> tuple[dict, dict]:
1819
"""
1920
Calls freesasa using its Python API and returns
2021
per-residue accessibilities.
@@ -26,9 +27,14 @@ def execute_freesasa_api(structure: Structure) -> tuple[dict, dict]:
2627

2728
classifier = Classifier(str(NACCESS_CONFIG))
2829

30+
# NOTE: `structureFromBioPDB` requires a Structure object
31+
# so here build one from a model
32+
s = Structure(model.id)
33+
s.add(model)
34+
2935
try:
3036
struct = structureFromBioPDB(
31-
structure,
37+
s,
3238
classifier,
3339
)
3440
result = calc(struct)

src/prodigy_prot/modules/parsers.py

Lines changed: 123 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
import logging
66
import sys
7+
import typing
78
import warnings
89
from pathlib import Path
910
from typing import Optional, Union
1011

12+
from Bio.PDB.Atom import DisorderedAtom
1113
from Bio.PDB.Chain import Chain
1214
from Bio.PDB.MMCIFParser import MMCIFParser
15+
from Bio.PDB.Model import Model
1316
from Bio.PDB.PDBExceptions import PDBConstructionWarning
1417
from Bio.PDB.PDBParser import PDBParser
1518
from Bio.PDB.Polypeptide import PPBuilder, is_aa
@@ -26,112 +29,110 @@ def get_parser(input_f: Path) -> Union[PDBParser, MMCIFParser]:
2629
return PDBParser()
2730

2831

32+
def ignore(r):
33+
return r.id[0][0] == "W" or r.id[0][0] == "H"
34+
35+
2936
def validate_structure(
30-
s: Structure, selection: Optional[list[str]] = None, clean: bool = True
31-
) -> Structure:
32-
33-
# setup logging
34-
logger = logging.getLogger("Prodigy")
35-
36-
# Keep first model only
37-
if len(s) > 1:
38-
logger.warning(
39-
(
40-
"[!] Structure contains more than one model."
41-
" Only the first one will be kept"
42-
)
43-
)
44-
model_one = s[0].id
45-
for m in s.child_list[:]:
46-
if m.id != model_one:
47-
s.detach_child(m.id)
48-
49-
# process selected chains
50-
chains: list[Chain] = list(s.get_chains())
51-
chain_ids = set([c.id for c in chains])
52-
53-
if selection:
54-
sel_chains = []
55-
# Match selected chain with structure
56-
for sel in selection:
57-
for c_str in sel.split(","):
58-
sel_chains.append(c_str)
59-
if c_str not in chain_ids:
37+
input_strcture_obj: Structure,
38+
selection: Optional[list[str]] = None,
39+
clean: bool = True,
40+
) -> list[Model]:
41+
42+
result: list[Model] = []
43+
for model in [m for m in input_strcture_obj.child_list]:
44+
45+
# process selected chains
46+
chains: list[Chain] = list(model.get_chains())
47+
chain_ids = set([c.id for c in chains])
48+
49+
if selection:
50+
sel_chains = []
51+
# Match selected chain with structure
52+
for sel in selection:
53+
for c_str in sel.split(","):
54+
sel_chains.append(c_str)
55+
if c_str not in chain_ids:
56+
raise ValueError(
57+
f"Selected chain not present in provided structure: {c_str}"
58+
)
59+
60+
# Remove unselected chains
61+
def _ignore_helper(x) -> bool:
62+
return x.id not in sel_chains
63+
64+
for c in chains:
65+
if _ignore_helper(c):
66+
if c.parent is not None:
67+
c.parent.detach_child(c.id)
68+
69+
# Double occupancy check
70+
for atom in list(model.get_atoms()):
71+
if atom.is_disordered():
72+
atom = typing.cast(DisorderedAtom, atom)
73+
residue = atom.parent
74+
assert residue is not None
75+
sel_at = atom.selected_child
76+
assert sel_at is not None
77+
sel_at.altloc = " "
78+
sel_at.disordered_flag = 0
79+
residue.detach_child(atom.id)
80+
residue.add(sel_at)
81+
82+
# Insertion code check
83+
for c in chains:
84+
for residue in c.get_residues():
85+
if residue.get_id()[2] != " ":
86+
c.detach_child(residue.id)
87+
88+
if clean:
89+
# Remove HETATMs and solvent
90+
res_list = list(model.get_residues())
91+
92+
for res in res_list:
93+
if ignore(res):
94+
chain = res.parent
95+
assert chain is not None
96+
chain.detach_child(res.id)
97+
elif not is_aa(res, standard=True):
6098
raise ValueError(
61-
f"Selected chain not present in provided structure: {c_str}"
99+
"Unsupported non-standard amino acid found: {0}".format(
100+
res.resname
101+
)
62102
)
63103

64-
# Remove unselected chains
65-
def _ignore_helper(x) -> bool:
66-
return x.id not in sel_chains
67-
68-
for c in chains:
69-
if _ignore_helper(c):
70-
if c.parent is not None:
71-
c.parent.detach_child(c.id)
72-
73-
# Double occupancy check
74-
for atom in list(s.get_atoms()):
75-
if atom.is_disordered():
76-
residue = atom.parent
77-
sel_at = atom.selected_child
78-
sel_at.altloc = " "
79-
sel_at.disordered_flag = 0
80-
residue.detach_child(atom.id)
81-
residue.add(sel_at)
82-
83-
# Insertion code check
84-
for c in chains:
85-
for residue in c.get_residues():
86-
if residue.get_id()[2] != " ":
87-
c.detach_child(residue.id)
88-
89-
if clean:
90-
# Remove HETATMs and solvent
91-
res_list = list(s.get_residues())
92-
93-
def _ignore(r):
94-
return r.id[0][0] == "W" or r.id[0][0] == "H"
95-
96-
for res in res_list:
97-
if _ignore(res):
98-
chain = res.parent
99-
chain.detach_child(res.id)
100-
elif not is_aa(res, standard=True):
101-
raise ValueError(
102-
"Unsupported non-standard amino acid found: {0}".format(res.resname)
104+
# Remove Hydrogens
105+
atom_list = list(model.get_atoms())
106+
107+
def _ignore(x):
108+
return x.element == "H"
109+
110+
for atom in atom_list:
111+
if _ignore(atom):
112+
residue = atom.parent
113+
assert residue is not None
114+
residue.detach_child(atom.name)
115+
116+
# Detect gaps and compare with no. of chains
117+
pep_builder = PPBuilder()
118+
peptides = pep_builder.build_peptides(model)
119+
n_peptides = len(peptides)
120+
121+
if n_peptides != len(chain_ids):
122+
message = "[!] Structure contains gaps:\n"
123+
for i_pp, pp in enumerate(peptides):
124+
message += (
125+
"\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > "
126+
"{2.parent.id} {2.resname}{2.id[1]}\n".format(i_pp, pp[0], pp[-1])
103127
)
128+
log.warning(message)
104129

105-
# Remove Hydrogens
106-
atom_list = list(s.get_atoms())
130+
result.append(model)
107131

108-
def _ignore(x):
109-
return x.element == "H"
132+
return result
110133

111-
for atom in atom_list:
112-
if _ignore(atom):
113-
residue = atom.parent
114-
residue.detach_child(atom.name)
115134

116-
# Detect gaps and compare with no. of chains
117-
pep_builder = PPBuilder()
118-
peptides = pep_builder.build_peptides(s)
119-
n_peptides = len(peptides)
120-
121-
if n_peptides != len(chain_ids):
122-
message = "[!] Structure contains gaps:\n"
123-
for i_pp, pp in enumerate(peptides):
124-
message += (
125-
"\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > "
126-
"{2.parent.id} {2.resname}{2.id[1]}\n".format(i_pp, pp[0], pp[-1])
127-
)
128-
logger.warning(message)
129-
# raise Exception(message)
130-
131-
return s
132-
133-
134-
def parse_structure(path: str) -> tuple[Structure, int, int]:
135+
def parse_structure(path: str) -> tuple[list[Model], int, int]:
135136
"""Return a validated `Structure`, number of chains and number of residues"""
136137

137138
extension = Path(path).suffix
@@ -154,13 +155,33 @@ def parse_structure(path: str) -> tuple[Structure, int, int]:
154155

155156
assert isinstance(original_structure, Structure)
156157

157-
structure = validate_structure(original_structure)
158+
models: list[Model] = validate_structure(original_structure)
158159

159160
# Get number of chains
160-
number_of_chains = len(set([c.id for c in structure.get_chains()]))
161+
chain_dict = {}
162+
res_dict = {}
163+
for model in models:
164+
chain_dict.update({c.id: c for c in model.get_chains()})
165+
res_dict.update({r.id: r for r in model.get_residues()})
166+
167+
## Make sure all models have the same chains
168+
# Get chain sets for all models
169+
chain_sets = [set(chain.id for chain in model.get_chains()) for model in models]
170+
171+
# Check if all sets are identical
172+
if not all(chain_set == chain_sets[0] for chain_set in chain_sets):
173+
raise ValueError(
174+
"Not all models have the same chains. Found chain sets: "
175+
+ ", ".join(str(s) for s in chain_sets)
176+
)
177+
178+
res_sets = [set(res.id for res in model.get_residues()) for model in models]
161179

162-
# Get number of residues
163-
number_of_residues = len(list(structure.get_residues()))
180+
if not all(res_set == res_sets[0] for res_set in res_sets):
181+
raise ValueError(
182+
"Not all models have the same residues. Found residue sets: "
183+
+ ", ".join(str(s) for s in res_sets)
184+
)
164185

165186
# structure, n_chains, n_res = parse_structure(path=str(struct_path))
166-
return (structure, number_of_chains, number_of_residues)
187+
return (models, len(chain_sets[0]), len(res_sets[0]))

0 commit comments

Comments
 (0)