Skip to content

Commit b86a7e6

Browse files
committed
move pdb2pqr function into detect protonation module
1 parent c006e6c commit b86a7e6

File tree

2 files changed

+115
-134
lines changed

2 files changed

+115
-134
lines changed

deeprank2/tools/pdbprep/detect_protonation.py

Lines changed: 115 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,31 @@
1+
from dataclasses import dataclass
12
from tempfile import TemporaryFile
23

34
from openmm import LangevinIntegrator, unit
45
from openmm import app as mmapp
6+
from pdb2pqr.config import FORCE_FIELDS
7+
from pdb2pqr.main import main_driver as pdb2pqr
58

69
_MIN_ATOMS_TO_PROTONATE = 5 # TODO: why do we need this check?
710
_TEMPERATURE = 310
811

912

1013
def add_hydrogens(
1114
pdb_str: str,
12-
protonated_sequence: list[str | None],
1315
max_iterations: int = 100,
16+
constraint_tolerance: float = 1e-05,
1417
random_seed: int = 917,
1518
) -> str:
16-
"""Add hydrogens."""
17-
with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
19+
"""Add hydrogens to pdb file.
20+
21+
Args:
22+
pdb_str: String representation of pdb file, preprocessed using deeprank2.tools.pdbprep.preprocess.
23+
max_iterations: Maximum number of iterations to perform during energy minimization. Defaults to 100.
24+
constraint_tolerance: Distance tolerance of LangevinIntegrator within which constraints are maintained, as a
25+
fraction of the constrained distance. Defaults to 1e-05.
26+
random_seed: Random seed for LangevinIntegrator.
27+
"""
28+
with TemporaryFile(mode="w", suffix="pdb") as input_pdb, TemporaryFile(mode="r") as output_pdb:
1829
input_pdb.write(pdb_str)
1930

2031
# PARAMETERS
@@ -25,6 +36,7 @@ def add_hydrogens(
2536
# PREPARES MODEL
2637
forcefield = mmapp.ForceField(forcefield_model, water_model)
2738
structure = mmapp.PDBFile(input_pdb)
39+
protonated_sequence = _detect_protonation_state(pdb_str)
2840

2941
model = mmapp.Modeller(structure.topology, structure.positions)
3042
model.addHydrogens(forcefield=forcefield, variants=protonated_sequence)
@@ -35,13 +47,13 @@ def add_hydrogens(
3547
system = forcefield.createSystem(structure.topology)
3648

3749
integrator = LangevinIntegrator(
38-
_TEMPERATURE * unit.kelvin,
39-
1.0 / unit.picosecond,
40-
2.0 * unit.femtosecond,
50+
temperature=_TEMPERATURE * unit.kelvin,
51+
frictionCoeff=1.0 / unit.picosecond,
52+
stepSize=2.0 * unit.femtosecond,
4153
)
4254

4355
integrator.setRandomNumberSeed(random_seed)
44-
integrator.setConstraintTolerance(0.00001)
56+
integrator.setConstraintTolerance(constraint_tolerance)
4557

4658
simulation = mmapp.Simulation(
4759
structure.topology,
@@ -68,15 +80,10 @@ def add_hydrogens(
6880
return output_pdb.read()
6981

7082

71-
def detect_protonation_state(pdb_str: str) -> list[str | None]:
72-
"""Detect protonation states.
83+
def _detect_protonation_state(pdb_str: str) -> list[str | None]:
84+
"""Detect protonation states and return them as a sequence of alternative residue names."""
85+
_calculate_protonation_state(pdb_str)
7386

74-
Args:
75-
pdb_str (str): string representation of pdb file.
76-
77-
Returns:
78-
list of protonation-specific residue names, which can be used to ... #TODO: finish this sentence
79-
"""
8087
pdb_lines = pdb_str.splitlines()
8188
protonable_residues = ("HIS", "ASP", "GLU", "CYS", "LYS")
8289

@@ -108,8 +115,11 @@ def detect_protonation_state(pdb_str: str) -> list[str | None]:
108115
return residues
109116

110117

111-
def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str: # noqa: PLR0911
112-
"""Returns alternate residue name based on protonation state."""
118+
def _protonation_resname( # noqa:PLR0911
119+
resname: str,
120+
atoms_in_residue: list[str],
121+
) -> str:
122+
"""Return alternate residue name based on protonation state."""
113123
if resname == "HIS":
114124
if "HD1" in atoms_in_residue and "HE2" in atoms_in_residue:
115125
return "HIP"
@@ -134,12 +144,97 @@ def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str: # n
134144
return resname
135145

136146

137-
# This module is modified from https://github.com/DeepRank/pdbprep/blob/main/
138-
# original modules names: detect_protonation.py and add_hydrogens.py),
147+
def _calculate_protonation_state(
148+
pdb_str: str,
149+
forcefield: str = "AMBER",
150+
) -> str:
151+
"""Calculate the protonation states using PDB2PQR.
152+
153+
PDB2PQR can only use files (no strings) as input and output, which is why this function is wrapped inside
154+
TemporaryFile context managers.
155+
"""
156+
with TemporaryFile(mode="w", suffix="pdb") as input_pdb, TemporaryFile(mode="r") as output_pdb:
157+
input_pdb.write(pdb_str)
158+
159+
input_args = _Pdb2pqrArgs(input_pdb, output_pdb, forcefield)
160+
pdb2pqr(input_args)
161+
162+
return output_pdb.read()
163+
164+
165+
@dataclass
166+
class _Pdb2pqrArgs:
167+
"""Input arguments to `main_driver` function of PDB2PQR.
168+
169+
These are usually given via CLI using argparse. All arguments, including those kept as default need to be given to
170+
`main_driver` if called from script.
171+
The argument given to `main_driver` is accessed via dot notation and is iterated over, which is why this is created
172+
as a dataclass with an iterator.
173+
174+
Args*:
175+
input_path: Input file path.
176+
output_pqr: Output file path.
177+
ff: Name of the selected forcefield.
178+
179+
*all other arguments should remain untouched.
180+
181+
Raises:
182+
ValueError: if the forcefield is not recognized
183+
"""
184+
185+
input_path: str
186+
output_pqr: str
187+
ff: str = "AMBER"
188+
189+
# arguments set different from default
190+
debump: bool = True
191+
keep_chain: bool = True
192+
log_level: str = "CRITICAL"
193+
194+
# arguments kept as default
195+
ph: float = 7.0
196+
assign_only: bool = False
197+
clean: bool = False
198+
userff: None = None
199+
ffout: None = None
200+
usernames: None = None
201+
ligand: None = None
202+
neutraln: bool = False
203+
neutralc: bool = False
204+
drop_water: bool = False
205+
pka_method: None = None
206+
opt: bool = True
207+
include_header: bool = False
208+
whitespace: bool = False
209+
pdb_output: None = None
210+
apbs_input: None = None
211+
212+
def __post_init__(self):
213+
self._index = 0
214+
if self.ff.lower() not in FORCE_FIELDS:
215+
msg = f"Forcefield {self.ff} not recognized. Valid options: {FORCE_FIELDS}."
216+
raise ValueError(msg)
217+
if self.ff.lower() != "amber":
218+
msg = f"Forcefield given as {self.ff}. Currently only AMBER forcefield is implemented."
219+
raise NotImplementedError(msg)
220+
221+
def __iter__(self):
222+
return self
223+
224+
def __next__(self):
225+
settings = vars(self)
226+
if self._index < len(settings):
227+
setting = list(settings)[self._index]
228+
self._index += 1
229+
return setting
230+
raise StopIteration
231+
232+
233+
# Part of this module is modified from https://github.com/DeepRank/pdbprep/blob/main/
234+
# original module names: detect_protonation.py and add_hydrogens.py),
139235
# written by João M.C. Teixeira (https://github.com/joaomcteixeira)
140236
# publishd under the following license:
141237

142-
143238
# Apache License
144239
# Version 2.0, January 2004
145240
# http://www.apache.org/licenses/

deeprank2/tools/pdbprep/pras_pdb2pqr.py

Lines changed: 0 additions & 114 deletions
This file was deleted.

0 commit comments

Comments
 (0)