1+ from dataclasses import dataclass
12from tempfile import TemporaryFile
23
34from openmm import LangevinIntegrator , unit
45from openmm import app as mmapp
6+ from pdb2pqr .config import FORCE_FIELDS
7+ from pdb2pqr .main import main_driver as pdb2pqr
58
69_MIN_ATOMS_TO_PROTONATE = 5 # TODO: why do we need this check?
710_TEMPERATURE = 310
811
912
1013def add_hydrogens (
1114 pdb_str : str ,
12- protonated_sequence : list [str | None ],
1315 max_iterations : int = 100 ,
16+ constraint_tolerance : float = 1e-05 ,
1417 random_seed : int = 917 ,
1518) -> str :
16- """Add hydrogens."""
17- with TemporaryFile (mode = "w" , suffix = "pdb" , encoding = "utf-8" ) as input_pdb , TemporaryFile (mode = "r" , encoding = "utf-8" ) as output_pdb :
19+ """Add hydrogens to pdb file.
20+
21+ Args:
22+ pdb_str: String representation of pdb file, preprocessed using deeprank2.tools.pdbprep.preprocess.
23+ max_iterations: Maximum number of iterations to perform during energy minimization. Defaults to 100.
24+ constraint_tolerance: Distance tolerance of LangevinIntegrator within which constraints are maintained, as a
25+ fraction of the constrained distance. Defaults to 1e-05.
26+ random_seed: Random seed for LangevinIntegrator.
27+ """
28+ with TemporaryFile (mode = "w" , suffix = "pdb" ) as input_pdb , TemporaryFile (mode = "r" ) as output_pdb :
1829 input_pdb .write (pdb_str )
1930
2031 # PARAMETERS
@@ -25,6 +36,7 @@ def add_hydrogens(
2536 # PREPARES MODEL
2637 forcefield = mmapp .ForceField (forcefield_model , water_model )
2738 structure = mmapp .PDBFile (input_pdb )
39+ protonated_sequence = _detect_protonation_state (pdb_str )
2840
2941 model = mmapp .Modeller (structure .topology , structure .positions )
3042 model .addHydrogens (forcefield = forcefield , variants = protonated_sequence )
@@ -35,13 +47,13 @@ def add_hydrogens(
3547 system = forcefield .createSystem (structure .topology )
3648
3749 integrator = LangevinIntegrator (
38- _TEMPERATURE * unit .kelvin ,
39- 1.0 / unit .picosecond ,
40- 2.0 * unit .femtosecond ,
50+ temperature = _TEMPERATURE * unit .kelvin ,
51+ frictionCoeff = 1.0 / unit .picosecond ,
52+ stepSize = 2.0 * unit .femtosecond ,
4153 )
4254
4355 integrator .setRandomNumberSeed (random_seed )
44- integrator .setConstraintTolerance (0.00001 )
56+ integrator .setConstraintTolerance (constraint_tolerance )
4557
4658 simulation = mmapp .Simulation (
4759 structure .topology ,
@@ -68,15 +80,10 @@ def add_hydrogens(
6880 return output_pdb .read ()
6981
7082
71- def detect_protonation_state (pdb_str : str ) -> list [str | None ]:
72- """Detect protonation states.
83+ def _detect_protonation_state (pdb_str : str ) -> list [str | None ]:
84+ """Detect protonation states and return them as a sequence of alternative residue names."""
85+ _calculate_protonation_state (pdb_str )
7386
74- Args:
75- pdb_str (str): string representation of pdb file.
76-
77- Returns:
78- list of protonation-specific residue names, which can be used to ... #TODO: finish this sentence
79- """
8087 pdb_lines = pdb_str .splitlines ()
8188 protonable_residues = ("HIS" , "ASP" , "GLU" , "CYS" , "LYS" )
8289
@@ -108,8 +115,11 @@ def detect_protonation_state(pdb_str: str) -> list[str | None]:
108115 return residues
109116
110117
111- def _protonation_resname (resname : str , atoms_in_residue : list [str ]) -> str : # noqa: PLR0911
112- """Returns alternate residue name based on protonation state."""
118+ def _protonation_resname ( # noqa:PLR0911
119+ resname : str ,
120+ atoms_in_residue : list [str ],
121+ ) -> str :
122+ """Return alternate residue name based on protonation state."""
113123 if resname == "HIS" :
114124 if "HD1" in atoms_in_residue and "HE2" in atoms_in_residue :
115125 return "HIP"
@@ -134,12 +144,97 @@ def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str: # n
134144 return resname
135145
136146
137- # This module is modified from https://github.com/DeepRank/pdbprep/blob/main/
138- # original modules names: detect_protonation.py and add_hydrogens.py),
147+ def _calculate_protonation_state (
148+ pdb_str : str ,
149+ forcefield : str = "AMBER" ,
150+ ) -> str :
151+ """Calculate the protonation states using PDB2PQR.
152+
153+ PDB2PQR can only use files (no strings) as input and output, which is why this function is wrapped inside
154+ TemporaryFile context managers.
155+ """
156+ with TemporaryFile (mode = "w" , suffix = "pdb" ) as input_pdb , TemporaryFile (mode = "r" ) as output_pdb :
157+ input_pdb .write (pdb_str )
158+
159+ input_args = _Pdb2pqrArgs (input_pdb , output_pdb , forcefield )
160+ pdb2pqr (input_args )
161+
162+ return output_pdb .read ()
163+
164+
165+ @dataclass
166+ class _Pdb2pqrArgs :
167+ """Input arguments to `main_driver` function of PDB2PQR.
168+
169+ These are usually given via CLI using argparse. All arguments, including those kept as default need to be given to
170+ `main_driver` if called from script.
171+ The argument given to `main_driver` is accessed via dot notation and is iterated over, which is why this is created
172+ as a dataclass with an iterator.
173+
174+ Args*:
175+ input_path: Input file path.
176+ output_pqr: Output file path.
177+ ff: Name of the selected forcefield.
178+
179+ *all other arguments should remain untouched.
180+
181+ Raises:
182+ ValueError: if the forcefield is not recognized
183+ """
184+
185+ input_path : str
186+ output_pqr : str
187+ ff : str = "AMBER"
188+
189+ # arguments set different from default
190+ debump : bool = True
191+ keep_chain : bool = True
192+ log_level : str = "CRITICAL"
193+
194+ # arguments kept as default
195+ ph : float = 7.0
196+ assign_only : bool = False
197+ clean : bool = False
198+ userff : None = None
199+ ffout : None = None
200+ usernames : None = None
201+ ligand : None = None
202+ neutraln : bool = False
203+ neutralc : bool = False
204+ drop_water : bool = False
205+ pka_method : None = None
206+ opt : bool = True
207+ include_header : bool = False
208+ whitespace : bool = False
209+ pdb_output : None = None
210+ apbs_input : None = None
211+
212+ def __post_init__ (self ):
213+ self ._index = 0
214+ if self .ff .lower () not in FORCE_FIELDS :
215+ msg = f"Forcefield { self .ff } not recognized. Valid options: { FORCE_FIELDS } ."
216+ raise ValueError (msg )
217+ if self .ff .lower () != "amber" :
218+ msg = f"Forcefield given as { self .ff } . Currently only AMBER forcefield is implemented."
219+ raise NotImplementedError (msg )
220+
221+ def __iter__ (self ):
222+ return self
223+
224+ def __next__ (self ):
225+ settings = vars (self )
226+ if self ._index < len (settings ):
227+ setting = list (settings )[self ._index ]
228+ self ._index += 1
229+ return setting
230+ raise StopIteration
231+
232+
233+ # Part of this module is modified from https://github.com/DeepRank/pdbprep/blob/main/
234+ # original module names: detect_protonation.py and add_hydrogens.py),
139235# written by João M.C. Teixeira (https://github.com/joaomcteixeira)
140236# publishd under the following license:
141237
142-
143238# Apache License
144239# Version 2.0, January 2004
145240# http://www.apache.org/licenses/
0 commit comments