1+ from pathlib import Path
2+ from tempfile import TemporaryFile
3+
14from pdbtools import pdb_delresname , pdb_fixinsert , pdb_keepcoord , pdb_reatom , pdb_reres , pdb_rplresname , pdb_selaltloc , pdb_sort , pdb_tidy
5+ from Pras_Server .RunType import InitRunType as PRAS
26
37from deeprank2 .domain .aminoacidlist import amino_acids_by_code , amino_acids_by_letter
48
59
def preprocess_pdbs(
    pdb_path: str | Path,
    rename_residues: dict[str, str] | None = None,
) -> str:
    """Read a pdb file and prepare its content for adding/fixing hydrogens.

    The file content is first handed to `_run_pdb_tools`; that result is then handed to
    `_add_missing_heavy_atoms`.

    Args:
        pdb_path: Location of the pdb file to preprocess.
        rename_residues: Mapping of non-standard residue names (keys) to their standard names (values).
            Defaults to:
                {
                    "MSE": "MET",
                    "HIP": "HIS",
                    "HIE": "HIS",
                    "HID": "HIS",
                    "HSE": "HIS",
                    "HSD": "HIS",
                }

    Returns:
        str: The preprocessed pdb as a single string.
    """
    raw_pdb = Path(pdb_path).read_text()
    cleaned_pdb = _run_pdb_tools(raw_pdb, rename_residues)
    return _add_missing_heavy_atoms(cleaned_pdb)
36+
37+
638def _run_pdb_tools (
739 pdb_str : str ,
840 rename_residues : dict [str , str ] | None = None ,
@@ -22,23 +54,11 @@ def _run_pdb_tools(
2254 8. Renumber atoms from 1.
2355 9. Tidy up to somewhat adhere to pdb format specifications.
2456
25- Args:
26- pdb_str: string representation of pdb file.
27- rename_residues: dictionary mapping non-standard residue names (keys) to their standard names. Defaults to:
28- {
29- "MSE": "MET",
30- "HIP": "HIS",
31- "HIE": "HIS",
32- "HID": "HIS",
33- "HSE": "HIS",
34- "HSD": "HIS",
35- }
36-
3757 Raises:
3858 ValueError: if an invalid amino acid (3-letter or 1-letter) code is given as a value to rename_residues.
3959
4060 Returns:
41- str: updated pdb
61+ str: Updated pdb
4262 """
4363 if not rename_residues :
4464 rename_residues = {
@@ -70,3 +90,22 @@ def _run_pdb_tools(
7090 new_pdb = pdb_tidy .run (new_pdb ) # Tidy up to somewhat adhere to pdb format specifications
7191
7292 return "" .join (list (new_pdb ))
93+
94+
def _add_missing_heavy_atoms(pdb_str: str) -> str:
    """Add missing heavy atoms (usually many) using PRAS.

    PRAS can only use files (no strings) as input and output, which is why the pdb is round-tripped
    through real files inside a temporary directory. The directory and both files are removed again
    when the function returns.

    Args:
        pdb_str: String representation of the pdb file.

    Returns:
        str: Updated pdb
    """
    # Imported locally so this function does not depend on a change to the module-level imports.
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as tmp_dir:
        input_pdb = Path(tmp_dir) / "input.pdb"
        output_pdb = Path(tmp_dir) / "output.pdb"

        # write_text closes the file, so the content is fully on disk before PRAS opens it.
        input_pdb.write_text(pdb_str, encoding="utf-8")

        # NOTE(review): assumes PRAS takes plain path strings for `ofname`/`fname` and writes its
        # result to exactly the `ofname` path - confirm against the PRAS documentation.
        fixing = PRAS(ofname=str(output_pdb))
        fixing.fname = str(input_pdb)
        fixing.ProcessOther()  # write to specified filename

        # Read inside the context manager: the directory is deleted as soon as the block exits.
        return output_pdb.read_text(encoding="utf-8")
0 commit comments