Skip to content

Commit c006e6c

Browse files
committed
add PRAS to preprocess file
1 parent 7a13f07 commit c006e6c

File tree

1 file changed

+52
-13
lines changed

1 file changed

+52
-13
lines changed

deeprank2/tools/pdbprep/preprocess.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,40 @@
1+
from pathlib import Path
2+
from tempfile import TemporaryFile
3+
14
from pdbtools import pdb_delresname, pdb_fixinsert, pdb_keepcoord, pdb_reatom, pdb_reres, pdb_rplresname, pdb_selaltloc, pdb_sort, pdb_tidy
5+
from Pras_Server.RunType import InitRunType as PRAS
26

37
from deeprank2.domain.aminoacidlist import amino_acids_by_code, amino_acids_by_letter
48

59

10+
def preprocess_pdbs(
    pdb_path: str | Path,
    rename_residues: dict[str, str] | None = None,
) -> str:
    """Read a pdb file and preprocess its contents for adding/fixing hydrogens.

    The file contents are first cleaned up by `_run_pdb_tools`; the cleaned pdb is then handed to
    `_add_missing_heavy_atoms`.

    Args:
        pdb_path: Path of pdb file to preprocess.
        rename_residues: Dictionary mapping of non-standard residue names (keys) to their standard names.
            Defaults to:
                {
                    "MSE": "MET",
                    "HIP": "HIS",
                    "HIE": "HIS",
                    "HID": "HIS",
                    "HSE": "HIS",
                    "HSD": "HIS",
                }

    Returns:
        str: Preprocessed pdb.
    """
    raw_pdb = Path(pdb_path).read_text()
    cleaned_pdb = _run_pdb_tools(raw_pdb, rename_residues)
    return _add_missing_heavy_atoms(cleaned_pdb)
36+
37+
638
def _run_pdb_tools(
739
pdb_str: str,
840
rename_residues: dict[str, str] | None = None,
@@ -22,23 +54,11 @@ def _run_pdb_tools(
2254
8. Renumber atoms from 1.
2355
9. Tidy up to somewhat adhere to pdb format specifications.
2456
25-
Args:
26-
pdb_str: string representation of pdb file.
27-
rename_residues: dictionary mapping non-standard residue names (keys) to their standard names. Defaults to:
28-
{
29-
"MSE": "MET",
30-
"HIP": "HIS",
31-
"HIE": "HIS",
32-
"HID": "HIS",
33-
"HSE": "HIS",
34-
"HSD": "HIS",
35-
}
36-
3757
Raises:
3858
ValueError: if an invalid amino acid (3-letter or 1-letter) code is given as a value to rename_residues.
3959
4060
Returns:
41-
str: updated pdb
61+
str: Updated pdb
4262
"""
4363
if not rename_residues:
4464
rename_residues = {
@@ -70,3 +90,22 @@ def _run_pdb_tools(
7090
new_pdb = pdb_tidy.run(new_pdb) # Tidy up to somewhat adhere to pdb format specifications
7191

7292
return "".join(list(new_pdb))
93+
94+
95+
def _add_missing_heavy_atoms(pdb_str: str) -> str:
    """Add missing heavy atoms (usually many) using PRAS.

    PRAS can only use files (no strings) as input and output, which is why the pdb is written to a named file
    inside a temporary directory, PRAS is pointed at that file, and the file PRAS is asked to write is read
    back into a string. The temporary directory and its contents are removed when the function returns.

    Anonymous `TemporaryFile` objects are deliberately not used here: they have no reliable path on disk, and a
    handle opened write-only cannot be read back (nor can a read-only handle be written to).

    Args:
        pdb_str: String representation of pdb file.

    Returns:
        str: Updated pdb
    """
    from tempfile import TemporaryDirectory  # local import: only this function needs a named temporary location

    with TemporaryDirectory() as tmp_dir:
        input_pdb = Path(tmp_dir) / "input.pdb"
        output_pdb = Path(tmp_dir) / "output.pdb"

        # write_text closes the file, so the complete pdb is on disk before PRAS opens it
        input_pdb.write_text(pdb_str, encoding="utf-8")

        # NOTE(review): assumes PRAS accepts path strings for `ofname`/`fname` and writes its result to exactly
        # `ofname` -- confirm against the PRAS documentation.
        fixing = PRAS(ofname=str(output_pdb))
        fixing.fname = str(input_pdb)
        fixing.ProcessOther()  # write to specified filename

        # read before leaving the context manager, since the directory is deleted on exit
        return output_pdb.read_text(encoding="utf-8")

0 commit comments

Comments
 (0)