Skip to content

Commit 82ab1f5

Browse files
author
James Parkhurst
committed
Fix pdb file getter
1 parent 57f4856 commit 82ab1f5

File tree

1 file changed

+52
-22
lines changed

1 file changed

+52
-22
lines changed

profet/pdb.py

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
import pypdb.clients.pdb.pdb_client
2-
from pypdb.clients.search.search_client import perform_search
3-
from pypdb.clients.search.search_client import ReturnType
4-
from pypdb.clients.search.operators import text_operators
5-
from pypdb.clients.pdb.pdb_client import PDBFileType
1+
from urllib.request import urlretrieve
2+
from rcsbsearchapi import TextQuery
63

74

85
class PDB_DB:
@@ -16,9 +13,26 @@ def __init__(self):
1613
Initialise the PDB data base class
1714
1815
"""
19-
self.return_type = ReturnType.ENTRY
16+
pass
17+
# self.return_type = ReturnType.ENTRY
2018
self.results = []
2119

20+
def uniprot_id_to_pdb_id(self, uniprot_id: str):
    """
    Look up a PDB id for a uniprot id by taking the first search hit

    A TextQuery is built from the uniprot id and executed; whatever the
    search yields first is returned as-is.

    Args:
        uniprot_id: The uniprot id of the protein

    Returns:
        The first result yielded by the query, or None if the query
        yields no results

    """
    search = TextQuery(value=uniprot_id)

    # Only the first hit is wanted, so pull a single item and fall back to
    # None when the result set is empty
    return next(iter(search()), None)
35+
2236
def check_structure(self, uniprot_id: str) -> bool:
2337
"""
2438
Check if a protein is contained within the PDB
@@ -30,12 +44,28 @@ def check_structure(self, uniprot_id: str) -> bool:
3044
Is the protein in the PDB (True/False)
3145
3246
"""
33-
search_operator = text_operators.DefaultOperator(value=uniprot_id)
34-
try:
35-
self.results = perform_search(search_operator, self.return_type)
47+
pdb_id = self.uniprot_id_to_pdb_id(uniprot_id)
48+
if pdb_id is not None:
49+
self.results = [pdb_id]
3650
return True
37-
except ValueError:
38-
return False
51+
return False
52+
53+
def make_url(self, uniprot_id: str, filetype: str = "pdb") -> str:
    """
    Make the RCSB download URL for an entry

    Despite the parameter name, the value is used directly as the entry
    identifier in the URL; get_pdb calls this with the PDB id it resolved,
    not with the original uniprot id.

    Args:
        uniprot_id: The entry identifier to put in the URL. Surrounding
            whitespace is removed and the identifier is upper-cased.
        filetype: The type of file to download (pdb or cif), used as the
            file extension

    Returns:
        The URL of the file to download

    """
    # Strip stray whitespace (e.g. a trailing newline from a file of ids) so
    # that it does not end up inside the URL and break the request
    entry_id = uniprot_id.strip().upper()
    return f"https://files.rcsb.org/download/{entry_id}.{filetype}"
3969

4070
def get_pdb(
4171
self,
@@ -54,26 +84,26 @@ def get_pdb(
5484
5585
"""
5686

57-
# Search for the protein if we have not already done so
5887
if not self.results:
59-
search_operator = text_operators.DefaultOperator(value=uniprot_id)
60-
self.results = perform_search(search_operator, self.return_type)
88+
self.results = [self.uniprot_id_to_pdb_id(uniprot_id)]
6189

6290
# Try to get the PDB file
6391
pdb_id = self.results[0]
64-
filedata = pypdb.clients.pdb.pdb_client.get_pdb_file(
65-
pdb_id, PDBFileType(filetype), compression=True
66-
)
6792

68-
# If we couldn't find the file toggle the filetype and try again
69-
if filedata is None:
93+
try:
94+
url = self.make_url(pdb_id, filetype)
95+
filename, result = urlretrieve(url)
96+
with open(filename) as file:
97+
filedata = file.read()
98+
except Exception:
7099
if filetype == "pdb":
71100
filetype = "cif"
72101
else:
73102
filetype = "pdb"
74-
filedata = pypdb.clients.pdb.pdb_client.get_pdb_file(
75-
pdb_id, PDBFileType(filetype), compression=True
76-
)
103+
url = self.make_url(pdb_id, filetype)
104+
filename, result = urlretrieve(url)
105+
with open(filename) as file:
106+
filedata = file.read()
77107

78108
# If pdb is not the same then add the pdb id to the uniprot id as the identifier
79109
if pdb_id.lower() != uniprot_id.lower():

0 commit comments

Comments
 (0)