1- import pypdb .clients .pdb .pdb_client
2- from pypdb .clients .search .search_client import perform_search
3- from pypdb .clients .search .search_client import ReturnType
4- from pypdb .clients .search .operators import text_operators
5- from pypdb .clients .pdb .pdb_client import PDBFileType
1+ from urllib .request import urlretrieve
2+ from rcsbsearchapi import TextQuery
63
74
85class PDB_DB :
@@ -16,9 +13,26 @@ def __init__(self):
1613 Initialise the PDB data base class
1714
1815 """
19- self .return_type = ReturnType .ENTRY
16+ pass
17+ # self.return_type = ReturnType.ENTRY
2018 self .results = []
2119
def uniprot_id_to_pdb_id(self, uniprot_id: str):
    """
    Look up a PDB id for a uniprot id, keeping only the first search hit

    Args:
        uniprot_id: The uniprot id of the protein, used as the text query value

    Returns:
        The first result produced by the text query, or None if the
        query produces no results

    """
    # Run the text search and take the first hit; fall back to None when
    # the search yields nothing.
    hits = TextQuery(value=uniprot_id)()
    return next(iter(hits), None)
35+
2236 def check_structure (self , uniprot_id : str ) -> bool :
2337 """
2438 Check if a protein is contained within the PDB
@@ -30,12 +44,28 @@ def check_structure(self, uniprot_id: str) -> bool:
3044 Is the protein in the PDB (True/False)
3145
3246 """
33- search_operator = text_operators . DefaultOperator ( value = uniprot_id )
34- try :
35- self .results = perform_search ( search_operator , self . return_type )
47+ pdb_id = self . uniprot_id_to_pdb_id ( uniprot_id )
48+ if pdb_id is not None :
49+ self .results = [ pdb_id ]
3650 return True
37- except ValueError :
38- return False
51+ return False
52+
def make_url(self, uniprot_id: str, filetype: str = "pdb") -> str:
    """
    Make the files.rcsb.org download URL for a structure file

    Args:
        uniprot_id: The identifier placed in the URL; it is upper-cased
            before use. Despite the parameter name, get_pdb passes the
            PDB id here.
        filetype: The file extension to request (pdb or cif)

    Returns:
        The URL of the file to download

    """
    entry_id = uniprot_id.upper()
    # The identifier and extension are joined by a single dot with no
    # surrounding whitespace, so the result is a valid URL.
    return f"https://files.rcsb.org/download/{entry_id}.{filetype}"
3969
4070 def get_pdb (
4171 self ,
@@ -54,26 +84,26 @@ def get_pdb(
5484
5585 """
5686
57- # Search for the protein if we have not already done so
5887 if not self .results :
59- search_operator = text_operators .DefaultOperator (value = uniprot_id )
60- self .results = perform_search (search_operator , self .return_type )
88+ self .results = [self .uniprot_id_to_pdb_id (uniprot_id )]
6189
6290 # Try to get the PDB file
6391 pdb_id = self .results [0 ]
64- filedata = pypdb .clients .pdb .pdb_client .get_pdb_file (
65- pdb_id , PDBFileType (filetype ), compression = True
66- )
6792
68- # If we couldn't find the file toggle the filetype and try again
69- if filedata is None :
93+ try :
94+ url = self .make_url (pdb_id , filetype )
95+ filename , result = urlretrieve (url )
96+ with open (filename ) as file :
97+ filedata = file .read ()
98+ except Exception :
7099 if filetype == "pdb" :
71100 filetype = "cif"
72101 else :
73102 filetype = "pdb"
74- filedata = pypdb .clients .pdb .pdb_client .get_pdb_file (
75- pdb_id , PDBFileType (filetype ), compression = True
76- )
103+ url = self .make_url (pdb_id , filetype )
104+ filename , result = urlretrieve (url )
105+ with open (filename ) as file :
106+ filedata = file .read ()
77107
78108 # If pdb is not the same then add the pdb id to the uniprot id as the identifier
79109 if pdb_id .lower () != uniprot_id .lower ():
0 commit comments