Skip to content

Commit c31c3c6

Browse files
author
James Parkhurst
authored
Merge pull request ccpem#54 from jmp1985/main
Fixing profet for new PDB API
2 parents 896b9f3 + 84cb528 commit c31c3c6

File tree

3 files changed

+54
-24
lines changed

3 files changed

+54
-24
lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: [3.6, 3.7, 3.8, 3.9]
19+
python-version: ["3.10", "3.11", "3.12", "3.13"]
2020

2121
steps:
2222
- uses: actions/checkout@v2

profet/pdb.py

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
import pypdb.clients.pdb.pdb_client
2-
from pypdb.clients.search.search_client import perform_search
3-
from pypdb.clients.search.search_client import ReturnType
4-
from pypdb.clients.search.operators import text_operators
5-
from pypdb.clients.pdb.pdb_client import PDBFileType
1+
from urllib.request import urlretrieve
2+
from rcsbsearchapi import TextQuery
63

74

85
class PDB_DB:
@@ -16,9 +13,26 @@ def __init__(self):
1613
Initialise the PDB data base class
1714
1815
"""
19-
self.return_type = ReturnType.ENTRY
16+
pass
17+
# self.return_type = ReturnType.ENTRY
2018
self.results = []
2119

20+
def uniprot_id_to_pdb_id(self, uniprot_id: str):
    """
    Look up the first PDB search hit for a uniprot id

    A text query is built from the given id and executed; only the first
    result it yields is used.

    Args:
        uniprot_id: The uniprot id of the protein

    Returns:
        The first search result, or None if the search yields nothing

    """
    matches = TextQuery(value=uniprot_id)()
    # Only the first hit matters; any further results are never consumed
    return next(iter(matches), None)
35+
2236
def check_structure(self, uniprot_id: str) -> bool:
2337
"""
2438
Check if a protein is contained within the PDB
@@ -30,12 +44,28 @@ def check_structure(self, uniprot_id: str) -> bool:
3044
Is the protein in the PDB (True/False)
3145
3246
"""
33-
search_operator = text_operators.DefaultOperator(value=uniprot_id)
34-
try:
35-
self.results = perform_search(search_operator, self.return_type)
47+
pdb_id = self.uniprot_id_to_pdb_id(uniprot_id)
48+
if pdb_id is not None:
49+
self.results = [pdb_id]
3650
return True
37-
except ValueError:
38-
return False
51+
return False
52+
53+
def make_url(self, uniprot_id: str, filetype: str = "pdb") -> str:
    """
    Build the download URL for a structure file

    Args:
        uniprot_id: The identifier used as the file name; it is upper-cased
            before being placed in the URL
        filetype: The file extension to request (pdb or cif)

    Returns:
        The URL of the file to download

    """
    entry_name = uniprot_id.upper()
    return f"https://files.rcsb.org/download/{entry_name}.{filetype}"
3969

4070
def get_pdb(
4171
self,
@@ -54,26 +84,26 @@ def get_pdb(
5484
5585
"""
5686

57-
# Search for the protein if we have not already done so
5887
if not self.results:
59-
search_operator = text_operators.DefaultOperator(value=uniprot_id)
60-
self.results = perform_search(search_operator, self.return_type)
88+
self.results = [self.uniprot_id_to_pdb_id(uniprot_id)]
6189

6290
# Try to get the PDB file
6391
pdb_id = self.results[0]
64-
filedata = pypdb.clients.pdb.pdb_client.get_pdb_file(
65-
pdb_id, PDBFileType(filetype), compression=True
66-
)
6792

68-
# If we couldn't find the file toggle the filetype and try again
69-
if filedata is None:
93+
try:
94+
url = self.make_url(pdb_id, filetype)
95+
filename, result = urlretrieve(url)
96+
with open(filename) as file:
97+
filedata = file.read()
98+
except Exception:
7099
if filetype == "pdb":
71100
filetype = "cif"
72101
else:
73102
filetype = "pdb"
74-
filedata = pypdb.clients.pdb.pdb_client.get_pdb_file(
75-
pdb_id, PDBFileType(filetype), compression=True
76-
)
103+
url = self.make_url(pdb_id, filetype)
104+
filename, result = urlretrieve(url)
105+
with open(filename) as file:
106+
filedata = file.read()
77107

78108
# If pdb is not the same then add the pdb id to the uniprot id as the identifier
79109
if pdb_id.lower() != uniprot_id.lower():

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ python_requires = >=3.6
2424
include_package_data = True
2525
install_requires =
2626
numpy
27-
pypdb
2827
requests
2928
pandas
3029
requests_html
30+
rcsbsearchapi
3131
bs4
3232
pypdb@git+https://github.com/williamgilpin/pypdb@master#egg=pypdb
3333
lxml_html_clean

0 commit comments

Comments
 (0)