Skip to content

Commit 56b2d13

Browse files
committed
feat: add apo features
1 parent 1a8230e commit 56b2d13

File tree

8 files changed

+397
-140
lines changed

8 files changed

+397
-140
lines changed

docs/examples/3_access_system_files.ipynb

Lines changed: 218 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": null,
19+
"execution_count": 1,
2020
"metadata": {},
2121
"outputs": [],
2222
"source": [
@@ -36,9 +36,28 @@
3636
},
3737
{
3838
"cell_type": "code",
39-
"execution_count": null,
39+
"execution_count": 2,
4040
"metadata": {},
41-
"outputs": [],
41+
"outputs": [
42+
{
43+
"name": "stderr",
44+
"output_type": "stream",
45+
"text": [
46+
"2024-09-25 21:58:17,520 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s\n",
47+
"2024-09-25 21:58:17,521 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s\n"
48+
]
49+
},
50+
{
51+
"data": {
52+
"text/plain": [
53+
"{'1.W': '/Users/yusuf/.local/share/plinder/2024-06/v2/systems/4agi__1__1.C__1.W/ligand_files/1.W.sdf'}"
54+
]
55+
},
56+
"execution_count": 2,
57+
"metadata": {},
58+
"output_type": "execute_result"
59+
}
60+
],
4261
"source": [
4362
"plinder_system.ligand_sdfs"
4463
]
@@ -52,9 +71,31 @@
5271
},
5372
{
5473
"cell_type": "code",
55-
"execution_count": null,
74+
"execution_count": 3,
5675
"metadata": {},
57-
"outputs": [],
76+
"outputs": [
77+
{
78+
"name": "stderr",
79+
"output_type": "stream",
80+
"text": [
81+
"2024-09-25 21:58:18,231 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.57s\n",
82+
"2024-09-25 21:58:18,317 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.08s\n",
83+
"2024-09-25 21:58:18,317 | plinder.core.index.utils:148 | INFO : loading entries from 1 zips\n",
84+
"2024-09-25 21:58:18,323 | plinder.core.index.utils:163 | INFO : loaded 1 entries\n",
85+
"2024-09-25 21:58:18,324 | plinder.core.index.utils.load_entries:24 | INFO : runtime succeeded: 0.77s\n"
86+
]
87+
},
88+
{
89+
"data": {
90+
"text/plain": [
91+
"{'1.W': 'C[Se][C@@H]1O[C@@H](C)[C@@H](O)[C@@H](O)[C@@H]1O'}"
92+
]
93+
},
94+
"execution_count": 3,
95+
"metadata": {},
96+
"output_type": "execute_result"
97+
}
98+
],
5899
"source": [
59100
"plinder_system.smiles"
60101
]
@@ -75,9 +116,21 @@
75116
},
76117
{
77118
"cell_type": "code",
78-
"execution_count": null,
119+
"execution_count": 4,
79120
"metadata": {},
80-
"outputs": [],
121+
"outputs": [
122+
{
123+
"data": {
124+
"text/plain": [
125+
"('/Users/yusuf/.local/share/plinder/2024-06/v2/systems/4agi__1__1.C__1.W/receptor.pdb',\n",
126+
" '/Users/yusuf/.local/share/plinder/2024-06/v2/systems/4agi__1__1.C__1.W/receptor.cif')"
127+
]
128+
},
129+
"execution_count": 4,
130+
"metadata": {},
131+
"output_type": "execute_result"
132+
}
133+
],
81134
"source": [
82135
"plinder_system.receptor_pdb, plinder_system.receptor_cif"
83136
]
@@ -91,9 +144,20 @@
91144
},
92145
{
93146
"cell_type": "code",
94-
"execution_count": null,
147+
"execution_count": 5,
95148
"metadata": {},
96-
"outputs": [],
149+
"outputs": [
150+
{
151+
"data": {
152+
"text/plain": [
153+
"{'1.C': 'A'}"
154+
]
155+
},
156+
"execution_count": 5,
157+
"metadata": {},
158+
"output_type": "execute_result"
159+
}
160+
],
97161
"source": [
98162
"plinder_system.chain_mapping"
99163
]
@@ -107,9 +171,21 @@
107171
},
108172
{
109173
"cell_type": "code",
110-
"execution_count": null,
174+
"execution_count": 6,
111175
"metadata": {},
112-
"outputs": [],
176+
"outputs": [
177+
{
178+
"data": {
179+
"text/plain": [
180+
"('/Users/yusuf/.local/share/plinder/2024-06/v2/systems/4agi__1__1.C__1.W/sequences.fasta',\n",
181+
" {'1.C': 'MSTPGAQQVLFRTGIAAVNSTNHLRVYFQDVYGSIRESLYEGSWANGTEKNVIGNAKLGSPVAATSKELKHIRVYTLTEGNTLQEFAYDSGTGWYNGGLGGAKFQVAPYSXIAAVFLAGTDALQLRIYAQKPDNTIQEYMWNGDGWKEGTNLGGALPGTGIGATSFRYTDYNGPSIRIWFQTDDLKLVQRAYDPHKGWYPDLVTIFDRAPPRTAIAATSFGAGNSSIYMRIYFVNSDNTIWQVCWDHGKGYHDKGTITPVIQGSEVAIISWGSFANNGPDLRLYFQNGTYISAVSEWVWNRAHGSQLGRSALPPA'})"
182+
]
183+
},
184+
"execution_count": 6,
185+
"metadata": {},
186+
"output_type": "execute_result"
187+
}
188+
],
113189
"source": [
114190
"plinder_system.sequences_fasta, plinder_system.sequences"
115191
]
@@ -127,18 +203,145 @@
127203
},
128204
{
129205
"cell_type": "code",
130-
"execution_count": null,
206+
"execution_count": 7,
131207
"metadata": {},
132-
"outputs": [],
208+
"outputs": [
209+
{
210+
"name": "stderr",
211+
"output_type": "stream",
212+
"text": [
213+
"2024-09-25 21:58:20,579 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 2.01s\n",
214+
"2024-09-25 21:58:20,962 | plinder.core.scores.links.query_links:24 | INFO : runtime succeeded: 2.54s\n"
215+
]
216+
}
217+
],
133218
"source": [
134219
"link_info = plinder_system.linked_structures"
135220
]
136221
},
137222
{
138223
"cell_type": "code",
139-
"execution_count": null,
224+
"execution_count": 8,
140225
"metadata": {},
141-
"outputs": [],
226+
"outputs": [
227+
{
228+
"data": {
229+
"text/html": [
230+
"<div>\n",
231+
"<style scoped>\n",
232+
" .dataframe tbody tr th:only-of-type {\n",
233+
" vertical-align: middle;\n",
234+
" }\n",
235+
"\n",
236+
" .dataframe tbody tr th {\n",
237+
" vertical-align: top;\n",
238+
" }\n",
239+
"\n",
240+
" .dataframe thead th {\n",
241+
" text-align: right;\n",
242+
" }\n",
243+
"</style>\n",
244+
"<table border=\"1\" class=\"dataframe\">\n",
245+
" <thead>\n",
246+
" <tr style=\"text-align: right;\">\n",
247+
" <th></th>\n",
248+
" <th>id</th>\n",
249+
" <th>pocket_fident</th>\n",
250+
" <th>lddt</th>\n",
251+
" <th>bb_lddt</th>\n",
252+
" <th>lddt_lp_ave</th>\n",
253+
" <th>lddt_pli_ave</th>\n",
254+
" <th>bisy_rmsd_ave</th>\n",
255+
" <th>sort_score</th>\n",
256+
" <th>kind</th>\n",
257+
" </tr>\n",
258+
" </thead>\n",
259+
" <tbody>\n",
260+
" <tr>\n",
261+
" <th>0</th>\n",
262+
" <td>4uou_B</td>\n",
263+
" <td>100.0</td>\n",
264+
" <td>0.972682</td>\n",
265+
" <td>0.994065</td>\n",
266+
" <td>0.987813</td>\n",
267+
" <td>0.989777</td>\n",
268+
" <td>0.159702</td>\n",
269+
" <td>2.40</td>\n",
270+
" <td>apo</td>\n",
271+
" </tr>\n",
272+
" <tr>\n",
273+
" <th>1</th>\n",
274+
" <td>4uou_C</td>\n",
275+
" <td>100.0</td>\n",
276+
" <td>0.973562</td>\n",
277+
" <td>0.994687</td>\n",
278+
" <td>0.967287</td>\n",
279+
" <td>0.951068</td>\n",
280+
" <td>0.194233</td>\n",
281+
" <td>2.40</td>\n",
282+
" <td>apo</td>\n",
283+
" </tr>\n",
284+
" <tr>\n",
285+
" <th>2</th>\n",
286+
" <td>4uou_D</td>\n",
287+
" <td>100.0</td>\n",
288+
" <td>0.973604</td>\n",
289+
" <td>0.994235</td>\n",
290+
" <td>0.972579</td>\n",
291+
" <td>0.973048</td>\n",
292+
" <td>0.101252</td>\n",
293+
" <td>2.40</td>\n",
294+
" <td>apo</td>\n",
295+
" </tr>\n",
296+
" <tr>\n",
297+
" <th>3</th>\n",
298+
" <td>4uou_A</td>\n",
299+
" <td>100.0</td>\n",
300+
" <td>0.967257</td>\n",
301+
" <td>0.994800</td>\n",
302+
" <td>0.976908</td>\n",
303+
" <td>0.963504</td>\n",
304+
" <td>0.214243</td>\n",
305+
" <td>2.40</td>\n",
306+
" <td>apo</td>\n",
307+
" </tr>\n",
308+
" <tr>\n",
309+
" <th>4</th>\n",
310+
" <td>Q4WW81_A</td>\n",
311+
" <td>100.0</td>\n",
312+
" <td>0.982275</td>\n",
313+
" <td>0.998587</td>\n",
314+
" <td>0.999679</td>\n",
315+
" <td>0.997273</td>\n",
316+
" <td>0.126228</td>\n",
317+
" <td>98.57</td>\n",
318+
" <td>pred</td>\n",
319+
" </tr>\n",
320+
" </tbody>\n",
321+
"</table>\n",
322+
"</div>"
323+
],
324+
"text/plain": [
325+
" id pocket_fident lddt bb_lddt lddt_lp_ave lddt_pli_ave \\\n",
326+
"0 4uou_B 100.0 0.972682 0.994065 0.987813 0.989777 \n",
327+
"1 4uou_C 100.0 0.973562 0.994687 0.967287 0.951068 \n",
328+
"2 4uou_D 100.0 0.973604 0.994235 0.972579 0.973048 \n",
329+
"3 4uou_A 100.0 0.967257 0.994800 0.976908 0.963504 \n",
330+
"4 Q4WW81_A 100.0 0.982275 0.998587 0.999679 0.997273 \n",
331+
"\n",
332+
" bisy_rmsd_ave sort_score kind \n",
333+
"0 0.159702 2.40 apo \n",
334+
"1 0.194233 2.40 apo \n",
335+
"2 0.101252 2.40 apo \n",
336+
"3 0.214243 2.40 apo \n",
337+
"4 0.126228 98.57 pred "
338+
]
339+
},
340+
"execution_count": 8,
341+
"metadata": {},
342+
"output_type": "execute_result"
343+
}
344+
],
142345
"source": [
143346
"link_info[\n",
144347
" [\n",

src/plinder/core/loader/dataset.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from torch.utils.data import DataLoader, Dataset
1010

1111
from plinder.core.index.system import PlinderSystem
12-
from plinder.core.loader.featurizer import structure_featurizer
12+
from plinder.core.loader.featurizer import system_featurizer
1313
from plinder.core.loader.utils import collate_batch
1414
from plinder.core.scores import query_index
1515
from plinder.core.scores.query import FILTERS
@@ -32,8 +32,8 @@ class PlinderDataset(Dataset): # type: ignore
3232
use_alternate_structures: bool, default=True
3333
Whether to load alternate structures
3434
featurizer: Callable[
35-
[Structure, int], dict[str, torch.Tensor]
36-
] = structure_featurizer,
35+
[PlinderSystem, int], dict[str, dict[str, torch.Tensor]]
36+
] = system_featurizer,
3737
Transformation to turn structure to input tensors
3838
"""
3939

@@ -43,8 +43,9 @@ def __init__(
4343
filters: FILTERS = None,
4444
use_alternate_structures: bool = True,
4545
featurizer: Callable[
46-
[Structure], torch.Tensor | dict[str, torch.Tensor]
47-
] = structure_featurizer,
46+
[PlinderSystem, int, bool],
47+
torch.Tensor | dict[str, dict[str, torch.Tensor]],
48+
] = system_featurizer,
4849
**kwargs: Any,
4950
):
5051
index = query_index(splits=[split], filters=filters)
@@ -68,14 +69,14 @@ def __getitem__(
6869
holo_structure = s.holo_structure
6970
features_and_coords = None
7071
if self._featurizer is not None:
71-
features_and_coords = self._featurizer(holo_structure)
72+
features_and_coords = self._featurizer(
73+
s,
74+
featurize_apo=self._use_alternate_structures, # type: ignore
75+
)
7276

7377
item: dict[str, Any] = {
7478
"system_id": holo_structure.id,
75-
"holo_structure": holo_structure,
76-
"alternate_structures": s.alternate_structures
77-
if self._use_alternate_structures
78-
else {},
79+
"plinder_system": s,
7980
"features_and_coords": features_and_coords,
8081
"path": s.system_cif,
8182
}

0 commit comments

Comments
 (0)