|
81 | 81 | "name": "stderr", |
82 | 82 | "output_type": "stream", |
83 | 83 | "text": [ |
84 | | - "<frozen importlib._bootstrap>:241: RuntimeWarning: to-Python converter for boost::shared_ptr<RDKit::FilterHierarchyMatcher> already registered; second conversion method ignored.\n", |
85 | | - "<frozen importlib._bootstrap>:241: RuntimeWarning: to-Python converter for boost::shared_ptr<RDKit::FilterCatalogEntry> already registered; second conversion method ignored.\n" |
| 84 | + "<frozen importlib._bootstrap>:241: RuntimeWarning: to-Python converter for boost::shared_ptr<RDKit::FilterHierarchyMatcher> already registered; second conversion method ignored.\n" |
86 | 85 | ] |
87 | 86 | } |
88 | 87 | ], |
|
108 | 107 | }, |
109 | 108 | { |
110 | 109 | "cell_type": "code", |
111 | | - "execution_count": 5, |
| 110 | + "execution_count": 2, |
112 | 111 | "id": "0f8eefe0", |
113 | 112 | "metadata": {}, |
114 | 113 | "outputs": [], |
|
123 | 122 | }, |
124 | 123 | { |
125 | 124 | "cell_type": "code", |
126 | | - "execution_count": 6, |
| 125 | + "execution_count": 3, |
127 | 126 | "id": "3137351a", |
128 | 127 | "metadata": {}, |
129 | 128 | "outputs": [], |
|
136 | 135 | }, |
137 | 136 | { |
138 | 137 | "cell_type": "code", |
139 | | - "execution_count": 7, |
| 138 | + "execution_count": 4, |
140 | 139 | "id": "ab862891", |
141 | 140 | "metadata": {}, |
142 | 141 | "outputs": [ |
|
149 | 148 | " 'C[C@H](CCC(=O)O)CC1=CC=CC=C1']" |
150 | 149 | ] |
151 | 150 | }, |
152 | | - "execution_count": 7, |
| 151 | + "execution_count": 4, |
153 | 152 | "metadata": {}, |
154 | 153 | "output_type": "execute_result" |
155 | 154 | } |
|
161 | 160 | }, |
162 | 161 | { |
163 | 162 | "cell_type": "code", |
164 | | - "execution_count": 8, |
| 163 | + "execution_count": 5, |
165 | 164 | "id": "93f94568", |
166 | 165 | "metadata": {}, |
167 | 166 | "outputs": [ |
|
174 | 173 | " 'C[C@H](CCC(=O)O)CC1=CC=CC=C1']" |
175 | 174 | ] |
176 | 175 | }, |
177 | | - "execution_count": 8, |
| 176 | + "execution_count": 5, |
178 | 177 | "metadata": {}, |
179 | 178 | "output_type": "execute_result" |
180 | 179 | } |
|
186 | 185 | }, |
187 | 186 | { |
188 | 187 | "cell_type": "code", |
189 | | - "execution_count": 9, |
| 188 | + "execution_count": 6, |
190 | 189 | "id": "cab9fc95", |
191 | 190 | "metadata": {}, |
192 | 191 | "outputs": [ |
|
199 | 198 | " 'C[C@H](CCC(=O)O)CC1=CC=CC=C1']" |
200 | 199 | ] |
201 | 200 | }, |
202 | | - "execution_count": 9, |
| 201 | + "execution_count": 6, |
203 | 202 | "metadata": {}, |
204 | 203 | "output_type": "execute_result" |
205 | 204 | } |
|
211 | 210 | }, |
212 | 211 | { |
213 | 212 | "cell_type": "code", |
214 | | - "execution_count": 10, |
| 213 | + "execution_count": 7, |
215 | 214 | "id": "19a21c0e", |
216 | 215 | "metadata": {}, |
217 | 216 | "outputs": [ |
|
221 | 220 | "['CC(=O)OC1=CC=CC=C1C(=O)O', 'C[C@H](CCC(=O)O)CC1=CC=CC=C1']" |
222 | 221 | ] |
223 | 222 | }, |
224 | | - "execution_count": 10, |
| 223 | + "execution_count": 7, |
225 | 224 | "metadata": {}, |
226 | 225 | "output_type": "execute_result" |
227 | 226 | } |
|
241 | 240 | }, |
242 | 241 | { |
243 | 242 | "cell_type": "code", |
244 | | - "execution_count": 14, |
| 243 | + "execution_count": 8, |
245 | 244 | "id": "02a47287", |
246 | 245 | "metadata": {}, |
247 | | - "outputs": [], |
| 246 | + "outputs": [ |
| 247 | + { |
| 248 | + "name": "stderr", |
| 249 | + "output_type": "stream", |
| 250 | + "text": [ |
| 251 | + "/home/jakub/PycharmProjects/scikit-fingerprints/skfp/bases/base_filter.py:109: UserWarning: return_indicators is deprecated and will be removed in 2.0, use return_type instead\n", |
| 252 | + " warnings.warn(\n" |
| 253 | + ] |
| 254 | + } |
| 255 | + ], |
248 | 256 | "source": [ |
249 | 257 | "mw_mask = MolecularWeightFilter(return_indicators=True)\n", |
250 | 258 | "lipinski_mask = LipinskiFilter(return_indicators=True)\n", |
|
254 | 262 | }, |
255 | 263 | { |
256 | 264 | "cell_type": "code", |
257 | | - "execution_count": 15, |
| 265 | + "execution_count": 9, |
258 | 266 | "id": "4d53f1e4", |
259 | 267 | "metadata": {}, |
260 | 268 | "outputs": [], |
|
272 | 280 | }, |
273 | 281 | { |
274 | 282 | "cell_type": "code", |
275 | | - "execution_count": 16, |
| 283 | + "execution_count": 10, |
276 | 284 | "id": "307f6e05", |
277 | 285 | "metadata": {}, |
278 | 286 | "outputs": [], |
|
285 | 293 | }, |
286 | 294 | { |
287 | 295 | "cell_type": "code", |
288 | | - "execution_count": 17, |
| 296 | + "execution_count": 11, |
289 | 297 | "id": "11970880", |
290 | 298 | "metadata": {}, |
291 | 299 | "outputs": [ |
|
384 | 392 | "4 True False False " |
385 | 393 | ] |
386 | 394 | }, |
387 | | - "execution_count": 17, |
| 395 | + "execution_count": 11, |
388 | 396 | "metadata": {}, |
389 | 397 | "output_type": "execute_result" |
390 | 398 | } |
|
405 | 413 | "cell_type": "markdown", |
406 | 414 | "id": "a581bb4f-ba1e-4d96-afcc-c219afd442ac", |
407 | 415 | "metadata": {}, |
408 | | - "source": "Substructural filters use sets of SMARTS patterns to define unwanted substructures. An example is Brenk filter ([docs](https://scikit-fingerprints.readthedocs.io/latest/modules/generated/skfp.filters.BrenkFilter.html)), designed to filter out molecules containing substructures with undesirable pharmacokinetics or toxicity, e.g. sulfates, phosphates, nitro groups. Other filters from this group often work based on similar principles, but differing in how aggressively they filter the molecules." |
| 416 | + "source": [ |
| 417 | + "Substructural filters use sets of SMARTS patterns to define unwanted substructures. An example is the Brenk filter ([docs](https://scikit-fingerprints.readthedocs.io/latest/modules/generated/skfp.filters.BrenkFilter.html)), designed to filter out molecules containing substructures with undesirable pharmacokinetics or toxicity, e.g. sulfates, phosphates, nitro groups. Other filters from this group often work on similar principles, but differ in how aggressively they filter the molecules." |
| 418 | + ] |
409 | 419 | }, |
410 | 420 | { |
411 | 421 | "cell_type": "code", |
412 | | - "execution_count": 84, |
| 422 | + "execution_count": 12, |
413 | 423 | "id": "b0285ab1", |
414 | 424 | "metadata": { |
415 | 425 | "scrolled": true |
416 | 426 | }, |
| 427 | + "outputs": [], |
| 428 | + "source": [ |
| 429 | + "from skfp.filters import BrenkFilter\n", |
| 430 | + "\n", |
| 431 | + "brenk_filter = BrenkFilter()" |
| 432 | + ] |
| 433 | + }, |
| 434 | + { |
| 435 | + "cell_type": "markdown", |
| 436 | + "id": "9af3ce4f", |
| 437 | + "metadata": {}, |
| 438 | + "source": [ |
| 439 | + "The meanings of the filter conditions are available through the `.get_feature_names_out()` method. Generally, they are interpretable names given by the filters' creators, rather than raw SMARTS patterns." |
| 440 | + ] |
| 441 | + }, |
| 442 | + { |
| 443 | + "cell_type": "code", |
| 444 | + "execution_count": 13, |
| 445 | + "id": "94b2989e-f6ac-46a7-b1d6-17662b8a4579", |
| 446 | + "metadata": { |
| 447 | + "scrolled": true |
| 448 | + }, |
417 | 449 | "outputs": [ |
418 | 450 | { |
419 | 451 | "data": { |
420 | 452 | "text/plain": [ |
421 | | - "['description', 'FilterSet', 'Reference', 'Scope']" |
| 453 | + "array(['>_2_ester_groups', '2-halo_pyridine', 'acid_halide',\n", |
| 454 | + " 'acyclic_C=C-O', 'acyl_cyanide', 'acyl_hydrazine', 'aldehyde',\n", |
| 455 | + " 'Aliphatic_long_chain', 'alkyl_halide', 'amidotetrazole',\n", |
| 456 | + " 'aniline', 'azepane', 'Azido_group', 'Azo_group', 'azocane',\n", |
| 457 | + " 'benzidine', 'beta-keto/anhydride', 'biotin_analogue',\n", |
| 458 | + " 'Carbo_cation/anion', 'catechol', 'charged_oxygen_or_sulfur_atoms',\n", |
| 459 | + " 'chinone_1', 'chinone_2', 'conjugated_nitrile_group',\n", |
| 460 | + " 'crown_ether', 'cumarine', 'cyanamide',\n", |
| 461 | + " 'cyanate_/aminonitrile_/thiocyanate', 'cyanohydrins',\n", |
| 462 | + " 'cycloheptane_1', 'cycloheptane_2', 'cyclooctane_1',\n", |
| 463 | + " 'cyclooctane_2', 'diaminobenzene_1', 'diaminobenzene_2',\n", |
| 464 | + " 'diaminobenzene_3', 'diazo_group', 'diketo_group', 'disulphide',\n", |
| 465 | + " 'enamine', 'ester_of_HOBT', 'four_member_lactones',\n", |
| 466 | + " 'halogenated_ring_1', 'halogenated_ring_2', 'heavy_metal',\n", |
| 467 | + " 'het-C-het_not_in_ring', 'hydantoin', 'hydrazine', 'hydroquinone',\n", |
| 468 | + " 'hydroxamic_acid', 'imine_1', 'imine_2', 'iodine', 'isocyanate',\n", |
| 469 | + " 'isolated_alkene', 'ketene', 'methylidene-1,3-dithiole',\n", |
| 470 | + " 'Michael_acceptor_1', 'Michael_acceptor_2', 'Michael_acceptor_3',\n", |
| 471 | + " 'Michael_acceptor_4', 'Michael_acceptor_5', 'N_oxide',\n", |
| 472 | + " 'N-acyl-2-amino-5-mercapto-1,3,4-_thiadiazole', 'N-C-halo',\n", |
| 473 | + " 'N-halo', 'N-hydroxyl_pyridine', 'nitro_group', 'N-nitroso',\n", |
| 474 | + " 'oxime_1', 'oxime_2', 'Oxygen-nitrogen_single_bond',\n", |
| 475 | + " 'Perfluorinated_chain', 'peroxide', 'phenol_ester',\n", |
| 476 | + " 'phenyl_carbonate', 'phosphor', 'phthalimide',\n", |
| 477 | + " 'Polycyclic_aromatic_hydrocarbon_1',\n", |
| 478 | + " 'Polycyclic_aromatic_hydrocarbon_2',\n", |
| 479 | + " 'Polycyclic_aromatic_hydrocarbon_3', 'polyene',\n", |
| 480 | + " 'quaternary_nitrogen_1', 'quaternary_nitrogen_2',\n", |
| 481 | + " 'quaternary_nitrogen_3', 'saponine_derivative', 'silicon_halogen',\n", |
| 482 | + " 'stilbene', 'sulfinic_acid', 'Sulfonic_acid_1', 'Sulfonic_acid_2',\n", |
| 483 | + " 'sulfonyl_cyanide', 'sulfur_oxygen_single_bond', 'sulphate',\n", |
| 484 | + " 'sulphur_nitrogen_single_bond', 'Thiobenzothiazole_1',\n", |
| 485 | + " 'thiobenzothiazole_2', 'Thiocarbonyl_group', 'thioester',\n", |
| 486 | + " 'thiol_1', 'thiol_2', 'Three-membered_heterocycle', 'triflate',\n", |
| 487 | + " 'triphenyl_methyl-silyl', 'triple_bond'], dtype='<U44')" |
422 | 488 | ] |
423 | 489 | }, |
424 | | - "execution_count": 84, |
| 490 | + "execution_count": 13, |
425 | 491 | "metadata": {}, |
426 | 492 | "output_type": "execute_result" |
427 | 493 | } |
428 | 494 | ], |
429 | 495 | "source": [ |
430 | | - "from skfp.filters import BrenkFilter\n", |
431 | | - "\n", |
432 | | - "brenk_filter = BrenkFilter()" |
| 496 | + "brenk_filter.get_feature_names_out()" |
433 | 497 | ] |
434 | 498 | }, |
435 | 499 | { |
436 | 500 | "cell_type": "markdown", |
437 | | - "id": "9af3ce4f", |
| 501 | + "id": "6c6f153b-a8c9-4118-b51d-8f164f47c5d8", |
438 | 502 | "metadata": {}, |
439 | 503 | "source": [ |
440 | | - "Patterns are saved in `._filters` attribute of the filter object. They are represented using RDKit `FilterCatalog` objects, which are quite efficient in checking the patterns, but make it challenging to inspect SMARTS patterns from Python. The first few patterns from BRENK and their meanings are:" |
| 504 | + "The underlying SMARTS patterns are represented using RDKit `FilterCatalog` objects, which are quite efficient at checking the patterns. Unfortunately, extracting the actual SMARTS strings in Python is challenging; they are more easily looked up in the RDKit files. The first few patterns from BRENK and their meanings are:" |
441 | 505 | ] |
442 | 506 | }, |
443 | 507 | { |
|
0 commit comments