|
1 | 1 | import numpy as np |
2 | 2 | import skfp.fingerprints as fps |
3 | | -from ogb.graphproppred import GraphPropPredDataset |
4 | 3 | from rdkit.Chem import Mol |
5 | 4 | from skfp.bases import BaseFingerprintTransformer |
6 | | -from skfp.datasets.moleculenet import load_moleculenet_benchmark |
| 5 | +from skfp.datasets.moleculenet import load_moleculenet_benchmark, load_ogb_splits |
7 | 6 | from skfp.preprocessing import MolFromSmilesTransformer |
8 | 7 | from skfp.utils import no_rdkit_logs |
9 | 8 | from sklearn.ensemble import RandomForestClassifier |
@@ -42,7 +41,7 @@ def fp_name_to_fp(fp_name: str) -> tuple[BaseFingerprintTransformer, dict]: |
42 | 41 | fingerprint = fps.EStateFingerprint(n_jobs=-1) |
43 | 42 | fp_params_grid = {"variant": ["sum", "bit", "count"]} |
44 | 43 | elif fp_name == "FCFP": |
45 | | - fingerprint = fps.ECFPFingerprint(use_fcfp=True, n_jobs=-1) |
| 44 | + fingerprint = fps.ECFPFingerprint(use_pharmacophoric_invariants=True, n_jobs=-1) |
46 | 45 | fp_params_grid = { |
47 | 46 | "fp_size": [1024, 2048, 4096], |
48 | 47 | "radius": [2, 3], |
@@ -78,7 +77,7 @@ def fp_name_to_fp(fp_name: str) -> tuple[BaseFingerprintTransformer, dict]: |
78 | 77 | fp_params_grid = { |
79 | 78 | "fp_size": [512, 1024, 2048], |
80 | 79 | "radius": [2, 3], |
81 | | - "variant": ["bit", "count"], |
| 80 | + "count": [False, True], |
82 | 81 | } |
83 | 82 | elif fp_name == "Pattern": |
84 | 83 | fingerprint = fps.PatternFingerprint() |
@@ -157,13 +156,9 @@ def train_and_tune_fp_classifier( |
157 | 156 | print("DATASET", dataset_name) |
158 | 157 | X = np.array(X) |
159 | 158 |
|
160 | | - dataset = GraphPropPredDataset( |
161 | | - name=f"ogbg-mol{dataset_name.lower()}", root=".tmp" |
162 | | - ) |
163 | | - split_idx = dataset.get_idx_split() |
| 159 | + train_idxs, valid_idxs, test_idxs = load_ogb_splits(dataset_name) |
164 | 160 |
|
165 | | - train_idxs = list(split_idx["train"]) + list(split_idx["valid"]) |
166 | | - test_idxs = list(split_idx["test"]) |
| 161 | + train_idxs = list(train_idxs) + list(valid_idxs) |
167 | 162 |
|
168 | 163 | smiles_train = X[train_idxs] |
169 | 164 | smiles_test = X[test_idxs] |
@@ -206,6 +201,7 @@ def train_and_tune_fp_classifier( |
206 | 201 | fp=fp, |
207 | 202 | fp_params_grid=fp_params_grid, |
208 | 203 | ) |
| 204 | + |
209 | 205 | print( |
210 | 206 | f"AUROC default {auroc_default:.1%}, tuned {auroc_tuned:.1%}, diff: {diff:.1%}" |
211 | 207 | ) |
0 commit comments