Skip to content

Commit a527ba2

Browse files
author
Sergey Feldman
committed
try a better svm?
1 parent a4313e4 commit a527ba2

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

01_compare_baseline_models.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
from sklearn.ensemble import RandomForestClassifier
1212
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
1313
from sklearn.preprocessing import MinMaxScaler
14-
from sklearn.ensemble import BaggingClassifier
1514
from sklearn.pipeline import Pipeline
16-
from sklearn.svm import SVC
15+
from sklearn.svm import SVC, LinearSVC
1716
from utils import load_data
1817

1918

@@ -27,20 +26,24 @@
2726
database = database[database.nrow >= 50]
2827

2928

30-
def evaluate_pipeline_helper(X, y, pipeline, param_grid, random_state=0):
29+
def evaluate_pipeline_helper(X, y, pipeline, param_grid, scoring="roc_auc_ovr_weighted", random_state=0):
3130
inner_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
3231
outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
33-
clf = GridSearchCV(
34-
estimator=pipeline, param_grid=param_grid, cv=inner_cv, scoring="roc_auc_ovr_weighted", n_jobs=N_JOBS
35-
)
36-
nested_score = cross_val_score(clf, X=X, y=y, cv=outer_cv, scoring="roc_auc_ovr_weighted", n_jobs=N_JOBS)
32+
clf = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=inner_cv, scoring=scoring, n_jobs=N_JOBS)
33+
nested_score = cross_val_score(clf, X=X, y=y, cv=outer_cv, scoring=scoring, n_jobs=N_JOBS)
3734
return nested_score
3835

3936

4037
def define_and_evaluate_pipelines(X, y, random_state=0):
4138
# LinearSVC
4239
pipeline1 = Pipeline(
43-
[("scaler", MinMaxScaler()), ("svc", SVC(kernel="linear", probability=True, random_state=random_state))]
40+
[
41+
("scaler", MinMaxScaler()),
42+
(
43+
"svc",
44+
SVC(kernel="linear", class_weight="balanced", probability=True, tol=1e-4, random_state=random_state),
45+
),
46+
]
4447
)
4548
param_grid1 = {
4649
"svc__C": [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2],
@@ -97,6 +100,7 @@ def define_and_evaluate_pipelines(X, y, random_state=0):
97100
evaluated_datasets.append(dataset_name)
98101
times.append(elapsed)
99102
print("done. elapsed:", elapsed)
103+
print("scores:", np.mean(nested_scores1), np.mean(nested_scores2), np.mean(nested_scores3))
100104

101105
#
102106
results1 = np.array(results1)

03_autogluon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
import autogluon as ag
66
from autogluon import TabularPrediction as task
7-
from sklearn.model_selection import cross_val_score, StratifiedKFold
7+
from sklearn.model_selection import StratifiedKFold
88
from sklearn.metrics import roc_auc_score
99
from utils import load_data
1010

0 commit comments

Comments
 (0)