-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrandom_forest.py
More file actions
76 lines (74 loc) · 4 KB
/
random_forest.py
File metadata and controls
76 lines (74 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from preprocess import preprocess_data
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, confusion_matrix, roc_curve, roc_auc_score
import csv
# Exhaustive grid search: for every combination of preprocessing options and
# RandomForest hyper-parameters, train a classifier, evaluate it on the test
# split, and append one result row to data_random_forest.csv.
from itertools import product

# Raw string: "\h" is not a recognised escape sequence, so the non-raw literal
# emits a SyntaxWarning on recent Python versions. The path value is unchanged.
file = r"Data\healthcare-dataset-stroke-data.csv"

# NOTE(review): `log` is passed as the *strings* "True"/"False", both of which
# are truthy -- confirm preprocess_data compares against these strings rather
# than testing truthiness.
# NOTE(review): `sample` is iterated but never used below, so every
# configuration is trained and logged twice with identical settings.
# Presumably it was meant to be forwarded to preprocess_data -- confirm and
# either wire it through or drop it.
# product() varies its right-most argument fastest, so the iteration order is
# the same as the equivalent nested for-loops (log outermost, sample innermost).
for (
    log,
    max_depth,
    min_samples_split,
    max_features,
    bootstrap,
    random_state,
    n_estimators,
    num_pca_components,
    sample,
) in product(
    ["True", "False"],
    [10, 15, None],
    [2, 5, 10],
    ["sqrt", "log2", 0.8, 0.5],
    [True, False],
    [123, 42],
    [100, 200, 300],
    [18, 15, 12, 9, 6, 4],
    ["SMOTE", "not"],
):
    # NOTE(review): the arguments here depend only on `log` and
    # `num_pca_components`; if preprocess_data is deterministic its result
    # could be cached per (log, num_pca_components) pair -- verify first.
    X_train, X_test, Y_train, Y_test = preprocess_data(
        file,
        log,
        num_pca_components,
    )

    rf_classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=1,
        bootstrap=bootstrap,
        random_state=random_state,
        max_features=max_features,
    )

    # Train the model on the training data
    rf_classifier.fit(X_train, Y_train)

    # Predict hard labels and the probability of class index 1 on the test data
    Y_pred = rf_classifier.predict(X_test)
    y_pred_prob = rf_classifier.predict_proba(X_test)[:, 1]

    fpr, tpr, thresholds = roc_curve(Y_test, y_pred_prob)
    roc_auc = roc_auc_score(Y_test, y_pred_prob)

    # Record 0 in the results when no PCA component count was given
    pca = num_pca_components if num_pca_components is not None else 0

    cm = confusion_matrix(Y_test, Y_pred)
    print(cm)
    f1 = f1_score(Y_test, Y_pred, average="macro")

    data = [
        f1,
        log,
        pca,
        cm,
        n_estimators,
        max_depth,
        min_samples_split,
        max_features,
        bootstrap,
        random_state,
        roc_auc,
        fpr,
        tpr,
        thresholds,
    ]

    # Append after every run so partial results survive an interrupted search.
    # newline="" is required by the csv module; without it each row ends in
    # "\r\r\n" on Windows, which shows up as blank lines between rows.
    with open("data_random_forest.csv", "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(data)