# LR_4_task_5.py
import numpy as np
import pandas as pd
from sklearn.metrics import (confusion_matrix, accuracy_score, recall_score,
                             precision_score, f1_score, roc_curve, roc_auc_score)
import matplotlib.pyplot as plt

# Load the model scores and turn them into hard class predictions at the chosen threshold.
df = pd.read_csv('data_metrics.csv')
thresh = 0.5
df['predicted_RF'] = (df.model_RF >= thresh).astype('int')
df['predicted_LR'] = (df.model_LR >= thresh).astype('int')
print(df.head())

def find_TP(y_true, y_pred):
    # True positives: actual 1 predicted as 1.
    return sum((y_true == 1) & (y_pred == 1))

def find_FN(y_true, y_pred):
    # False negatives: actual 1 predicted as 0.
    return sum((y_true == 1) & (y_pred == 0))

def find_FP(y_true, y_pred):
    # False positives: actual 0 predicted as 1.
    return sum((y_true == 0) & (y_pred == 1))

def find_TN(y_true, y_pred):
    # True negatives: actual 0 predicted as 0.
    return sum((y_true == 0) & (y_pred == 0))

def find_conf_matrix_values(y_true, y_pred):
    TP = find_TP(y_true, y_pred)
    FN = find_FN(y_true, y_pred)
    FP = find_FP(y_true, y_pred)
    TN = find_TN(y_true, y_pred)
    return TP, FN, FP, TN

def benediuk_confusion_matrix(y_true, y_pred):
    # Same layout as sklearn's confusion_matrix: rows are actual, columns are predicted.
    TP, FN, FP, TN = find_conf_matrix_values(y_true, y_pred)
    return np.array([[TN, FP], [FN, TP]])

assert np.array_equal(
benediuk_confusion_matrix(df.actual_label.values, df.predicted_RF.values),
confusion_matrix(df.actual_label.values, df.predicted_RF.values)
), 'benediuk_confusion_matrix() is not correct for RF'
assert np.array_equal(
benediuk_confusion_matrix(df.actual_label.values, df.predicted_LR.values),
confusion_matrix(df.actual_label.values, df.predicted_LR.values)
), 'benediuk_confusion_matrix() is not correct for LR'
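# Optional sanity check on a tiny hand-made example (illustrative only, not part of
# data_metrics.csv): 2 TN, 1 FP, 1 FN, 2 TP should give the matrix [[2, 1], [1, 2]].
_toy_true = np.array([0, 0, 0, 1, 1, 1])
_toy_pred = np.array([0, 0, 1, 0, 1, 1])
print(benediuk_confusion_matrix(_toy_true, _toy_pred))  # expected: [[2 1] [1 2]]
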
def benediuk_accuracy_score(y_true, y_pred):
    # Accuracy: share of all samples classified correctly.
    TP, FN, FP, TN = find_conf_matrix_values(y_true, y_pred)
    return (TP + TN) / (TP + FN + FP + TN)

def benediuk_recall_score(y_true, y_pred):
    # Recall (sensitivity): share of actual positives that were found.
    TP, FN, FP, TN = find_conf_matrix_values(y_true, y_pred)
    return TP / (TP + FN)

assert benediuk_accuracy_score(df.actual_label.values, df.predicted_RF.values) == accuracy_score(df.actual_label.values, df.predicted_RF.values), 'my_accuracy_score failed on RF'
assert benediuk_accuracy_score(df.actual_label.values, df.predicted_LR.values) == accuracy_score(df.actual_label.values, df.predicted_LR.values), 'my_accuracy_score failed on LR'
assert benediuk_recall_score(df.actual_label.values, df.predicted_RF.values) == recall_score(df.actual_label.values, df.predicted_RF.values), 'my_recall_score failed on RF'
assert benediuk_recall_score(df.actual_label.values, df.predicted_LR.values) == recall_score(df.actual_label.values, df.predicted_LR.values), 'my_recall_score failed on LR'
def benediuk_precision_score(y_true, y_pred):
    # Precision: share of predicted positives that are correct; guard against 0/0.
    TP, FN, FP, TN = find_conf_matrix_values(y_true, y_pred)
    if TP + FP == 0:
        return 0.0
    return TP / (TP + FP)

assert benediuk_precision_score(df.actual_label.values, df.predicted_RF.values) == precision_score(df.actual_label.values, df.predicted_RF.values), 'my_precision_score failed on RF'
assert benediuk_precision_score(df.actual_label.values, df.predicted_LR.values) == precision_score(df.actual_label.values, df.predicted_LR.values), 'my_precision_score failed on LR'
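# Illustrative sketch (not required by the task): the TP + FP == 0 guard matters when
# no positives are predicted at all, e.g. at a very high threshold; the function then
# returns 0.0, which is also what sklearn's precision_score falls back to by default.
_all_negative = np.zeros(len(df), dtype=int)
print('Precision with no predicted positives:',
      benediuk_precision_score(df.actual_label.values, _all_negative))  # 0.0
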
def benediuk_f1_score(y_true, y_pred):
    # F1: harmonic mean of precision and recall; guard against 0/0.
    recall = benediuk_recall_score(y_true, y_pred)
    precision = benediuk_precision_score(y_true, y_pred)
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)

assert np.isclose(benediuk_f1_score(df.actual_label.values, df.predicted_RF.values),
f1_score(df.actual_label.values, df.predicted_RF.values)), 'my_f1_score failed on RF'
assert np.isclose(benediuk_f1_score(df.actual_label.values, df.predicted_LR.values),
f1_score(df.actual_label.values, df.predicted_LR.values)), 'my_f1_score failed on LR'
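# Optional illustration (not part of the original assignment): all metrics above are
# computed at thresh = 0.5; sweeping the threshold shows how F1 for the RF scores
# changes as precision is traded against recall.
for t in (0.25, 0.5, 0.75):
    pred_t = (df.model_RF >= t).astype(int).values
    print('RF F1 at threshold %.2f: %.3f' % (t, benediuk_f1_score(df.actual_label.values, pred_t)))
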
# ROC curves and AUC for both models, computed from the raw scores rather than the 0.5 cut-off.
fpr_RF, tpr_RF, thresholds_RF = roc_curve(df.actual_label.values, df.model_RF.values)
auc_RF = roc_auc_score(df.actual_label.values, df.model_RF.values)
fpr_LR, tpr_LR, thresholds_LR = roc_curve(df.actual_label.values, df.model_LR.values)
auc_LR = roc_auc_score(df.actual_label.values, df.model_LR.values)

plt.plot(fpr_RF, tpr_RF, 'r-', label='RF AUC: %.3f' % auc_RF)
plt.plot(fpr_LR, tpr_LR, 'b-', label='LR AUC: %.3f' % auc_LR)
plt.plot([0, 1], [0, 1], 'k-', label='random')         # chance-level diagonal
plt.plot([0, 0, 1], [0, 1, 1], 'g-', label='perfect')  # ideal classifier
plt.legend()
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
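# Optional cross-check (illustrative): AUC is the area under the ROC curve, so integrating
# tpr over fpr with the trapezoidal rule should reproduce roc_auc_score.
print('RF AUC (trapezoid): %.3f vs roc_auc_score: %.3f' % (np.trapz(tpr_RF, fpr_RF), auc_RF))
print('LR AUC (trapezoid): %.3f vs roc_auc_score: %.3f' % (np.trapz(tpr_LR, fpr_LR), auc_LR))
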
print("Confusion matrix (sklearn, RF):")
print(confusion_matrix(df.actual_label.values, df.predicted_RF.values))
print('TP:', find_TP(df.actual_label.values, df.predicted_RF.values))
print('FN:', find_FN(df.actual_label.values, df.predicted_RF.values))
print('FP:', find_FP(df.actual_label.values, df.predicted_RF.values))
print('TN:', find_TN(df.actual_label.values, df.predicted_RF.values))
print("My confusion matrix (RF):")
print(benediuk_confusion_matrix(df.actual_label.values, df.predicted_RF.values))
print('Accuracy RF: %.3f' % benediuk_accuracy_score(df.actual_label.values, df.predicted_RF.values))
print('Accuracy LR: %.3f' % benediuk_accuracy_score(df.actual_label.values, df.predicted_LR.values))
print('Recall RF: %.3f' % benediuk_recall_score(df.actual_label.values, df.predicted_RF.values))
print('Recall LR: %.3f' % benediuk_recall_score(df.actual_label.values, df.predicted_LR.values))
print('Precision RF: %.3f' % benediuk_precision_score(df.actual_label.values, df.predicted_RF.values))
print('Precision LR: %.3f' % benediuk_precision_score(df.actual_label.values, df.predicted_LR.values))
print('F1 RF: %.3f' % benediuk_f1_score(df.actual_label.values, df.predicted_RF.values))
print('F1 LR: %.3f' % benediuk_f1_score(df.actual_label.values, df.predicted_LR.values))