-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetrics.py
More file actions
180 lines (132 loc) · 4.33 KB
/
metrics.py
File metadata and controls
180 lines (132 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
Custom metrics.
"""
from collections import OrderedDict
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def confusion_dataframe(y_true, y_pred,
                        columns=('P', 'N', 'PP', 'NP', 'TP', 'TN', 'FP', 'FN'),
                        orderby='PP'):
    """Builds a confusion dataframe.

    Each row corresponds to a unique label X. Values of the row contain
    elements of the confusion matrix of binary classification with labels
    ["X", "not X"]. The condition for this classification is "name == X".

    P - condition positive
    N - condition negative
    PP - predicted condition positive
    NP - predicted condition negative
    TP - true positives
    TN - true negatives
    FP - false positives (type I error)
    FN - false negatives (type II error)

    More information here: https://en.wikipedia.org/wiki/Confusion_matrix

    Parameters
    ----------
    y_true : array-like
        True labels
    y_pred : array-like
        Predicted labels
    columns : array-like
        Columns to be included in the dataframe, in the specified order
    orderby : str or array-like
        Column name or list of names to specify the column(s) by which
        the dataframe should be ordered (descending).

    Returns
    -------
    confusion_df : pd.DataFrame
        Confusion dataframe

    Examples
    --------
    >>> from metrics import confusion_dataframe
    >>> y_true = ['cat', 'dog', 'mouse', 'cat']
    >>> y_pred = ['cat', 'mouse', 'dog', 'dog']
    >>> confusion_dataframe(y_true, y_pred)
           P  N  PP  NP  TP  TN  FP  FN
    dog    1  3   2   2   0   1   2   1
    cat    2  2   1   3   1   2   0   1
    mouse  1  3   1   3   0   2   1   1

    Order by true positives and then predicted positives and show only
    false negatives and false positives in the specified order.

    >>> confusion_dataframe(y_true, y_pred, columns=['FN', 'FP'], orderby=['TP', 'PP'])
           FN  FP
    cat     1   0
    dog     1   2
    mouse   1   1
    """
    # Convert all labels to str so mixed-type label sequences compare safely.
    y_true = np.array(y_true, dtype=str)
    y_pred = np.array(y_pred, dtype=str)

    confusion = confusion_matrix(y_true, y_pred)
    labels = unique_labels(y_true, y_pred)

    # Derive every confusion-matrix quantity from the multiclass matrix:
    # rows of `confusion` are true labels, columns are predictions.
    P = confusion.sum(axis=1)    # condition positive (row sums)
    PP = confusion.sum(axis=0)   # predicted positive (column sums)
    TP = confusion.diagonal()
    N = len(y_true) - P
    NP = len(y_pred) - PP
    FP = PP - TP
    TN = N - FP
    FN = NP - TN

    confusion_df = pd.DataFrame(OrderedDict([
        ('name', labels),
        ('P', P),
        ('N', N),
        ('PP', PP),
        ('NP', NP),
        ('TP', TP),
        ('TN', TN),
        ('FP', FP),
        ('FN', FN)
    ]))
    confusion_df = confusion_df.set_index('name')
    # `del confusion_df.index.name` raises AttributeError on modern pandas;
    # assigning None is the supported way to clear the index name.
    confusion_df.index.name = None
    # `list(columns)` so tuple defaults (and any iterable) select columns,
    # not a single tuple key.
    return confusion_df.sort_values(orderby, ascending=False)[list(columns)]
def bleu(reference, candidate):
    """Calculates the BLEU score of candidate sentence.

    Candidate sentence is compared to reference sentence using a modified
    form of precision:

        BLEU = m / w

    Where m is the number of words in the candidate that were found in
    reference and w is the total number of words in the candidate.

    More information here: https://en.wikipedia.org/wiki/BLEU

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence. The true sentence that
        is considered the ground truth.
    candidate : array-like
        A list of words of a candidate sentence. A sentence generated by
        the algorithm that needs to be evaluated.

    Returns
    -------
    bleu : float
        BLEU score

    Examples
    --------
    >>> from metrics import bleu
    >>> reference = ['test', 'basic']
    >>> candidate = ['test', 'add']
    >>> bleu(reference, candidate)
    0.5
    """
    # An empty candidate has no words to match: score is 0 by definition
    # (avoids division by zero).
    if not candidate:
        return 0.0
    matched = sum(1 for word in candidate if word in set(reference))
    return matched / len(candidate)
def rouge(reference, candidate):
    """Calculates a ROUGE-style recall score of the candidate sentence.

    ROUGE = m / w

    Where m is the number of words in the candidate that were found in
    the reference and w is the total number of words in the reference.

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence (the ground truth).
    candidate : array-like
        A list of words of a candidate sentence to be evaluated.

    Returns
    -------
    rouge : float
        Recall score; 0.0 when the reference is empty.
    """
    # An empty reference means nothing can be recalled: score is 0
    # (avoids division by zero; m is necessarily 0 in this case too).
    if not reference:
        return 0.0
    matched = sum(1 for word in candidate if word in set(reference))
    return matched / len(reference)
def f1_score(reference, candidate):
    """Calculates the F1 score: harmonic mean of BLEU (precision) and
    ROUGE (recall) of the candidate sentence against the reference.

    F1 = 2 * (precision * recall) / (precision + recall)

    Parameters
    ----------
    reference : array-like
        A list of words of a reference sentence (the ground truth).
    candidate : array-like
        A list of words of a candidate sentence to be evaluated.

    Returns
    -------
    f1 : float
        F1 score; 0.0 when both precision and recall are 0.
    """
    precision = bleu(reference, candidate)
    recall = rouge(reference, candidate)
    # When both scores are 0 the harmonic mean is defined as 0
    # (avoids division by zero; the numerator is also 0 here).
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)