
Commit 67d75af

Add micro and macro metrics #minor (#101)
1 parent a4c79e8 commit 67d75af

File tree: 2 files changed (+482, -58 lines)


hiclass/metrics.py

Lines changed: 167 additions & 35 deletions
@@ -9,81 +9,167 @@ def _validate_input(y_true, y_pred):
     assert len(y_true) == len(y_pred)
     y_pred = make_leveled(y_pred)
     y_true = make_leveled(y_true)
-    y_true = check_array(y_true, dtype=None)
-    y_pred = check_array(y_pred, dtype=None)
+    y_true = check_array(y_true, dtype=None, ensure_2d=False, allow_nd=True)
+    y_pred = check_array(y_pred, dtype=None, ensure_2d=False, allow_nd=True)
     return y_true, y_pred


-def precision(y_true: np.ndarray, y_pred: np.ndarray):
+def precision(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
     r"""
-    Compute precision score for hierarchical classification.
-
-    :math:`hP = \displaystyle{\frac{\sum_{i}| \alpha_i \cap \beta_i |}{\sum_{i}| \alpha_i |}}`,
-    where :math:`\alpha_i` is the set consisting of the most specific classes predicted
-    for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the
-    set containing the true most specific classes of test example :math:`i` and all
-    their ancestors, with summations computed over all test examples.
+    Compute hierarchical precision score.

     Parameters
     ----------
     y_true : np.array of shape (n_samples, n_levels)
         Ground truth (correct) labels.
     y_pred : np.array of shape (n_samples, n_levels)
         Predicted labels, as returned by a classifier.
+    average: {"micro", "macro"}, str, default="micro"
+        This parameter determines the type of averaging performed during the computation:
+
+        - `micro`: The precision is computed by summing over all individual instances, :math:`\displaystyle{hP = \frac{\sum_{i=1}^{n}| \alpha_i \cap \beta_i |}{\sum_{i=1}^{n}| \alpha_i |}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors, with summations computed over all test examples.
+        - `macro`: The precision is computed for each instance and then averaged, :math:`\displaystyle{hP = \frac{\sum_{i=1}^{n}hP_{i}}{n}}`, where :math:`hP_{i}` is the hierarchical precision of test example :math:`i`.
+
     Returns
     -------
     precision : float
         What proportion of positive identifications was actually correct?
     """
     y_true, y_pred = _validate_input(y_true, y_pred)
+    functions = {
+        "micro": _precision_micro,
+        "macro": _precision_macro,
+    }
+    return functions[average](y_true, y_pred)
+
+
+def _precision_micro(y_true: np.ndarray, y_pred: np.ndarray):
+    precision_micro = {
+        1: _precision_micro_1d,
+        2: _precision_micro_2d,
+        3: _precision_micro_3d,
+    }
+    return precision_micro[y_true.ndim](y_true, y_pred)
+
+
+def _precision_micro_1d(y_true: np.ndarray, y_pred: np.ndarray):
+    sum_intersection = 0
+    sum_prediction_and_ancestors = 0
+    for ground_truth, prediction in zip(y_true, y_pred):
+        ground_truth_set = set([ground_truth])
+        ground_truth_set.discard("")
+        predicted_set = set([prediction])
+        predicted_set.discard("")
+        sum_intersection = sum_intersection + len(
+            ground_truth_set.intersection(predicted_set)
+        )
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set)
+    return sum_intersection / sum_prediction_and_ancestors
+
+
+def _precision_micro_2d(y_true: np.ndarray, y_pred: np.ndarray):
     sum_intersection = 0
     sum_prediction_and_ancestors = 0
     for ground_truth, prediction in zip(y_true, y_pred):
         ground_truth_set = set(ground_truth)
         ground_truth_set.discard("")
-        prediction_set = set(prediction)
-        prediction_set.discard("")
+        predicted_set = set(prediction)
+        predicted_set.discard("")
         sum_intersection = sum_intersection + len(
-            ground_truth_set.intersection(prediction_set)
+            ground_truth_set.intersection(predicted_set)
         )
-        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
-            prediction_set
-        )
-    precision = sum_intersection / sum_prediction_and_ancestors
-    return precision
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set)
+    return sum_intersection / sum_prediction_and_ancestors
+
+
+def _precision_micro_3d(y_true: np.ndarray, y_pred: np.ndarray):
+    sum_intersection = 0
+    sum_prediction_and_ancestors = 0
+    for row_ground_truth, row_prediction in zip(y_true, y_pred):
+        ground_truth_set = set()
+        predicted_set = set()
+        for ground_truth, prediction in zip(row_ground_truth, row_prediction):
+            ground_truth_set.update(ground_truth)
+            predicted_set.update(prediction)
+        ground_truth_set.discard("")
+        predicted_set.discard("")
+        sum_intersection = sum_intersection + len(
+            ground_truth_set.intersection(predicted_set)
+        )
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(predicted_set)
+    return sum_intersection / sum_prediction_and_ancestors


+def _precision_macro(y_true: np.ndarray, y_pred: np.ndarray):
+    return _compute_macro(y_true, y_pred, _precision_micro)
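
The new `average` parameter makes the aggregation explicit: "micro" pools the ancestor sets of all samples before dividing, while "macro" scores each sample separately and then averages. A minimal usage sketch (not part of this commit; the toy labels below are made up and the functions are assumed to be importable from hiclass.metrics, as the file path suggests):

import numpy as np
from hiclass.metrics import precision

# Each row lists the path from the root to the most specific class;
# shorter paths are padded with "", which the metrics ignore.
y_true = np.array([["A", "B", "C"], ["D", "E", ""]])
y_pred = np.array([["A", "B", "X"], ["D", "F", ""]])

# micro: pooled counts -> (|{A, B}| + |{D}|) / (|{A, B, X}| + |{D, F}|) = 3 / 5 = 0.6
print(precision(y_true, y_pred, average="micro"))
# macro: per-sample scores 2/3 and 1/2 averaged -> approximately 0.58
print(precision(y_true, y_pred, average="macro"))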

-def recall(y_true: np.ndarray, y_pred: np.ndarray):
-    r"""
-    Compute recall score for hierarchical classification.
-
-    :math:`\displaystyle{hR = \frac{\sum_i|\alpha_i \cap \beta_i|}{\sum_i|\beta_i|}}`,
-    where :math:`\alpha_i` is the set consisting of the most specific classes predicted
-    for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the
-    set containing the true most specific classes of test example :math:`i` and all
-    their ancestors, with summations computed over all test examples.
+def recall(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
+    r"""
+    Compute hierarchical recall score.

     Parameters
     ----------
     y_true : np.array of shape (n_samples, n_levels)
         Ground truth (correct) labels.
     y_pred : np.array of shape (n_samples, n_levels)
         Predicted labels, as returned by a classifier.
+    average: {"micro", "macro"}, str, default="micro"
+        This parameter determines the type of averaging performed during the computation:
+
+        - `micro`: The recall is computed by summing over all individual instances, :math:`\displaystyle{hR = \frac{\sum_{i=1}^{n}|\alpha_i \cap \beta_i|}{\sum_{i=1}^{n}|\beta_i|}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors, with summations computed over all test examples.
+        - `macro`: The recall is computed for each instance and then averaged, :math:`\displaystyle{hR = \frac{\sum_{i=1}^{n}hR_{i}}{n}}`, where :math:`hR_{i}` is the hierarchical recall of test example :math:`i`.
+
     Returns
     -------
     recall : float
         What proportion of actual positives was identified correctly?
     """
     y_true, y_pred = _validate_input(y_true, y_pred)
+    functions = {
+        "micro": _recall_micro,
+        "macro": _recall_macro,
+    }
+    return functions[average](y_true, y_pred)
+
+
+def _recall_micro(y_true: np.ndarray, y_pred: np.ndarray):
+    recall_micro = {
+        1: _recall_micro_1d,
+        2: _recall_micro_2d,
+        3: _recall_micro_3d,
+    }
+    return recall_micro[y_true.ndim](y_true, y_pred)
+
+
+def _recall_micro_1d(y_true: np.ndarray, y_pred: np.ndarray):
+    sum_intersection = 0
+    sum_prediction_and_ancestors = 0
+    for ground_truth, prediction in zip(y_true, y_pred):
+        ground_truth_set = set([ground_truth])
+        ground_truth_set.discard("")
+        predicted_set = set([prediction])
+        predicted_set.discard("")
+        sum_intersection = sum_intersection + len(
+            ground_truth_set.intersection(predicted_set)
+        )
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
+            ground_truth_set
+        )
+    recall = sum_intersection / sum_prediction_and_ancestors
+    return recall
+
+
+def _recall_micro_2d(y_true: np.ndarray, y_pred: np.ndarray):
     sum_intersection = 0
     sum_prediction_and_ancestors = 0
     for ground_truth, prediction in zip(y_true, y_pred):
         ground_truth_set = set(ground_truth)
         ground_truth_set.discard("")
-        prediction_set = set(prediction)
-        prediction_set.discard("")
+        predicted_set = set(prediction)
+        predicted_set.discard("")
         sum_intersection = sum_intersection + len(
-            ground_truth_set.intersection(prediction_set)
+            ground_truth_set.intersection(predicted_set)
         )
         sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
             ground_truth_set
@@ -92,26 +178,72 @@ def recall(y_true: np.ndarray, y_pred: np.ndarray):
     return recall


-def f1(y_true: np.ndarray, y_pred: np.ndarray):
-    r"""
-    Compute f1 score for hierarchical classification.
-
-    :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`,
-    where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
+def _recall_micro_3d(y_true: np.ndarray, y_pred: np.ndarray):
+    sum_intersection = 0
+    sum_prediction_and_ancestors = 0
+    for row_ground_truth, row_prediction in zip(y_true, y_pred):
+        ground_truth_set = set()
+        predicted_set = set()
+        for ground_truth, prediction in zip(row_ground_truth, row_prediction):
+            ground_truth_set.update(ground_truth)
+            predicted_set.update(prediction)
+        ground_truth_set.discard("")
+        predicted_set.discard("")
+        sum_intersection = sum_intersection + len(
+            ground_truth_set.intersection(predicted_set)
+        )
+        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
+            ground_truth_set
+        )
+    recall = sum_intersection / sum_prediction_and_ancestors
+    return recall
+
+
+def _recall_macro(y_true: np.ndarray, y_pred: np.ndarray):
+    return _compute_macro(y_true, y_pred, _recall_micro)
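
Hierarchical recall divides the pooled intersection by the size of the true label sets, whereas precision divides by the size of the predicted sets, so a prediction that stops at a shallower level than the truth loses recall but not precision. A small illustration of that asymmetry (again not part of the commit; imports assumed from hiclass.metrics):

import numpy as np
from hiclass.metrics import precision, recall

# The classifier stops one level early: every predicted node is correct,
# but the most specific true class is never reached.
y_true = np.array([["A", "B", "C"]])
y_pred = np.array([["A", "B", ""]])

print(precision(y_true, y_pred))  # 2 / 2 = 1.0, denominator is the predicted set
print(recall(y_true, y_pred))     # 2 / 3, denominator is the true set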
+
+
+def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
+    r"""
+    Compute hierarchical f-score.

     Parameters
     ----------
     y_true : np.array of shape (n_samples, n_levels)
         Ground truth (correct) labels.
     y_pred : np.array of shape (n_samples, n_levels)
         Predicted labels, as returned by a classifier.
+    average: {"micro", "macro"}, str, default="micro"
+        This parameter determines the type of averaging performed during the computation:
+
+        - `micro`: The f-score is computed by summing over all individual instances, :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`, where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
+        - `macro`: The f-score is computed for each instance and then averaged, :math:`\displaystyle{hF = \frac{\sum_{i=1}^{n}hF_{i}}{n}}`, where :math:`hF_{i}` is the hierarchical f-score of test example :math:`i`.
+
     Returns
     -------
     f1 : float
         Weighted average of the precision and recall.
     """
     y_true, y_pred = _validate_input(y_true, y_pred)
+    functions = {
+        "micro": _f_score_micro,
+        "macro": _f_score_macro,
+    }
+    return functions[average](y_true, y_pred)
+
+
+def _f_score_micro(y_true: np.ndarray, y_pred: np.ndarray):
     prec = precision(y_true, y_pred)
     rec = recall(y_true, y_pred)
-    f1 = 2 * prec * rec / (prec + rec)
-    return f1
+    return 2 * prec * rec / (prec + rec)
+
+
+def _f_score_macro(y_true: np.ndarray, y_pred: np.ndarray):
+    return _compute_macro(y_true, y_pred, _f_score_micro)
+
+
+def _compute_macro(y_true: np.ndarray, y_pred: np.ndarray, _micro_function):
+    overall_sum = 0
+    for ground_truth, prediction in zip(y_true, y_pred):
+        sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]))
+        overall_sum = overall_sum + sample_score
+    return overall_sum / len(y_true)
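
With the dispatch in place, f1 mirrors precision and recall: the micro variant takes one harmonic mean of the pooled scores, while _compute_macro re-runs the micro function on each sample wrapped in a length-one array and averages the per-sample results, so any micro metric can be promoted to a macro one without duplicating the set logic. A hedged usage sketch with the same made-up labels as above (imports assumed from hiclass.metrics):

import numpy as np
from hiclass.metrics import f1

y_true = np.array([["A", "B", "C"], ["D", "E", ""]])
y_pred = np.array([["A", "B", "X"], ["D", "F", ""]])

# micro: hP = hR = 0.6, so hF = 2 * 0.6 * 0.6 / (0.6 + 0.6) = 0.6
print(f1(y_true, y_pred, average="micro"))
# macro: per-sample scores hF_1 = 2/3 and hF_2 = 1/2 averaged -> 7/12, approximately 0.58
print(f1(y_true, y_pred, average="macro"))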
