Skip to content

Commit dda9e1c

Browse files
committed
Fix zero division error when computing f-score
1 parent c029aa6 commit dda9e1c

File tree

3 files changed

+179
-12
lines changed

3 files changed

+179
-12
lines changed

docs/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ sphinx_code_tabs==0.5.3
66
sphinx-gallery==0.10.1
77
matplotlib==3.5.2
88
pandas==1.4.2
9-
ray==1.13.0
9+
ray
1010
numpy
1111
git+https://github.com/charles9n/bert-sklearn.git@master
1212
shap==0.44.1

hiclass/metrics.py

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
"""Helper functions to compute hierarchical evaluation metrics."""
22

3-
from typing import Union, List
3+
import warnings
4+
from typing import List, Union
5+
46
import numpy as np
5-
from sklearn.utils import check_array
7+
from sklearn.exceptions import UndefinedMetricWarning
68
from sklearn.metrics import log_loss as sk_log_loss
79
from sklearn.preprocessing import LabelEncoder
10+
from sklearn.utils import check_array
811

9-
from hiclass.HierarchicalClassifier import make_leveled
1012
from hiclass import HierarchicalClassifier
13+
from hiclass.HierarchicalClassifier import make_leveled
1114

1215

1316
def _validate_input(y_true, y_pred):
@@ -208,7 +211,7 @@ def _recall_macro(y_true: np.ndarray, y_pred: np.ndarray):
208211
return _compute_macro(y_true, y_pred, _recall_micro)
209212

210213

211-
def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
214+
def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro", zero_division: str = "warn"):
212215
r"""
213216
Compute hierarchical f-score.
214217
@@ -223,33 +226,98 @@ def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
223226
224227
- `micro`: The f-score is computed by summing over all individual instances, :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`, where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
225228
- `macro`: The f-score is computed for each instance and then averaged, :math:`\displaystyle{hF = \frac{\sum_{i=1}^{n}hF_{i}}{n}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors.
229+
zero_division: {"warn", 0.0, 1.0, np.nan}, default="warn"
230+
Sets the value to return when there is a zero division, i.e., when all
231+
predictions and labels are negative.
232+
233+
Notes:
234+
- If set to "warn", this acts like 0, but a warning is also raised.
235+
- If set to `np.nan`, such values will be excluded from the average.
236+
226237
Returns
227238
-------
228239
f1 : float
229240
Weighted average of the precision and recall
241+
242+
Notes
243+
-----
244+
When ``precision + recall == 0`` (i.e. classes
245+
are completely different from both ``y_true`` and ``y_pred``), f-score is
246+
undefined. In such cases, by default f-score will be set to 0.0, and
247+
``UndefinedMetricWarning`` will be raised. This behavior can be modified by
248+
setting the ``zero_division`` parameter.
249+
250+
References
251+
----------
252+
.. [1] `A survey of hierarchical classification across different application domains
253+
<https://link.springer.com/article/10.1007/S10618-010-0175-9>`_.
254+
255+
Examples
256+
--------
257+
>>> import numpy as np
258+
>>> from hiclass.metrics import f1
259+
>>> y_true = [[0, 1, 2], [3, 4, 5]]
260+
>>> y_pred = [[0, 1, 2], [6, 7, 8]]
261+
>>> f1(y_true, y_pred, average='micro')
262+
0.5
263+
>>> f1(y_true, y_pred, average='macro')
264+
0.5
265+
266+
>>> # zero division
267+
>>> y_true = [[0, 1], [2, 3]]
268+
>>> y_pred = [[4, 5], [6, 7]]
269+
>>> f1(y_true, y_pred)
270+
F-score is ill-defined and being set to 0.0. Use `zero_division` parameter to control this behavior.
271+
0.0
272+
>>> f1(y_true, y_pred, zero_division=1.0)
273+
1.0
274+
>>> f1(y_true, y_pred, zero_division=np.nan)
275+
nan
276+
277+
>>> # multilabel hierarchical classification
278+
>>> y_true = [[["a", "b", "c"]], [["d", "e", "f"]], [["g", "h", "i"]]]
279+
>>> y_pred = [[["a", "b", "c"]], [["d", "e", "f"]], [["g", "h", "i"]]]
280+
>>> f1(y_true, y_pred)
281+
1.0
230282
"""
231283
y_true, y_pred = _validate_input(y_true, y_pred)
232284
functions = {
233285
"micro": _f_score_micro,
234286
"macro": _f_score_macro,
235287
}
236-
return functions[average](y_true, y_pred)
288+
return functions[average](y_true, y_pred, zero_division)
237289

238290

239-
def _f_score_micro(y_true: np.ndarray, y_pred: np.ndarray, zero_division):
    """Compute the micro-averaged hierarchical f-score.

    ``zero_division`` controls the value returned when both hierarchical
    precision and recall are zero (i.e. the harmonic mean is undefined):
    ``"warn"`` raises an ``UndefinedMetricWarning`` and yields 0.0; the
    values 0 and 1 are returned as-is; anything else (e.g. ``np.nan``)
    yields ``np.nan``.
    """
    hp = precision(y_true, y_pred)
    hr = recall(y_true, y_pred)
    denominator = hp + hr
    if denominator != 0:
        # Regular case: harmonic mean of hierarchical precision and recall.
        return np.float64(2 * hp * hr / denominator)
    # precision + recall == 0: the f-score is undefined.
    if zero_division == "warn":
        warnings.warn(
            "F-score is ill-defined and being set to 0.0. "
            "Use `zero_division` parameter to control this behavior.",
            UndefinedMetricWarning,
            stacklevel=2,
        )
        return np.float64(0.0)
    if zero_division in [0, 1]:
        return np.float64(zero_division)
    return np.nan
243308

244309

245-
def _f_score_macro(y_true: np.ndarray, y_pred: np.ndarray, zero_division):
    """Compute the macro-averaged hierarchical f-score.

    The micro f-score is evaluated per sample and averaged over all
    samples; ``zero_division`` is forwarded to each per-sample call.
    """
    macro_average = _compute_macro(y_true, y_pred, _f_score_micro, zero_division)
    return macro_average
247312

248313

249-
def _compute_macro(y_true: np.ndarray, y_pred: np.ndarray, _micro_function):
314+
def _compute_macro(y_true: np.ndarray, y_pred: np.ndarray, _micro_function, zero_division=None):
250315
overall_sum = 0
251316
for ground_truth, prediction in zip(y_true, y_pred):
252-
sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]))
317+
if zero_division:
318+
sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]), zero_division)
319+
else:
320+
sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]))
253321
overall_sum = overall_sum + sample_score
254322
return overall_sum / len(y_true)
255323

tests/test_metrics.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,24 +264,55 @@ def test_f1_micro_1d_list():
264264
assert 0.5 == f1(y_true, y_pred, "micro")
265265

266266

267+
def test_f1_micro_1d_list_zero_division():
    # Truth and prediction are fully disjoint, so precision + recall == 0.
    y_true = [1, 2, 3, 4]
    y_pred = [5, 6, 7, 8]
    assert f1(y_true, y_pred, "micro") == 0.0
    assert f1(y_true, y_pred, "micro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "micro", np.nan))
273+
274+
267275
def test_f1_micro_2d_list():
268276
y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
269277
y_pred = [[1, 2, 5, 6], [1, 2, 3, 4]]
270278
assert 0.5 == f1(y_true, y_pred, "micro")
271279

272280

281+
def test_f1_micro_2d_list_zero_division():
    # Rows are swapped between truth and prediction: no per-sample overlap.
    y_true = [[1, 2, 3, 4], [5, 6, 7, 8]]
    y_pred = [[5, 6, 7, 8], [1, 2, 3, 4]]
    assert f1(y_true, y_pred, "micro") == 0.0
    assert f1(y_true, y_pred, "micro", 1.0) == 1.0
286+
287+
273288
def test_f1_micro_1d_np_array():
274289
y_true = np.array([1, 2, 3, 4])
275290
y_pred = np.array([1, 2, 5, 6])
276291
assert 0.5 == f1(y_true, y_pred, "micro")
277292

278293

294+
def test_f1_micro_1d_np_array_zero_division():
    # Disjoint 1-D arrays trigger the zero-division branch.
    y_true = np.array([1, 2, 3, 4])
    y_pred = np.array([5, 6, 7, 8])
    assert f1(y_true, y_pred, "micro") == 0.0
    assert f1(y_true, y_pred, "micro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "micro", np.nan))
300+
301+
279302
def test_f1_micro_2d_np_array():
280303
y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
281304
y_pred = np.array([[1, 2, 5, 6], [1, 2, 3, 4]])
282305
assert 0.5 == f1(y_true, y_pred, "micro")
283306

284307

308+
def test_f1_micro_2d_np_array_zero_division():
    # Rows swapped between truth and prediction: no per-sample overlap.
    y_true = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
    y_pred = np.array([[5, 6, 7, 8], [1, 2, 3, 4]])
    assert f1(y_true, y_pred, "micro") == 0.0
    assert f1(y_true, y_pred, "micro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "micro", np.nan))
314+
315+
285316
def test_f1_micro_3d_np_array():
286317
y_true = np.array(
287318
[
@@ -299,30 +330,80 @@ def test_f1_micro_3d_np_array():
299330
assert 1 == f1(y_true, y_true, "micro")
300331

301332

333+
def test_f1_micro_3d_np_array_zero_division():
334+
y_true = np.array(
335+
[
336+
[["a", "b"], ["c", "d"]],
337+
[["e", "f"], ["g", "h"]],
338+
]
339+
)
340+
y_pred = np.array(
341+
[
342+
[["i", "j"], ["k", "l"]],
343+
[["m", "n"], ["o", "p"]],
344+
]
345+
)
346+
assert 0.0 == f1(y_true, y_pred, "micro")
347+
assert 1.0 == f1(y_true, y_pred, "micro", 1.0)
348+
assert np.isnan(f1(y_true, y_pred, "micro", np.nan))
349+
350+
302351
def test_f1_macro_1d_list():
303352
y_true = [1, 2, 3, 4]
304353
y_pred = [1, 2, 3, 4]
305354
assert 1 == f1(y_true, y_pred, "macro")
306355

307356

357+
def test_f1_macro_1d_list_zero_division():
    # Disjoint labels: every per-sample f-score hits the zero-division path.
    y_true = [1, 2, 3, 4]
    y_pred = [5, 6, 7, 8]
    assert f1(y_true, y_pred, "macro") == 0.0
    assert f1(y_true, y_pred, "macro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "macro", np.nan))
363+
364+
308365
def test_f1_macro_2d_list():
309366
y_true = [[1, 2, 3, 4], [1, 2, 5, 6]]
310367
y_pred = [[1, 5, 6], [1, 2, 3]]
311368
assert 0.4285714 == approx(f1(y_true, y_pred, "macro"))
312369

313370

371+
def test_f1_macro_2d_list_zero_division():
    # Rows swapped between truth and prediction: no per-sample overlap.
    y_true = [[1, 2, 3, 4], [5, 6, 7, 8]]
    y_pred = [[5, 6, 7, 8], [1, 2, 3, 4]]
    assert f1(y_true, y_pred, "macro") == 0.0
    assert f1(y_true, y_pred, "macro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "macro", np.nan))
377+
378+
314379
def test_f1_macro_1d_np_array():
315380
y_true = np.array([1, 2, 3, 4])
316381
y_pred = np.array([1, 2, 3, 4])
317382
assert 1 == f1(y_true, y_pred, "macro")
318383

319384

385+
def test_f1_macro_1d_np_array_zero_division():
    # Disjoint 1-D arrays trigger the zero-division branch in macro mode.
    y_true = np.array([1, 2, 3, 4])
    y_pred = np.array([5, 6, 7, 8])
    assert f1(y_true, y_pred, "macro") == 0.0
    assert f1(y_true, y_pred, "macro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "macro", np.nan))
391+
392+
320393
def test_f1_macro_2d_np_array():
321394
y_true = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
322395
y_pred = np.array([[1, 5, 6], [1, 2, 3]])
323396
assert 0.4285714 == approx(f1(y_true, y_pred, "macro"))
324397

325398

399+
def test_f1_macro_2d_np_array_zero_division():
    # Rows swapped between truth and prediction: no per-sample overlap.
    y_true = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
    y_pred = np.array([[5, 6, 7, 8], [1, 2, 3, 4]])
    assert f1(y_true, y_pred, "macro") == 0.0
    assert f1(y_true, y_pred, "macro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "macro", np.nan))
405+
406+
326407
def test_f1_macro_3d_np_array():
327408
y_true = np.array(
328409
[
@@ -340,6 +421,24 @@ def test_f1_macro_3d_np_array():
340421
assert 1 == f1(y_true, y_true, "macro")
341422

342423

424+
def test_f1_macro_3d_np_array_zero_division():
    # Multi-label (3-D) case: truth and prediction share no classes at all.
    y_true = np.array([[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]])
    y_pred = np.array([[["i", "j"], ["k", "l"]], [["m", "n"], ["o", "p"]]])
    assert f1(y_true, y_pred, "macro") == 0.0
    assert f1(y_true, y_pred, "macro", 1.0) == 1.0
    assert np.isnan(f1(y_true, y_pred, "macro", np.nan))
440+
441+
343442
def test_empty_levels_2d_list_1():
344443
y_true = [["2", "3"], ["1"], ["4", "5", "6"]]
345444
y_pred = [["1"], ["2", "3"], ["4", "5", "6"]]

0 commit comments

Comments
 (0)