Skip to content

Commit 844bad4

Browse files
committed
raise ValueError when privileged/unprivileged arrays are empty
1 parent da5f1c7 commit 844bad4

File tree

4 files changed

+34
-15
lines changed

4 files changed

+34
-15
lines changed

src/core/metrics/fairness/fairness_metrics_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
import numpy as np
33
from sklearn.metrics import confusion_matrix
44

5+
56
def filter_rows_by_inputs(data: np.ndarray, filter_func: Callable[[np.ndarray], bool]):
67
return data[np.apply_along_axis(filter_func, 1, data)]
78

9+
810
def calculate_confusion_matrix(test: np.array, truth: np.array, positive_class: int) -> dict:
911
# cast test and truth to int
1012
test = test.astype(int)
@@ -16,3 +18,14 @@ def calculate_confusion_matrix(test: np.array, truth: np.array, positive_class:
1618
fp = cm[1, 0]
1719
tn = cm[1, 1]
1820
return {"tp": tp, "tn": tn, "fp": fp, "fn": fn}
21+
22+
23+
def validate_fairness_groups(privileged: np.ndarray, unprivileged: np.ndarray) -> None:
24+
empty_groups = []
25+
if len(privileged) == 0:
26+
empty_groups.append("privileged")
27+
if len(unprivileged) == 0:
28+
empty_groups.append("unprivileged")
29+
30+
if empty_groups:
31+
raise ValueError(f"Arrays cannot be empty for the following groups: {', '.join(empty_groups)}")

src/core/metrics/fairness/group/disparate_impact_ratio.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,22 @@
44
import numpy as np
55
from sklearn.base import ClassifierMixin
66

7+
from src.core.metrics.fairness.fairness_metrics_utils import validate_fairness_groups
8+
9+
710
class DisparateImpactRatio:
811
"""
912
Calculate disparate impact ratio (DIR).
1013
"""
14+
1115
@staticmethod
1216
def calculate_model(
1317
samples: np.ndarray,
1418
model: ClassifierMixin,
1519
privilege_columns: List[int],
1620
privilege_values: List[int],
17-
favorable_output: np.ndarray
18-
) -> float:
21+
favorable_output: np.ndarray,
22+
) -> float:
1923
"""
2024
Calculate disparate impact ratio (DIR) for model outputs.
2125
:param samples a NumPy array of inputs to be used for testing fairness
@@ -34,17 +38,17 @@ def calculate_model(
3438

3539
@staticmethod
3640
def calculate(
37-
privileged: Union[int, np.ndarray],
38-
unprivileged: Union[int, np.ndarray],
39-
favorable_output: int
40-
) -> float:
41+
privileged: Union[int, np.ndarray], unprivileged: Union[int, np.ndarray], favorable_output: int
42+
) -> float:
4143
"""
4244
Calculate disparate impact ratio (DIR) when the labels are pre-calculated.
4345
:param privileged a NumPy array with the privileged groups
4446
:param unprivileged a NumPy array with the unprivileged groups
4547
:param favorable_output: an output that is considered favorable / desirable
4648
:return: DIR, between 0 and 1
4749
"""
50+
validate_fairness_groups(privileged=privileged, unprivileged=unprivileged)
51+
4852
probability_privileged = np.sum(privileged[:, -1] == favorable_output) / len(privileged)
4953
probability_unprivileged = np.sum(unprivileged[:, -1] == favorable_output) / len(unprivileged)
5054
return probability_unprivileged / probability_privileged

src/core/metrics/fairness/group/group_statistical_parity_difference.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
from sklearn.base import ClassifierMixin
66

7+
from src.core.metrics.fairness.fairness_metrics_utils import validate_fairness_groups
8+
79

810
class GroupStatisticalParityDifference:
911
"""
@@ -47,6 +49,8 @@ def calculate(
4749
:param favorable_output: an output that is considered favorable / desirable
4850
:return: SPD, between -1 and 1
4951
"""
52+
validate_fairness_groups(privileged=privileged, unprivileged=unprivileged)
53+
5054
probability_privileged = np.sum(privileged[:, -1] == favorable_output) / len(privileged)
5155
probability_unprivileged = np.sum(unprivileged[:, -1] == favorable_output) / len(unprivileged)
5256
return probability_unprivileged - probability_privileged

tests/metrics/test_fairness.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# pylint: disable=line-too-long, missing-function-docstring
2-
import math
32

3+
import pytest
44
from pytest import approx
55
import numpy as np
66
import pandas as pd
@@ -234,9 +234,8 @@ def test_dir_empty_dataframe(self):
234234
empty_df = pd.DataFrame(columns=df.columns)
235235
privileged, unprivileged = get_privileged_unprivileged_split(df=empty_df)
236236

237-
score = DisparateImpactRatio.calculate(privileged=privileged, unprivileged=unprivileged, favorable_output=1)
238-
239-
assert math.isnan(score)
237+
with pytest.raises(ValueError):
238+
DisparateImpactRatio.calculate(privileged=privileged, unprivileged=unprivileged, favorable_output=1)
240239

241240

242241
class TestGroupStatisticalParityDifference:
@@ -305,11 +304,10 @@ def test_spd_empty_dataframe(self):
305304
empty_df = pd.DataFrame(columns=df.columns)
306305
privileged, unprivileged = get_privileged_unprivileged_split(df=empty_df)
307306

308-
score = GroupStatisticalParityDifference.calculate(
309-
privileged=privileged, unprivileged=unprivileged, favorable_output=1
310-
)
311-
312-
assert math.isnan(score)
307+
with pytest.raises(ValueError):
308+
GroupStatisticalParityDifference.calculate(
309+
privileged=privileged, unprivileged=unprivileged, favorable_output=1
310+
)
313311

314312

315313
def test_average_odds_difference():

0 commit comments

Comments
 (0)