Commit 150cf01

Merge pull request #5 from christinaexyou/add-fairness-metrics
feat(RHOAIENG-21045): Add fairness metrics and tests
2 parents 872549b + cd7c5a6

File tree

9 files changed: +10523 −0 lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions

```diff
@@ -15,6 +15,8 @@ dependencies = [
     "requests>=2.31.0,<3",
     "cryptography>=44.0.2,<46",
     "h5py>=3.13.0,<4",
+    "scikit-learn",
+    "aif360",
 ]

 [project.optional-dependencies]
```
src/core/metrics/fairness/fairness_metrics_utils.py

Lines changed: 12 additions & 0 deletions

```python
from typing import Callable

import numpy as np


def filter_rows_by_inputs(data: np.ndarray, filter_func: Callable[[np.ndarray], bool]) -> np.ndarray:
    """Return the rows of data for which filter_func evaluates to True."""
    return data[np.apply_along_axis(filter_func, 1, data)]


def calculate_confusion_matrix(test: np.ndarray, truth: np.ndarray, positive_class: int) -> dict:
    """Count true/false positives and negatives of test predictions against truth labels."""
    tp = np.sum((test == positive_class) & (truth == positive_class))
    tn = np.sum((test != positive_class) & (truth != positive_class))
    fp = np.sum((test == positive_class) & (truth != positive_class))
    fn = np.sum((test != positive_class) & (truth == positive_class))
    return {"tp": tp, "tn": tn, "fp": fp, "fn": fn}
```
Lines changed: 49 additions & 0 deletions

```python
# pylint: disable=line-too-long
from typing import Any, List

import numpy as np


class DisparateImpactRatio:
    """
    Calculate disparate impact ratio (DIR).
    """

    @staticmethod
    def calculate_model(
        samples: np.ndarray,
        model: Any,
        privilege_columns: List[int],
        privilege_values: List[int],
        favorable_output: np.ndarray,
    ) -> float:
        """
        Calculate disparate impact ratio (DIR) for model outputs.

        :param samples: a NumPy array of inputs to be used for testing fairness
        :param model: the model to be tested for fairness
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param favorable_output: the outputs that are considered favorable / desirable
        :return: DIR score
        """
        outputs = model.predict(samples)
        data = np.append(samples, outputs, axis=1)
        # A row is privileged when all of its privilege columns hold the privileged values.
        privileged_mask = np.all(data[:, privilege_columns] == privilege_values, axis=1)
        privileged = data[privileged_mask]
        unprivileged = data[~privileged_mask]

        return DisparateImpactRatio.calculate(privileged, unprivileged, favorable_output)

    @staticmethod
    def calculate(
        privileged: np.ndarray,
        unprivileged: np.ndarray,
        favorable_output: int,
    ) -> float:
        """
        Calculate disparate impact ratio (DIR) when the labels are pre-calculated.

        :param privileged: a NumPy array with the rows of the privileged group (labels in the last column)
        :param unprivileged: a NumPy array with the rows of the unprivileged group (labels in the last column)
        :param favorable_output: an output that is considered favorable / desirable
        :return: DIR, the ratio of the unprivileged group's favorable rate to the privileged group's (1 indicates parity)
        """
        probability_privileged = np.sum(privileged[:, -1] == favorable_output) / len(privileged)
        probability_unprivileged = np.sum(unprivileged[:, -1] == favorable_output) / len(unprivileged)
        return probability_unprivileged / probability_privileged
```
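A short sketch of the pre-computed path with synthetic groups (the import path is hypothetical; adjust it to wherever this class lives in the package):

```python
import numpy as np

# Hypothetical module path, mirroring the fairness_metrics_utils import style.
from src.core.metrics.fairness.disparate_impact_ratio import DisparateImpactRatio

# Each row is [group, outcome]; the outcome sits in the last column.
privileged = np.array([[1, 1], [1, 1], [1, 0], [1, 1]])    # favorable rate 0.75
unprivileged = np.array([[0, 1], [0, 0], [0, 0], [0, 1]])  # favorable rate 0.50

dir_score = DisparateImpactRatio.calculate(privileged, unprivileged, favorable_output=1)
# 0.50 / 0.75 ≈ 0.67; under the common four-fifths rule, values below 0.8
# are often treated as evidence of disparate impact.
```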
Lines changed: 64 additions & 0 deletions

```python
# pylint: disable=line-too-long, too-many-arguments
from typing import Any, List

import numpy as np

from src.core.metrics.fairness.fairness_metrics_utils import filter_rows_by_inputs, calculate_confusion_matrix


class GroupAverageOddsDifference:
    """
    Calculate group average odds difference.
    """

    @staticmethod
    def calculate_model(
        samples: np.ndarray,
        model: Any,
        privilege_columns: List[int],
        privilege_values: List[int],
        positive_class: List[int],
        output_column: int,
    ) -> float:
        """
        Calculate group average odds difference for model outputs.

        :param samples: a NumPy array of inputs to be used for testing fairness
        :param model: the model to be tested for fairness
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param positive_class: the favorable / desirable outputs
        :param output_column: the column index where the output is located
        :return: group average odds difference score
        """
        outputs = model.predict(samples)
        truth = np.append(samples, outputs, axis=1)

        return GroupAverageOddsDifference.calculate(samples, truth, privilege_columns, privilege_values, positive_class, output_column)

    @staticmethod
    def calculate(test, truth, privilege_columns, privilege_values, positive_class, output_column):
        """
        Calculate group average odds difference when the labels are pre-calculated.

        :param test: a NumPy array representing the test data
        :param truth: a NumPy array representing the truth data
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param positive_class: the favorable / desirable outputs
        :param output_column: the column index where the output is located
        :return: group average odds difference, between -1 and 1
        """
        def privilege_filter(row):
            return np.array_equal(row[privilege_columns], privilege_values)

        test_privileged = filter_rows_by_inputs(test, privilege_filter)
        test_unprivileged = filter_rows_by_inputs(test, lambda row: not privilege_filter(row))

        truth_privileged = filter_rows_by_inputs(truth, privilege_filter)
        truth_unprivileged = filter_rows_by_inputs(truth, lambda row: not privilege_filter(row))

        ucm = calculate_confusion_matrix(test_unprivileged[:, output_column], truth_unprivileged[:, output_column], positive_class)
        pcm = calculate_confusion_matrix(test_privileged[:, output_column], truth_privileged[:, output_column], positive_class)

        utp, utn, ufp, ufn = ucm["tp"], ucm["tn"], ucm["fp"], ucm["fn"]
        ptp, ptn, pfp, pfn = pcm["tp"], pcm["tn"], pcm["fp"], pcm["fn"]

        # Average of the true-positive-rate gap and the false-positive-rate gap
        # between the unprivileged and privileged groups; the small epsilon
        # guards against division by zero.
        return (utp / (utp + ufn + 1e-10) - ptp / (ptp + pfn + 1e-10)) / 2 + \
               (ufp / (ufp + utn + 1e-10) - pfp / (pfp + ptn + 1e-10)) / 2
```
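A worked example of the pre-computed path (synthetic data; the import path is hypothetical). Here the unprivileged group has both a higher true-positive rate and a higher false-positive rate, so the two gaps average to a positive score:

```python
import numpy as np

# Hypothetical module path.
from src.core.metrics.fairness.group_average_odds_difference import GroupAverageOddsDifference

# Each row is [group, outcome]; column 1 holds the outcome.
# "test" carries the model's predictions, "truth" the ground-truth labels.
test = np.array([[1, 1], [1, 0], [1, 0], [1, 0],
                 [0, 1], [0, 1], [0, 1], [0, 0]])
truth = np.array([[1, 1], [1, 1], [1, 0], [1, 0],
                  [0, 1], [0, 1], [0, 0], [0, 0]])

aod = GroupAverageOddsDifference.calculate(
    test, truth,
    privilege_columns=[0], privilege_values=[1],
    positive_class=1, output_column=1,
)
# (TPR gap + FPR gap) / 2 = ((1.0 - 0.5) + (0.5 - 0.0)) / 2 ≈ 0.5
```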
Lines changed: 62 additions & 0 deletions

```python
# pylint: disable=line-too-long, too-many-arguments
from typing import Any, List

import numpy as np

from src.core.metrics.fairness.fairness_metrics_utils import filter_rows_by_inputs, calculate_confusion_matrix


class GroupAveragePredictiveValueDifference:
    """
    Calculate group average predictive value difference.
    """

    @staticmethod
    def calculate_model(
        samples: np.ndarray,
        model: Any,
        privilege_columns: List[int],
        privilege_values: List[int],
        positive_class: int,
        output_column: int,
    ) -> float:
        """
        Calculate group average predictive value difference for model outputs.

        :param samples: a NumPy array of inputs to be used for testing fairness
        :param model: the model to be tested for fairness
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param positive_class: the favorable / desirable outputs
        :param output_column: the column index where the output is located
        :return: group average predictive value difference score
        """
        outputs = model.predict(samples)
        truth = np.append(samples, outputs, axis=1)
        return GroupAveragePredictiveValueDifference.calculate(samples, truth, privilege_columns, privilege_values, positive_class, output_column)

    @staticmethod
    def calculate(test, truth, privilege_columns, privilege_values, positive_class, output_column):
        """
        Calculate group average predictive value difference when the labels are pre-calculated.

        :param test: a NumPy array representing the test data
        :param truth: a NumPy array representing the truth data
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param positive_class: the favorable / desirable outputs
        :param output_column: the column index where the output is located
        :return: group average predictive value difference, between -1 and 1
        """
        def privilege_filter(row):
            return np.array_equal(row[privilege_columns], privilege_values)

        test_privileged = filter_rows_by_inputs(test, privilege_filter)
        test_unprivileged = filter_rows_by_inputs(test, lambda row: not privilege_filter(row))

        truth_privileged = filter_rows_by_inputs(truth, privilege_filter)
        truth_unprivileged = filter_rows_by_inputs(truth, lambda row: not privilege_filter(row))

        ucm = calculate_confusion_matrix(test_unprivileged[:, output_column], truth_unprivileged[:, output_column], positive_class)
        pcm = calculate_confusion_matrix(test_privileged[:, output_column], truth_privileged[:, output_column], positive_class)

        utp, utn, ufp, ufn = ucm["tp"], ucm["tn"], ucm["fp"], ucm["fn"]
        ptp, ptn, pfp, pfn = pcm["tp"], pcm["tn"], pcm["fp"], pcm["fn"]

        # Average of the precision (PPV) gap and the false-omission-rate gap
        # between the unprivileged and privileged groups; the small epsilon
        # guards against division by zero.
        return (utp / (utp + ufp + 1e-10) - ptp / (ptp + pfp + 1e-10)) / 2 + \
               (ufn / (ufn + utn + 1e-10) - pfn / (pfn + ptn + 1e-10)) / 2
```
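The same synthetic data as in the average odds example yields a negative score here, meaning positive predictions for the unprivileged group are less reliable (import path again hypothetical):

```python
import numpy as np

# Hypothetical module path.
from src.core.metrics.fairness.group_average_predictive_value_difference import (
    GroupAveragePredictiveValueDifference,
)

test = np.array([[1, 1], [1, 0], [1, 0], [1, 0],
                 [0, 1], [0, 1], [0, 1], [0, 0]])
truth = np.array([[1, 1], [1, 1], [1, 0], [1, 0],
                  [0, 1], [0, 1], [0, 0], [0, 0]])

apvd = GroupAveragePredictiveValueDifference.calculate(
    test, truth,
    privilege_columns=[0], privilege_values=[1],
    positive_class=1, output_column=1,
)
# (PPV gap + FOR gap) / 2 = ((2/3 - 1) + (0 - 1/3)) / 2 ≈ -0.33
```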
Lines changed: 49 additions & 0 deletions

```python
# pylint: disable=line-too-long
from typing import List

import numpy as np


class GroupStatisticalParityDifference:
    """
    Calculate group statistical parity difference (SPD).
    """

    @staticmethod
    def calculate_model(
        samples: np.ndarray,
        model,
        privilege_columns: List[int],
        privilege_values: List[int],
        favorable_output,
    ) -> float:
        """
        Calculate group statistical parity difference (SPD) for model outputs.

        :param samples: a NumPy array of inputs to be used for testing fairness
        :param model: the model to be tested for fairness
        :param privilege_columns: a list of integers specifying the indices of the privileged columns
        :param privilege_values: a list of integers specifying the privileged values
        :param favorable_output: the outputs that are considered favorable / desirable
        :return: SPD score
        """
        outputs = model.predict(samples)
        data = np.append(samples, outputs, axis=1)
        # A row is privileged when all of its privilege columns hold the privileged values.
        privileged_mask = np.all(data[:, privilege_columns] == privilege_values, axis=1)
        privileged = data[privileged_mask]
        unprivileged = data[~privileged_mask]

        return GroupStatisticalParityDifference.calculate(privileged, unprivileged, favorable_output)

    @staticmethod
    def calculate(
        privileged,
        unprivileged,
        favorable_output,
    ) -> float:
        """
        Calculate statistical/demographic parity difference (SPD) when the labels are pre-calculated.

        :param privileged: a NumPy array with the rows of the privileged group (labels in the last column)
        :param unprivileged: a NumPy array with the rows of the unprivileged group (labels in the last column)
        :param favorable_output: an output that is considered favorable / desirable
        :return: SPD, between -1 and 1
        """
        probability_privileged = np.sum(privileged[:, -1] == favorable_output) / len(privileged)
        probability_unprivileged = np.sum(unprivileged[:, -1] == favorable_output) / len(unprivileged)
        return probability_unprivileged - probability_privileged
```
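Using the same synthetic groups as the DIR example (import path hypothetical), SPD reports the difference in favorable rates rather than their ratio:

```python
import numpy as np

# Hypothetical module path.
from src.core.metrics.fairness.group_statistical_parity_difference import (
    GroupStatisticalParityDifference,
)

privileged = np.array([[1, 1], [1, 1], [1, 0], [1, 1]])    # favorable rate 0.75
unprivileged = np.array([[0, 1], [0, 0], [0, 0], [0, 1]])  # favorable rate 0.50

spd = GroupStatisticalParityDifference.calculate(privileged, unprivileged, favorable_output=1)
# 0.50 - 0.75 = -0.25; 0 indicates parity, negative values favor the privileged group.
```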
Lines changed: 37 additions & 0 deletions

```python
# pylint: disable=too-few-public-methods, line-too-long
from typing import Any, Callable

import numpy as np


class IndividualConsistency:
    """
    Calculate individual fairness in terms of consistency of predictions across similar inputs.
    """

    @staticmethod
    def calculate(
        proximity_function: Callable,
        samples: np.ndarray,
        prediction_provider: Any,
    ) -> float:
        """
        Calculate individual fairness.

        :param proximity_function: a function that finds the top k similar inputs, given a reference input and a list of inputs
        :param samples: a list of inputs to be tested for consistency
        :param prediction_provider: the model under inspection
        :return: the consistency measure
        """
        consistency = 1
        for sample in samples:
            prediction_outputs = prediction_provider.predict(sample)
            prediction_output = prediction_outputs[0]
            neighbors = proximity_function(sample, samples)
            neighbors_outputs = prediction_provider.predict(neighbors)
            # Penalize each disagreement between a sample's prediction and a
            # neighbor's prediction, normalized by the number of neighbors,
            # outputs, and samples.
            for output in prediction_outputs:
                for neighbor_output in neighbors_outputs:
                    if neighbor_output != output:
                        consistency -= 1 / (len(neighbors) * len(prediction_output) * len(samples))
        return consistency
```
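A runnable sketch with a toy model and a nearest-neighbor proximity function (everything below is illustrative, and the module path is hypothetical):

```python
import numpy as np

# Hypothetical module path.
from src.core.metrics.fairness.individual_consistency import IndividualConsistency


class ThresholdModel:
    """Toy model: outputs 1 when the first feature is non-negative."""
    def predict(self, x):
        x = np.atleast_2d(x)
        return (x[:, :1] >= 0).astype(int)  # shape (n, 1): one output row per input row


def top_2_nearest(reference, candidates):
    """Return the two candidates closest to the reference in Euclidean distance."""
    distances = np.linalg.norm(candidates - reference, axis=1)
    return candidates[np.argsort(distances)[:2]]


samples = np.array([[0.1, 1.0], [0.2, 0.9], [-0.1, 1.1], [5.0, 0.0]])
score = IndividualConsistency.calculate(top_2_nearest, samples, ThresholdModel())
# 0.875 here: one of the four points disagrees with a close neighbor;
# 1.0 would mean similar inputs always receive the same prediction.
```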
