feat(RHOAIENG-21045): Add fairness metrics and tests

christinaexyou · christinaexyou · commit fe17163c5c5c · 2025-05-08T11:01:57.000-04:00
diff --git a/src/core/metrics/fairness/fairness_metrics_utils.py b/src/core/metrics/fairness/fairness_metrics_utils.py
@@ -0,0 +1,11 @@
+import numpy as np
+
+def filter_rows_by_inputs(data, filter_func):
+    """
+    Select the rows of a 2-D array for which a predicate holds.
+    :param data a 2-D NumPy array (or array-like) whose rows are to be filtered
+    :param filter_func a callable that receives one row and returns a truthy value to keep it
+    return a NumPy array holding only the rows accepted by filter_func (possibly zero rows)
+    """
+    data = np.asarray(data)
+    # Build the mask row by row instead of using np.apply_along_axis, which raises
+    # ValueError when the input has zero rows.
+    mask = np.fromiter((bool(filter_func(row)) for row in data), dtype=bool, count=len(data))
+    return data[mask]
+
+def calculate_confusion_matrix(test: np.ndarray, truth: np.ndarray, positive_class: int) -> dict:
+    """
+    Count confusion-matrix entries for a positive / non-positive split.
+    :param test the labels under test (treated as predictions), one per sample
+    :param truth the reference labels, aligned element-wise with test
+    :param positive_class the label value treated as the positive outcome
+    return a dict with the counts stored under the keys "tp", "tn", "fp" and "fn"
+    """
+    # Coerce to arrays so that plain lists are compared element-wise; `[1, 0] == 1`
+    # is a single False in Python and would silently yield all-zero positive counts.
+    test_positive = np.asarray(test) == positive_class
+    truth_positive = np.asarray(truth) == positive_class
+    tp = np.sum(test_positive & truth_positive)
+    tn = np.sum(~test_positive & ~truth_positive)
+    fp = np.sum(test_positive & ~truth_positive)
+    fn = np.sum(~test_positive & truth_positive)
+    return {"tp": tp, "tn": tn, "fp": fp, "fn": fn}
diff --git a/src/core/metrics/fairness/group/disparate_impact_ratio.py b/src/core/metrics/fairness/group/disparate_impact_ratio.py
@@ -0,0 +1,49 @@
+# pylint: disable=line-too-long
+from typing import List, Any
+
+import numpy as np
+
+class DisparateImpactRatio:
+    """
+    Calculate disparate impact ratio (DIR).
+    """
+    @staticmethod
+    def calculate_model(
+        samples: np.ndarray,
+        model: Any,
+        privilege_columns: List[int],
+        privilege_values: List[int],
+        favorable_output: int
+        ) -> float:
+        """
+        Calculate disparate impact ratio (DIR) for model outputs.
+        :param samples a 2-D NumPy array of inputs to be used for testing fairness
+        :param model the model to be tested for fairness; must expose predict(samples)
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values, one per privilege column
+        :param favorable_output the output that is considered favorable / desirable
+        return DIR score
+        """
+        outputs = np.asarray(model.predict(samples))
+        if outputs.ndim == 1:
+            # np.append(..., axis=1) needs both operands to be 2-D, so a flat
+            # prediction vector is turned into a single column first.
+            outputs = outputs.reshape(-1, 1)
+        data = np.append(samples, outputs, axis=1)
+        # A row is privileged only when every privilege column holds its privileged value;
+        # every other row is unprivileged. Whole rows are kept because calculate()
+        # reads the output from the last column.
+        is_privileged = np.all(data[:, privilege_columns] == np.asarray(privilege_values), axis=1)
+
+        return DisparateImpactRatio.calculate(data[is_privileged], data[~is_privileged], favorable_output)
+
+    @staticmethod
+    def calculate(
+        privileged: np.ndarray,
+        unprivileged: np.ndarray,
+        favorable_output: int
+        ) -> float:
+        """
+        Calculate disparate impact ratio (DIR) when the labels are pre-calculated.
+        :param privileged a 2-D NumPy array with the privileged rows, output in the last column
+        :param unprivileged a 2-D NumPy array with the unprivileged rows, output in the last column
+        :param favorable_output an output that is considered favorable / desirable
+        return DIR, the unprivileged favorable rate divided by the privileged one: non-negative,
+        1 means parity, values above 1 favor the unprivileged group. With NumPy division
+        semantics the result is nan or inf when a group is empty or the privileged rate is 0.
+        """
+        probability_privileged = np.mean(privileged[:, -1] == favorable_output)
+        probability_unprivileged = np.mean(unprivileged[:, -1] == favorable_output)
+        return probability_unprivileged / probability_privileged
diff --git a/src/core/metrics/fairness/group/group_average_odds_difference.py b/src/core/metrics/fairness/group/group_average_odds_difference.py
@@ -0,0 +1,64 @@
+# pylint: disable=line-too-long, too-many-arguments
+from typing import List, Any
+
+import numpy as np
+
+from src.core.metrics.fairness.fairness_metrics_utils import filter_rows_by_inputs, calculate_confusion_matrix
+
+class GroupAverageOddsDifference:
+    """
+    Calculate group average odds difference.
+    """
+    @staticmethod
+    def calculate_model(
+        samples: np.ndarray,
+        model: Any,
+        privilege_columns: List[int],
+        privilege_values: List[int],
+        postive_class: List[int],
+        output_column: int
+    ) -> float:
+        """
+        Calculate group average odds difference for model outputs.
+        :param samples a 2-D NumPy array of inputs to be used for testing fairness
+        :param model the model to be tested for fairness; must expose predict(samples)
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values
+        :param postive_class the favorable / desirable outputs (the parameter name is kept as-is so keyword callers keep working)
+        :param output_column the column index where the output is located
+        return group average odds difference score
+        """
+        outputs = np.asarray(model.predict(samples))
+        if outputs.ndim == 1:
+            # np.append(..., axis=1) needs both operands to be 2-D, so a flat
+            # prediction vector is turned into a single column first.
+            outputs = outputs.reshape(-1, 1)
+        truth = np.append(samples, outputs, axis=1)
+
+        # NOTE(review): the raw samples are forwarded as `test` and the samples-plus-predictions
+        # as `truth`; confirm this matches the roles calculate() expects for its two arguments.
+        return GroupAverageOddsDifference.calculate(samples, truth, privilege_columns, privilege_values, postive_class, output_column)
+
+    @staticmethod
+    def calculate(test, truth, privilege_columns, privilege_values, positive_class, output_column):
+        """
+        Calculate group average odds difference when the labels are pre-calculated.
+        :param test a NumPy array representing the test data
+        :param truth a NumPy array representing the truth data
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values
+        :param positive_class the favorable / desirable outputs
+        :param output_column the column where the output is located
+        return group average odds difference, between -1 and 1
+        """
+        def privilege_filter(row):
+            return np.array_equal(row[privilege_columns], privilege_values)
+
+        def unprivilege_filter(row):
+            return not privilege_filter(row)
+
+        test_privileged = filter_rows_by_inputs(test, privilege_filter)
+        test_unprivileged = filter_rows_by_inputs(test, unprivilege_filter)
+
+        truth_privileged = filter_rows_by_inputs(truth, privilege_filter)
+        truth_unprivileged = filter_rows_by_inputs(truth, unprivilege_filter)
+
+        ucm = calculate_confusion_matrix(test_unprivileged[:, output_column], truth_unprivileged[:, output_column], positive_class)
+        pcm = calculate_confusion_matrix(test_privileged[:, output_column], truth_privileged[:, output_column], positive_class)
+
+        utp, utn, ufp, ufn = ucm["tp"], ucm["tn"], ucm["fp"], ucm["fn"]
+        ptp, ptn, pfp, pfn = pcm["tp"], pcm["tn"], pcm["fp"], pcm["fn"]
+
+        # The small epsilon keeps every rate finite when a group has no rows in a denominator.
+        eps = 1e-10
+        # tp / (tp + fn) and fp / (fp + tn) per group, unprivileged minus privileged.
+        tpr_difference = utp / (utp + ufn + eps) - ptp / (ptp + pfn + eps)
+        fpr_difference = ufp / (ufp + utn + eps) - pfp / (pfp + ptn + eps)
+        return (tpr_difference + fpr_difference) / 2
diff --git a/src/core/metrics/fairness/group/group_average_predictive_value_difference.py b/src/core/metrics/fairness/group/group_average_predictive_value_difference.py
@@ -0,0 +1,62 @@
+# pylint: disable=line-too-long, too-many-arguments
+from typing import List, Any
+
+import numpy as np
+
+from src.core.metrics.fairness.fairness_metrics_utils import filter_rows_by_inputs, calculate_confusion_matrix
+
+class GroupAveragePredictiveValueDifference:
+    """
+    Calculate group average predictive value difference.
+    """
+    @staticmethod
+    def calculate_model(
+        samples: np.ndarray,
+        model: Any,
+        privilege_columns: List[int],
+        privilege_values: List[int],
+        positive_class: int,
+        output_column: int
+    ) -> float:
+        """
+        Calculate group average predictive value difference for model outputs.
+        :param samples a 2-D NumPy array of inputs to be used for testing fairness
+        :param model the model to be tested for fairness; must expose predict(samples)
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values
+        :param positive_class the favorable / desirable outputs
+        :param output_column the column index where the output is located
+        return group average predictive value difference score
+        """
+        outputs = np.asarray(model.predict(samples))
+        if outputs.ndim == 1:
+            # np.append(..., axis=1) needs both operands to be 2-D, so a flat
+            # prediction vector is turned into a single column first.
+            outputs = outputs.reshape(-1, 1)
+        truth = np.append(samples, outputs, axis=1)
+        # NOTE(review): the raw samples are forwarded as `test` and the samples-plus-predictions
+        # as `truth`; confirm this matches the roles calculate() expects for its two arguments.
+        return GroupAveragePredictiveValueDifference.calculate(samples, truth, privilege_columns, privilege_values, positive_class, output_column)
+
+    @staticmethod
+    def calculate(test, truth, privilege_columns, privilege_values, positive_class, output_column):
+        """
+        Calculate group average predictive value difference when the labels are pre-calculated.
+        :param test a NumPy array representing the test data
+        :param truth a NumPy array representing the truth data
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values
+        :param positive_class the favorable / desirable outputs
+        :param output_column the column where the output is located
+        return group average predictive value difference, between -1 and 1
+        """
+        def privilege_filter(row):
+            return np.array_equal(row[privilege_columns], privilege_values)
+
+        def unprivilege_filter(row):
+            return not privilege_filter(row)
+
+        test_privileged = filter_rows_by_inputs(test, privilege_filter)
+        test_unprivileged = filter_rows_by_inputs(test, unprivilege_filter)
+
+        truth_privileged = filter_rows_by_inputs(truth, privilege_filter)
+        truth_unprivileged = filter_rows_by_inputs(truth, unprivilege_filter)
+
+        ucm = calculate_confusion_matrix(test_unprivileged[:, output_column], truth_unprivileged[:, output_column], positive_class)
+        pcm = calculate_confusion_matrix(test_privileged[:, output_column], truth_privileged[:, output_column], positive_class)
+
+        utp, utn, ufp, ufn = ucm["tp"], ucm["tn"], ucm["fp"], ucm["fn"]
+        ptp, ptn, pfp, pfn = pcm["tp"], pcm["tn"], pcm["fp"], pcm["fn"]
+
+        # Guard every denominator with the same epsilon: without it the unprivileged terms
+        # divide by zero whenever that group has no predicted positives or negatives.
+        eps = 1e-10
+        # tp / (tp + fp) and fn / (fn + tn) per group, unprivileged minus privileged.
+        ppv_difference = utp / (utp + ufp + eps) - ptp / (ptp + pfp + eps)
+        for_difference = ufn / (ufn + utn + eps) - pfn / (pfn + ptn + eps)
+        return (ppv_difference + for_difference) / 2
diff --git a/src/core/metrics/fairness/group/group_statistical_parity_difference.py b/src/core/metrics/fairness/group/group_statistical_parity_difference.py
@@ -0,0 +1,49 @@
+# pylint: disable=line-too-long
+from typing import List
+
+import numpy as np
+
+class GroupStatisticalParityDifference:
+    """
+    Calculate group statistical parity difference (SPD).
+    """
+    @staticmethod
+    def calculate_model(
+        samples: np.ndarray,
+        model,
+        privilege_columns: List[int],
+        privilege_values: List[int],
+        favorable_output,
+        ) -> float:
+        """
+        Calculate group statistical parity difference (SPD) for model outputs.
+        :param samples a 2-D NumPy array of inputs to be used for testing fairness
+        :param model the model to be tested for fairness; must expose predict(samples)
+        :param privilege_columns a list of integers specifying the indices of the privileged columns
+        :param privilege_values a list of integers specifying the privileged values, one per privilege column
+        :param favorable_output the output that is considered favorable / desirable
+        return SPD score
+        """
+        outputs = np.asarray(model.predict(samples))
+        if outputs.ndim == 1:
+            # np.append(..., axis=1) needs both operands to be 2-D, so a flat
+            # prediction vector is turned into a single column first.
+            outputs = outputs.reshape(-1, 1)
+        data = np.append(samples, outputs, axis=1)
+        # Reduce the per-column comparison to one boolean per row. Indexing with
+        # np.where() on the 2-D comparison would pick single elements, not rows.
+        is_privileged = np.all(data[:, privilege_columns] == np.asarray(privilege_values), axis=1)
+        privileged = data[is_privileged]
+        unprivileged = data[~is_privileged]
+
+        return GroupStatisticalParityDifference.calculate(privileged, unprivileged, favorable_output)
+
+    @staticmethod
+    def calculate(
+        privileged,
+        unprivileged,
+        favorable_output,
+        ) -> float:
+        """
+        Calculate statistical/demographic parity difference (SPD) when the labels are pre-calculated.
+        :param privileged a 2-D NumPy array with the privileged rows, output in the last column
+        :param unprivileged a 2-D NumPy array with the unprivileged rows, output in the last column
+        :param favorable_output an output that is considered favorable / desirable
+        return SPD, the unprivileged favorable rate minus the privileged one, between -1 and 1
+        (nan under NumPy division semantics when either group is empty)
+        """
+        probability_privileged = np.mean(privileged[:, -1] == favorable_output)
+        probability_unprivileged = np.mean(unprivileged[:, -1] == favorable_output)
+        return probability_unprivileged - probability_privileged
diff --git a/src/core/metrics/fairness/individual/individual_consistency.py b/src/core/metrics/fairness/individual/individual_consistency.py
@@ -0,0 +1,37 @@
+# pylint: disable=too-few-public-methods, line-too-long
+from typing import Any
+
+import numpy as np
+
+class IndividualConsistency:
+    """
+    Calculate individual fairness in terms of consistency of predictions across similar inputs.
+    """
+    @staticmethod
+    def calculate(
+        proximity_function: Any,
+        samples: np.ndarray,
+        prediction_provider: Any
+    ) -> float:
+        """
+        Calculate individual fairness.
+        :param proximity_function a function that finds the top k similar inputs, given a reference input and a list of inputs
+        :param samples a list of inputs to be tested for consistency
+        :param prediction_provider the model under inspection; must expose predict(inputs)
+        return the consistency measure: 1 minus the fraction of (sample, neighbor, output component)
+        triples whose neighbor prediction differs from the sample's own prediction
+        """
+        sample_count = len(samples)
+        consistency = 1.0
+        for sample in samples:
+            # The first prediction returned for the sample is the reference; a scalar
+            # prediction is treated as a single output component.
+            reference = np.atleast_1d(np.asarray(prediction_provider.predict(sample))[0])
+            neighbors = proximity_function(sample, samples)
+            if len(neighbors) == 0:
+                # Nothing to compare against, so this sample cannot lower the score.
+                continue
+            # One row per neighbor, one column per output component.
+            neighbors_outputs = np.asarray(prediction_provider.predict(neighbors)).reshape(len(neighbors), -1)
+            # Count mismatches component by component so the total matches the normaliser
+            # below (neighbors * components * samples); comparing whole rows inside an
+            # `if` is ambiguous for multi-output predictions.
+            mismatches = np.sum(neighbors_outputs != reference)
+            consistency -= mismatches / (len(neighbors) * reference.size * sample_count)
+        return float(consistency)
diff --git a/tests/metrics/test_fairness.py b/tests/metrics/test_fairness.py
@@ -0,0 +1,116 @@
+# pylint: disable=line-too-long, missing-function-docstring
+from typing import List, Optional
+
+from pytest import approx
+import numpy as np
+import pandas as pd
+
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import LabelEncoder
+
+from aif360.sklearn.metrics import (
+    disparate_impact_ratio,
+    statistical_parity_difference,
+    average_odds_difference,
+    average_predictive_value_difference,
+)
+
+from src.core.metrics.fairness.group.disparate_impact_ratio import DisparateImpactRatio
+from src.core.metrics.fairness.group.group_average_odds_difference import GroupAverageOddsDifference
+from src.core.metrics.fairness.group.group_average_predictive_value_difference import GroupAveragePredictiveValueDifference
+from src.core.metrics.fairness.group.group_statistical_parity_difference import GroupStatisticalParityDifference
+
+# Load the bank-churn training data from a pinned commit of the model-collection repository.
+# NOTE: this runs at import time, so collecting this module requires network access.
+df = pd.read_csv(
+    "https://raw.githubusercontent.com/trustyai-explainability/model-collection/8aa8e2e762c6d2b41dbcbe8a0035d50aa5f58c93/bank-churn/data/train.csv",
+)
+# Features are every column except the "Exited" label; the label column is the target.
+X = df.drop(columns=["Exited"], axis=1)
+y = df["Exited"]
+
+def train_model():
+    """
+    Fit a logistic regression on label-encoded features and predict on the same rows.
+    return a single-column DataFrame with one predicted label per row of X
+    """
+    categorical_features = ['Geography', 'Gender', 'Card Type', 'HasCrCard', 'IsActiveMember', 'Complain']
+    # Encode a copy: rewriting the module-level X in place made any later read of a
+    # categorical column return integer codes or the raw strings depending on whether
+    # this function had already been called.
+    encoded = X.copy()
+    for feature in categorical_features:
+        encoded[feature] = LabelEncoder().fit_transform(encoded[feature])
+    lr = LogisticRegression().fit(encoded, y)
+
+    return pd.DataFrame(lr.predict(encoded))
+
+def truth_predict_output():
+    """
+    Build the truth and prediction frames, both indexed by the "Gender" column.
+    return a tuple (y, y_pred) sharing one index
+    """
+    y.index = X["Gender"]
+    y_pred = train_model()
+    # Reuse the index already attached to y instead of reading X["Gender"] a second time,
+    # so truth and prediction are guaranteed to carry identical group labels regardless
+    # of what train_model() does to X.
+    y_pred.index = y.index
+    return y, y_pred
+
+def get_privileged_unprivleged_split():
+    """
+    Split the raw rows into two groups using column index 2.
+    return a tuple (privileged, unprivileged) of NumPy arrays with "Exited" as the last column;
+    rows whose column 2 equals "Male" come first, rows equal to "Female" second
+    """
+    feature_columns = [col for col in df.columns if col != "Exited"]
+    rows = df[feature_columns + ["Exited"]].to_numpy()
+    group_labels = rows[:, 2]
+    male_rows = np.flatnonzero(group_labels == "Male")
+    female_rows = np.flatnonzero(group_labels == "Female")
+    return rows[male_rows], rows[female_rows]
+
+def get_labeled_data():
+    """
+    Build two row arrays that differ only in their last ("Exited") column.
+    return a tuple (data, data_pred): the raw rows, and a copy whose last column
+    holds the predictions returned by train_model()
+    """
+    ordered_columns = [col for col in df.columns if col != "Exited"] + ["Exited"]
+    truth_rows = df[ordered_columns].to_numpy()
+    predictions = pd.DataFrame(train_model()).to_numpy().flatten()
+    # Copy before overwriting so the truth rows keep their original labels.
+    predicted_rows = truth_rows.copy()
+    predicted_rows[:, -1] = predictions
+    return truth_rows, predicted_rows
+
+# Build the shared fixtures once at import time. `y` is rebound here to the series
+# returned by truth_predict_output(), whose index has been set from X["Gender"].
+y, y_pred = truth_predict_output()
+# Row arrays split by group, used by the rate-based metrics (DIR, SPD).
+privileged, unprivileged = get_privileged_unprivleged_split()
+# Full row arrays with true and predicted labels, used by the confusion-matrix metrics.
+data, data_pred = get_labeled_data()
+
+
+def test_disparate_impact_ratio():
+    """DisparateImpactRatio.calculate agrees with the aif360 reference value."""
+    # Named `expected` rather than `dir` so the builtin dir() is not shadowed.
+    expected = disparate_impact_ratio(y, prot_attr="Gender", priv_group="Male", pos_label=1)
+
+    score = DisparateImpactRatio.calculate(
+        privileged=privileged,
+        unprivileged=unprivileged,
+        favorable_output=1
+    )
+    assert score == approx(expected, abs=1e-5)
+
+
+def test_statistical_parity_difference():
+    """GroupStatisticalParityDifference.calculate agrees with the aif360 reference value."""
+    expected = statistical_parity_difference(y, prot_attr="Gender", priv_group="Male", pos_label=1)
+
+    actual = GroupStatisticalParityDifference.calculate(
+        privileged=privileged,
+        unprivileged=unprivileged,
+        favorable_output=1,
+    )
+
+    assert actual == approx(expected, abs=1e-5)
+
+
+def test_average_odds_difference():
+    """GroupAverageOddsDifference.calculate stays within 0.2 of the aif360 reference value."""
+    expected = average_odds_difference(y, y_pred, prot_attr="Gender", priv_group="Male", pos_label=1)
+
+    actual = GroupAverageOddsDifference.calculate(
+        test=data_pred,
+        truth=data,
+        privilege_columns=[2],
+        privilege_values=["Male"],
+        positive_class=1,
+        output_column=-1,
+    )
+
+    # NOTE(review): abs=0.2 is a loose tolerance for a metric documented as lying in [-1, 1];
+    # confirm it is intentional.
+    assert actual == approx(expected, abs=0.2)
+
+
+def test_average_predictive_value_difference():
+    """GroupAveragePredictiveValueDifference.calculate stays within 0.2 of the aif360 reference value."""
+    expected = average_predictive_value_difference(y, y_pred, prot_attr="Gender", priv_group="Male", pos_label=1)
+
+    actual = GroupAveragePredictiveValueDifference.calculate(
+        test=data_pred,
+        truth=data,
+        privilege_columns=[2],
+        privilege_values=["Male"],
+        positive_class=1,
+        output_column=-1,
+    )
+
+    # NOTE(review): abs=0.2 is a loose tolerance for a metric documented as lying in [-1, 1];
+    # confirm it is intentional.
+    assert actual == approx(expected, abs=0.2)