Skip to content

Commit cd7c5a6

Browse files
artemsa223 authored and christinaexyou committed
feat(RHOAIENG-24177): Add unit test for Individual Consistency for TrustyAI Python Service
1 parent fe17163 commit cd7c5a6

File tree

8 files changed

+10153
-18
lines changed

8 files changed

+10153
-18
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ dependencies = [
1515
"requests>=2.31.0,<3",
1616
"cryptography>=44.0.2,<45",
1717
"h5py>=3.13.0,<4",
18+
"scikit-learn",
19+
"aif360",
1820
]
1921

2022
[project.optional-dependencies]

src/core/metrics/fairness/fairness_metrics_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from typing import Callable
12
import numpy as np
23

3-
def filter_rows_by_inputs(data, filter_func):
4+
def filter_rows_by_inputs(data: np.ndarray, filter_func: Callable[[np.ndarray], bool]) -> np.ndarray:
    """Return the rows of ``data`` for which ``filter_func`` is truthy.

    :param data: a 2-D NumPy array; ``filter_func`` is applied along axis 1
    :param filter_func: predicate called once per row with that row as a 1-D array
    :return: a new array holding only the rows selected by the predicate
    """
    # np.apply_along_axis raises ValueError when there are no rows to iterate
    # over, so an empty input is answered directly with an empty copy.
    if data.shape[0] == 0:
        return data.copy()
    return data[np.apply_along_axis(filter_func, 1, data)]
56

67
def calculate_confusion_matrix(test: np.array, truth: np.array, positive_class: int) -> dict:

src/core/metrics/fairness/group/disparate_impact_ratio.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# pylint: disable=line-too-long
2-
from typing import List, Any
2+
from typing import List, Any, Union
33

44
import numpy as np
55

@@ -33,8 +33,8 @@ def calculate_model(
3333

3434
@staticmethod
3535
def calculate(
36-
privileged: np.ndarray,
37-
unprivileged: np.ndarray,
36+
privileged: Union[int, np.ndarray],
37+
unprivileged: Union[int, np.ndarray],
3838
favorable_output: int
3939
) -> float:
4040
"""

src/core/metrics/fairness/group/group_average_odds_difference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def calculate_model(
2323
:param samples a NumPy array of inputs to be used for testing fairness
2424
:param model the model to be tested for fairness
2525
:param privilege_columns a list of integers specifying the indices of the privileged columns
26-
:param privilege_values a list of intergers specifying the privileged values
27-
:param postive_class the favorable / desirable outputs
26+
:param privilege_values a list of integers specifying the privileged values
27+
:param positive_class the favorable / desirable outputs
2828
:param output_column the column index where the output is located
2929
return group average odds difference score
3030
"""

src/core/metrics/fairness/group/group_average_predictive_value_difference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,5 @@ def privilege_filter(row):
5858
utp, utn, ufp, ufn = ucm["tp"], ucm["tn"], ucm["fp"], ucm["fn"]
5959
ptp, ptn, pfp, pfn = pcm["tp"], pcm["tn"], pcm["fp"], pcm["fn"]
6060

61-
return (utp / (utp + ufp) - ptp / (ptp + pfp + 1e-10)) / 2 + \
62-
(ufn / (ufn + utn) - pfn / (pfn + ptn + 1e-10)) / 2
61+
return (utp / (utp + ufp + 1e-10) - ptp / (ptp + pfp + 1e-10)) / 2 + \
62+
(ufn / (ufn + utn + 1e-10) - pfn / (pfn + ptn + 1e-10)) / 2

src/core/metrics/fairness/group/group_statistical_parity_difference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ def calculate_model(
2626
"""
2727
outputs = model.predict(samples)
2828
data = np.append(samples, outputs, axis=1)
29-
privileged = data[np.where(data[:, privilege_columns] == privilege_values)]
30-
unprivileged = data[np.where(data[:, privilege_columns] != privilege_values)]
29+
privileged = np.all(data[:, privilege_columns] == privilege_values, axis=1)
30+
unprivileged = np.all(data[:, privilege_columns] != privilege_values, axis=1)
3131

3232
return GroupStatisticalParityDifference.calculate(privileged, unprivileged, favorable_output)
3333

tests/data/bank_churn_train.csv

Lines changed: 10001 additions & 0 deletions
Large diffs are not rendered by default.

tests/metrics/test_fairness.py

Lines changed: 139 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,28 @@
11
# pylint: disable=line-too-long, missing-function-docstring
2-
from typing import List, Optional
3-
42
from pytest import approx
53
import numpy as np
64
import pandas as pd
75

86
from sklearn.linear_model import LogisticRegression
7+
from sklearn.neighbors import NearestNeighbors
98
from sklearn.preprocessing import LabelEncoder
109

1110
from aif360.sklearn.metrics import (
1211
disparate_impact_ratio,
1312
statistical_parity_difference,
1413
average_odds_difference,
1514
average_predictive_value_difference,
15+
consistency_score
1616
)
1717

1818
from src.core.metrics.fairness.group.disparate_impact_ratio import DisparateImpactRatio
1919
from src.core.metrics.fairness.group.group_average_odds_difference import GroupAverageOddsDifference
2020
from src.core.metrics.fairness.group.group_average_predictive_value_difference import GroupAveragePredictiveValueDifference
2121
from src.core.metrics.fairness.group.group_statistical_parity_difference import GroupStatisticalParityDifference
22+
from src.core.metrics.fairness.individual.individual_consistency import IndividualConsistency
2223

2324
df = pd.read_csv(
24-
"https://raw.githubusercontent.com/trustyai-explainability/model-collection/8aa8e2e762c6d2b41dbcbe8a0035d50aa5f58c93/bank-churn/data/train.csv",
25+
"tests/data/bank_churn_train.csv",
2526
)
2627
X = df.drop(columns=["Exited"], axis=1)
2728
y = df["Exited"]
@@ -34,8 +35,7 @@ def train_model():
3435
X[feature] = label_encoders[feature].fit_transform(X[feature])
3536
lr = LogisticRegression().fit(X, y)
3637

37-
y_pred = pd.DataFrame(lr.predict(X))
38-
return y_pred
38+
return pd.DataFrame(lr.predict(X))
3939

4040
def truth_predict_output():
4141
y.index = X["Gender"]
@@ -58,20 +58,92 @@ def get_labeled_data():
5858
data_pred[:, -1] = y_pred.to_numpy().flatten()
5959
return data, data_pred
6060

61+
62+
def get_k_neighbors_function(k_value=5):
    """Create a function that returns k nearest neighbors for a given input.

    :param k_value: number of neighbours the returned function yields per query
    :return: a ``find_neighbors(sample, samples)`` callable
    """

    def find_neighbors(sample, samples):
        """Find the k nearest neighbors of ``sample`` among ``samples``.

        :param sample: the query point; any array-like, flattened to 1-D
        :param samples: 2-D array of candidate points, indexable by an index array
        :return: the rows of ``samples`` closest to ``sample``, excluding the
            first hit (at most ``k_value`` rows)
        """
        query = np.asarray(sample).ravel()

        # One extra neighbour is requested because the closest hit is dropped
        # below. The count is capped at the number of available samples so a
        # small sample set does not make kneighbors() raise.
        n_neighbors = min(k_value + 1, len(samples))
        nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(samples)
        _, indices = nbrs.kneighbors([query])

        # Assumes position 0 is the query itself, i.e. `sample` is a row of
        # `samples` and no exact duplicate is ranked ahead of it.
        neighbor_indices = indices[0][1:k_value + 1]
        return samples[neighbor_indices]

    return find_neighbors
77+
78+
79+
def get_processed_data(sample_size=None):
    """Process data for testing individual consistency.

    Works on a copy of the module-level feature frame ``X``, label-encodes the
    categorical columns that are present, and returns the result as a NumPy
    array.

    :param sample_size: number of leading rows to return; ``None`` returns all rows
    :return: 2-D NumPy array of the encoded features
    """
    categorical_features = ['Geography', 'Gender', 'Card Type', 'HasCrCard', 'IsActiveMember', 'Complain']
    X_processed = X.copy()
    for feature in categorical_features:
        # Columns missing from the frame are skipped rather than treated as an error
        if feature in X_processed.columns:
            X_processed[feature] = LabelEncoder().fit_transform(X_processed[feature])

    # Slicing with None keeps every row, so one return covers both cases
    return X_processed.to_numpy()[:sample_size]
91+
92+
93+
class MockPredictionProvider:
    """Mock prediction provider for testing.

    Replays a fixed set of predictions. Predictions are served by row position
    within each ``predict`` call, not by matching the input content: row ``i``
    of the batch always receives ``predictions[i][0]``.
    """

    def __init__(self, predictions):
        """Store the predictions to replay.

        :param predictions: sized, indexable collection of rows whose first
            element is the predicted label
        """
        self.predictions = predictions

    def predict(self, x):
        """Return one single-element prediction row per input row.

        :param x: a single 1-D sample or a 2-D batch (any array-like)
        :return: list of ``[label]`` lists, one per row of ``x``
        """
        # atleast_2d treats a lone 1-D sample as a batch of one row and also
        # accepts plain lists, which have no .shape attribute.
        n_rows = np.atleast_2d(x).shape[0]
        n_known = min(n_rows, len(self.predictions))

        replayed = [[self.predictions[i][0]] for i in range(n_known)]
        # Rows beyond the stored predictions silently fall back to label 0
        padding = [[0] for _ in range(n_rows - n_known)]
        return replayed + padding
111+
112+
113+
class PerfectConsistencyProvider:
    """Provider that always returns the same prediction."""

    def predict(self, x):
        """Return ``[1]`` for every input row.

        :param x: a single 1-D sample or a 2-D batch (any array-like)
        :return: list with one ``[1]`` entry per row of ``x``
        """
        # atleast_2d treats a lone 1-D sample as a batch of one row and also
        # accepts plain lists, which have no .shape attribute.
        n_rows = np.atleast_2d(x).shape[0]
        return [[1] for _ in range(n_rows)]
120+
121+
122+
class RandomPredictionProvider:
    """Provider that returns random predictions."""

    def __init__(self, seed=42):
        """Create the provider with its own seeded random generator.

        :param seed: seed for the ``np.random.RandomState`` used by ``predict``
        """
        self.rng = np.random.RandomState(seed)

    def predict(self, x):
        """Return a random 0/1 label for every input row.

        Each call advances the generator, so repeated calls with the same
        input yield different labels.

        :param x: a single 1-D sample or a 2-D batch (any array-like)
        :return: list with one ``[label]`` entry per row of ``x``
        """
        # atleast_2d treats a lone 1-D sample as a batch of one row and also
        # accepts plain lists, which have no .shape attribute.
        n_rows = np.atleast_2d(x).shape[0]
        # One scalar draw per row keeps the seeded sequence unchanged
        return [[self.rng.randint(0, 2)] for _ in range(n_rows)]
132+
61133
y, y_pred = truth_predict_output()
62134
privileged, unprivileged = get_privileged_unprivleged_split()
63135
data, data_pred = get_labeled_data()
64136

65137

66138
def test_disparate_impact_ratio():
67-
dir = disparate_impact_ratio(y, prot_attr="Gender", priv_group="Male", pos_label=1)
139+
dir_result = disparate_impact_ratio(y, prot_attr="Gender", priv_group="Male", pos_label=1)
68140

69141
score = DisparateImpactRatio.calculate(
70142
privileged=privileged,
71143
unprivileged=unprivileged,
72144
favorable_output=1
73145
)
74-
assert score == approx(dir, abs=1e-5)
146+
assert score == approx(dir_result, abs=1e-5)
75147

76148

77149
def test_statistical_parity_difference():
@@ -98,7 +170,7 @@ def test_average_odds_difference():
98170
output_column=-1
99171
)
100172

101-
assert score == approx(aod, abs=0.2)
173+
assert score == approx(aod, abs=1e-5)
102174

103175

104176
def test_average_predictive_value_difference():
@@ -114,3 +186,62 @@ def test_average_predictive_value_difference():
114186
)
115187

116188
assert score == approx(apvd, abs=0.2)
189+
190+
191+
def test_individual_consistency():
    """Test individual consistency calculation using AIF360's consistency_score as ground truth."""
    k = 5
    n_samples = 50

    X_sample = get_processed_data(sample_size=n_samples)
    y_pred_sample = y_pred.iloc[:n_samples].to_numpy()

    # Pass k explicitly so the reference score and the proximity function are
    # guaranteed to use the same neighbourhood size.
    cs_score = consistency_score(X_sample, y_pred_sample.flatten(), n_neighbors=k)

    prediction_provider = MockPredictionProvider(y_pred_sample)
    proximity_function = get_k_neighbors_function(k)

    score = IndividualConsistency.calculate(
        proximity_function=proximity_function,
        samples=X_sample,
        prediction_provider=prediction_provider
    )

    assert score == approx(cs_score, abs=0.2)
209+
210+
211+
def test_individual_consistency_perfect():
    """Test individual consistency with a perfect consistency model."""
    k = 3
    n_samples = 20

    X_sample = get_processed_data(sample_size=n_samples)
    perfect_predictions = np.ones(n_samples)

    # The reference score must use the same k as the proximity function;
    # consistency_score would otherwise fall back to its own default.
    cs_score = consistency_score(X_sample, perfect_predictions, n_neighbors=k)

    proximity_function = get_k_neighbors_function(k)

    consistency = IndividualConsistency.calculate(
        proximity_function=proximity_function,
        samples=X_sample,
        prediction_provider=PerfectConsistencyProvider()
    )

    assert consistency == approx(cs_score, abs=0.2)
228+
229+
230+
def test_individual_consistency_imperfect():
    """Test individual consistency with an inconsistent model."""
    k = 3
    n_samples = 20

    X_sample = get_processed_data(sample_size=n_samples)

    rng = np.random.RandomState(42)
    random_predictions = rng.randint(0, 2, size=n_samples)

    # The reference score must use the same k as the proximity function;
    # consistency_score would otherwise fall back to its own default.
    cs_score = consistency_score(X_sample, random_predictions, n_neighbors=k)

    proximity_function = get_k_neighbors_function(k)

    # NOTE(review): RandomPredictionProvider draws a fresh label on every
    # predict() call, so its labels are not the `random_predictions` scored
    # above; the two values only agree within the loose tolerance below.
    # Confirm whether the provider should replay `random_predictions` instead.
    consistency = IndividualConsistency.calculate(
        proximity_function=proximity_function,
        samples=X_sample,
        prediction_provider=RandomPredictionProvider(seed=42)
    )

    assert consistency == approx(cs_score, abs=0.2)

0 commit comments

Comments
 (0)