# pylint: disable = import-error
from typing import List, Optional, Any, Union

+import numpy as np
import pandas as pd
from jpype import JInt
from org.kie.trustyai.explainability.metrics import FairnessMetrics

from trustyai.model import Value, PredictionProvider, Model
from trustyai.utils.data_conversions import (
-    pandas_to_trusty,
    OneOutputUnionType,
    one_output_convert,
+    to_trusty_dataframe,
)

ColumSelector = Union[List[int], List[str]]


def _column_selector_to_index(columns: ColumSelector, dataframe: pd.DataFrame):
21+ """Returns a list of input and output indices, given an index size and output indices"""
    if len(columns) == 0:
        raise ValueError("Must specify at least one column")

@@ -27,32 +29,40 @@ def _column_selector_to_index(columns: ColumSelector, dataframe: pd.DataFrame):


def statistical_parity_difference(
-    privileged: pd.DataFrame,
-    unprivileged: pd.DataFrame,
+    privileged: Union[pd.DataFrame, np.ndarray],
+    unprivileged: Union[pd.DataFrame, np.ndarray],
    favorable: OneOutputUnionType,
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Statistical Parity Difference between privileged and unprivileged dataframes"""
    favorable_prediction_object = one_output_convert(favorable)
    return FairnessMetrics.groupStatisticalParityDifference(
-        pandas_to_trusty(privileged, outputs),
-        pandas_to_trusty(unprivileged, outputs),
+        to_trusty_dataframe(
+            data=privileged, outputs=outputs, feature_names=feature_names
+        ),
+        to_trusty_dataframe(
+            data=unprivileged, outputs=outputs, feature_names=feature_names
+        ),
        favorable_prediction_object.outputs,
    )
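
# Illustrative usage sketch for statistical_parity_difference. The dataframe df,
# the column names "gender" and "approved", the favorable value 1, and the
# trustyai.model.output helper used to build the favorable outcome are
# assumptions for illustration, not part of this module:
#
#   from trustyai.model import output
#
#   privileged = df[df["gender"] == 1]
#   unprivileged = df[df["gender"] == 0]
#   spd = statistical_parity_difference(
#       privileged, unprivileged, favorable=output("approved", dtype="number", value=1)
#   )
#
#   # with the numpy support added above, raw arrays plus feature_names also work:
#   spd = statistical_parity_difference(
#       privileged.to_numpy(),
#       unprivileged.to_numpy(),
#       favorable=output("approved", dtype="number", value=1),
#       feature_names=list(df.columns),
#   )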


-# pylint: disable = line-too-long
+# pylint: disable = line-too-long, too-many-arguments
def statistical_parity_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    favorable: OneOutputUnionType,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Statistical Parity Difference using a samples dataframe and a model"""
    favorable_prediction_object = one_output_convert(favorable)
    _privilege_values = [Value(v) for v in privilege_values]
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    return FairnessMetrics.groupStatisticalParityDifference(
        _jsamples,
        model,
@@ -63,32 +73,40 @@ def statistical_parity_difference_model(


def disparate_impact_ratio(
-    privileged: pd.DataFrame,
-    unprivileged: pd.DataFrame,
+    privileged: Union[pd.DataFrame, np.ndarray],
+    unprivileged: Union[pd.DataFrame, np.ndarray],
    favorable: OneOutputUnionType,
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
7182 """Calculate Disparate Impact Ration between privileged and unprivileged dataframes"""
    favorable_prediction_object = one_output_convert(favorable)
    return FairnessMetrics.groupDisparateImpactRatio(
-        pandas_to_trusty(privileged, outputs),
-        pandas_to_trusty(unprivileged, outputs),
+        to_trusty_dataframe(
+            data=privileged, outputs=outputs, feature_names=feature_names
+        ),
+        to_trusty_dataframe(
+            data=unprivileged, outputs=outputs, feature_names=feature_names
+        ),
        favorable_prediction_object.outputs,
    )
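
# Sketch of how the two group metrics above relate (hedged; using the common
# definitions, where p is the rate of the favorable outcome in the privileged
# frame and u the rate in the unprivileged frame):
#   statistical_parity_difference  ~  u - p   (0 indicates parity)
#   disparate_impact_ratio         ~  u / p   (1 indicates parity; values below
#                                              0.8 are often flagged under the
#                                              "four-fifths" rule)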


# pylint: disable = line-too-long
def disparate_impact_ratio_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    favorable: OneOutputUnionType,
+    feature_names: Optional[List[str]] = None,
) -> float:
88104 """Calculate Disparate Impact Ration using a samples dataframe and a model"""
    favorable_prediction_object = one_output_convert(favorable)
    _privilege_values = [Value(v) for v in privilege_values]
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    return FairnessMetrics.groupDisparateImpactRatio(
        _jsamples,
        model,
@@ -100,12 +118,13 @@ def disparate_impact_ratio_model(

# pylint: disable = too-many-arguments
def average_odds_difference(
-    test: pd.DataFrame,
-    truth: pd.DataFrame,
+    test: Union[pd.DataFrame, np.ndarray],
+    truth: Union[pd.DataFrame, np.ndarray],
    privilege_columns: ColumSelector,
    privilege_values: OneOutputUnionType,
    positive_class: List[Any],
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
110129 """Calculate Average Odds between two dataframes"""
    if test.shape != truth.shape:
@@ -117,23 +136,26 @@ def average_odds_difference(
    # determine privileged columns
    _privilege_columns = _column_selector_to_index(privilege_columns, test)
    return FairnessMetrics.groupAverageOddsDifference(
-        pandas_to_trusty(test, outputs),
-        pandas_to_trusty(truth, outputs),
+        to_trusty_dataframe(data=test, outputs=outputs, feature_names=feature_names),
+        to_trusty_dataframe(data=truth, outputs=outputs, feature_names=feature_names),
        _privilege_columns,
        _privilege_values,
        _positive_class,
    )
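
# Hedged usage sketch for average_odds_difference. The dataframe names, the
# "gender" column, and the value 1 are assumptions for illustration: test holds
# the model's predictions, truth holds the ground-truth labels, and both must
# share the same shape, as checked above.
#
#   aod = average_odds_difference(
#       test=test_df,
#       truth=truth_df,
#       privilege_columns=["gender"],
#       privilege_values=[1],
#       positive_class=[1],
#   )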


def average_odds_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
+    feature_names: Optional[List[str]] = None,
) -> float:
135155 """Calculate Average Odds for a sample dataframe using the provided model"""
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(
+        data=samples, no_outputs=True, feature_names=feature_names
+    )
    _privilege_values = [Value(v) for v in privilege_values]
    _positive_class = [Value(v) for v in positive_class]
    # determine privileged columns
@@ -144,12 +166,13 @@ def average_odds_difference_model(


def average_predictive_value_difference(
-    test: pd.DataFrame,
-    truth: pd.DataFrame,
+    test: Union[pd.DataFrame, np.ndarray],
+    truth: Union[pd.DataFrame, np.ndarray],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
    outputs: Optional[List[int]] = None,
+    feature_names: Optional[List[str]] = None,
) -> float:
    """Calculate Average Predictive Value Difference between two dataframes"""
    if test.shape != truth.shape:
@@ -160,8 +183,8 @@ def average_predictive_value_difference(
    _positive_class = [Value(v) for v in positive_class]
    _privilege_columns = _column_selector_to_index(privilege_columns, test)
    return FairnessMetrics.groupAveragePredictiveValueDifference(
-        pandas_to_trusty(test, outputs),
-        pandas_to_trusty(truth, outputs),
+        to_trusty_dataframe(data=test, outputs=outputs, feature_names=feature_names),
+        to_trusty_dataframe(data=truth, outputs=outputs, feature_names=feature_names),
        _privilege_columns,
        _privilege_values,
        _positive_class,
@@ -170,14 +193,14 @@ def average_predictive_value_difference(

# pylint: disable = line-too-long
def average_predictive_value_difference_model(
-    samples: pd.DataFrame,
+    samples: Union[pd.DataFrame, np.ndarray],
    model: Union[PredictionProvider, Model],
    privilege_columns: ColumSelector,
    privilege_values: List[Any],
    positive_class: List[Any],
) -> float:
    """Calculate Average Predictive Value Difference for a sample dataframe using the provided model"""
-    _jsamples = pandas_to_trusty(samples, no_outputs=True)
+    _jsamples = to_trusty_dataframe(samples, no_outputs=True)
    _privilege_values = [Value(v) for v in privilege_values]
    _positive_class = [Value(v) for v in positive_class]
    # determine privileged columns