diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc52dd2..64e757d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Changelog
 
+## Version 1.0.9 - 2025-06
+- Fix a bug when rows are dropped during preprocessing
+
 ## Version 1.0.8 - 2022-09-13
 - In the model view, use the code env that was used to train the model
 - No need to build a Python 3.6 code env anymore
diff --git a/plugin.json b/plugin.json
index c7a9a08..be321fb 100755
--- a/plugin.json
+++ b/plugin.json
@@ -1,10 +1,10 @@
 {
     "id": "model-fairness-report",
-    "version": "1.0.8",
+    "version": "1.0.9",
     "meta": {
         "label": "Model Fairness Report",
         "description": "Get insight on the fairness of a model.",
-        "author": "Dataiku (Du PHAN)",
+        "author": "Dataiku",
         "icon": "icon-group",
         "licenseInfo": "Apache Software License",
         "url": "https://www.dataiku.com/product/plugins/model-fairness-report/",
diff --git a/python-lib/dku_webapp/utils.py b/python-lib/dku_webapp/utils.py
index 1a867b4..df01eee 100644
--- a/python-lib/dku_webapp/utils.py
+++ b/python-lib/dku_webapp/utils.py
@@ -93,13 +93,13 @@ def get_histograms(model_accessor, advantageous_outcome, sensitive_column):
     test_df = raw_test_df.dropna(subset=[sensitive_column])
 
     target_variable = model_accessor.get_target_variable()
-    y_true = test_df.loc[:, target_variable]
     pred_df = model_accessor.predict(test_df)
+    y_true = test_df.iloc[pred_df.index].loc[:, target_variable]
     y_pred = pred_df.loc[:, DkuWebappConstants.PREDICTION]
     advantageous_outcome_proba_col = 'proba_{}'.format(advantageous_outcome)
     y_pred_proba = pred_df.loc[:, advantageous_outcome_proba_col]
-    sensitive_feature_values = test_df[sensitive_column]
+    sensitive_feature_values = test_df.iloc[pred_df.index][sensitive_column]
 
     return get_histogram_data(y_true, y_pred, y_pred_proba, advantageous_outcome, sensitive_feature_values)
 
@@ -109,11 +109,10 @@ def get_metrics(model_accessor, advantageous_outcome, sensitive_column, referenc
     target_variable = model_accessor.get_target_variable()
     test_df.dropna(subset=[sensitive_column, target_variable], how='any', inplace=True)
 
-    y_true = test_df.loc[:, target_variable]
     pred_df = model_accessor.predict(test_df)
+    y_true = test_df.iloc[pred_df.index].loc[:, target_variable]
     y_pred = pred_df.loc[:, DkuWebappConstants.PREDICTION]
-
     try: # check whether or not the column can be casted to int
         if np.array_equal(test_df[sensitive_column], test_df[sensitive_column].astype(int)):
             test_df[sensitive_column] = test_df[sensitive_column].astype(int)
@@ -124,8 +123,8 @@ def get_metrics(model_accessor, advantageous_outcome, sensitive_column, referenc
     except Exception as e:
         logger.info('Sensitive column can not be casted to int. ', e)
         pass
-
-    sensitive_feature_values = test_df[sensitive_column]
+
+    sensitive_feature_values = test_df.iloc[pred_df.index][sensitive_column]
     model_report = ModelFairnessMetricReport(y_true, y_pred, sensitive_feature_values, advantageous_outcome)
     population_names = sensitive_feature_values.unique()
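
Note on the fix, for reviewers: `model_accessor.predict` can drop rows that the model's preprocessing rejects, so `pred_df` may be shorter than `test_df`. The old code built `y_true` and `sensitive_feature_values` from the full `test_df`, misaligning them with `y_pred`; the new code slices `test_df` positionally with `pred_df.index`. Below is a minimal sketch of the realignment, assuming `predict` returns a frame indexed by the positions of the surviving input rows; the toy `predict` and the column names are hypothetical stand-ins, not the plugin's API.

```python
import pandas as pd

def predict(test_df):
    # Hypothetical stand-in for model_accessor.predict: the model's
    # preprocessing drops rows with missing features, and the returned
    # frame is indexed by the *positions* of the surviving input rows.
    df = test_df.reset_index(drop=True)
    kept = df.dropna(subset=["feature"])
    return pd.DataFrame({"prediction": ["yes"] * len(kept)}, index=kept.index)

test_df = pd.DataFrame({
    "feature": [1.0, None, 3.0],   # row 1 will be dropped by preprocessing
    "target": ["yes", "no", "yes"],
    "gender": ["f", "m", "f"],
})

pred_df = predict(test_df)         # pred_df.index == [0, 2]

# Old (buggy): y_true = test_df["target"] has 3 rows while y_pred has 2.
# New: realign on pred_df.index so every series matches y_pred row-for-row.
y_true = test_df.iloc[pred_df.index]["target"]
sensitive_feature_values = test_df.iloc[pred_df.index]["gender"]
y_pred = pred_df["prediction"]

assert len(y_true) == len(y_pred) == len(sensitive_feature_values) == 2
```

This relies on `pred_df.index` being positional with respect to `test_df`; if `predict` instead preserved `test_df`'s original index labels, `.loc[pred_df.index]` would be the appropriate accessor.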