diff --git a/src/edvise/scripts/predictions_h2o.py b/src/edvise/scripts/predictions_h2o.py index 61b9ac95..5979268a 100644 --- a/src/edvise/scripts/predictions_h2o.py +++ b/src/edvise/scripts/predictions_h2o.py @@ -256,7 +256,7 @@ def run_predictions( *, run_type: RunType, df_inference: pd.DataFrame | None = None, - test_sample_cap: int = 200, + test_sample_cap: int | None = 200, ) -> PredOutputs: ft = load_features_table(pred_paths.features_table_path) model, model_feature_names = load_model_and_features(pred_cfg.model_run_id) @@ -268,12 +268,16 @@ def run_predictions( df_train, df_test_all = extract_and_split_training_data( pred_cfg.experiment_id, pred_cfg.split_col ) - df_test = sample_rows( - df_test_all, - min(test_sample_cap, len(df_test_all)), - pred_cfg.random_state, - "df_test(train)", - ) + if test_sample_cap is None: + # Use full test dataset when test_sample_cap is None + df_test = df_test_all.copy() + else: + df_test = sample_rows( + df_test_all, + min(test_sample_cap, len(df_test_all)), + pred_cfg.random_state, + "df_test(train)", + ) else: # PREDICT: inference input df_test = df_inference diff --git a/src/edvise/scripts/training_h2o.py b/src/edvise/scripts/training_h2o.py index 3396d9de..68257172 100644 --- a/src/edvise/scripts/training_h2o.py +++ b/src/edvise/scripts/training_h2o.py @@ -372,8 +372,13 @@ def make_predictions(self, current_run_path): label="Training SHAP Feature Importance table", ) + # Generate support_overview using full test dataset (same as ROC table) + logging.info("Generating support_overview table from full test dataset") + out_full = run_predictions( + pred_cfg=cfg, pred_paths=paths, run_type=RunType.TRAIN, test_sample_cap=None + ) self.write_delta( - df=out.support_score_distribution, + df=out_full.support_score_distribution, table_name_suffix=f"training_{self.cfg.model.run_id}_support_overview", label="Training Support Overview table", )