@@ -161,7 +161,8 @@ def run():
161161 missingness_dict = md_handler.detect_missingness(real_data)
162162 print("Detected Missingness Type:", missingness_dict)
163163
164- missingness_dict_df = pd.DataFrame(missingness_dict, index=[0])
164+ missingness_dict_df = pd.DataFrame(list(missingness_dict.items()), columns=['key', 'value'])
165+ missingness_dict_df = missingness_dict_df.rename(columns={'key': 'Column', 'value': 'Missing data type'})
165166
166167 df_imputed = md_handler.apply_imputation(real_data, missingness_dict)
167168
@@ -255,20 +256,35 @@ def run():
255256 report_df = report.generate_report()
256257 print('report_df:', report_df)
257258
259+
260+
258261 # combine empty synthetic data with original data and with encoded data
259262 combined_data = pd.concat((df_imputed.assign(realOrSynthetic='real'), synthetic_data.assign(realOrSynthetic='synthetic')), keys=['real','synthetic'], names=['Data'])
260263
264+ metrics_list = []
265+
266+ # Loop through column_dtypes
261267 for column in column_dtypes:
262268 if column_dtypes[column] == 'categorical':
263269 reg_efficacy = EfficacyMetrics(task='classification', target_column=column)
264- reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
265- print("=== Regression Efficacy Metrics ===", column)
266- print(reg_metrics)
267270 else:
268271 reg_efficacy = EfficacyMetrics(task='regression', target_column=column)
269- reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
270- print("=== Regression Efficacy Metrics ===", column)
271- print(reg_metrics)
272+
273+
274+ reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
275+ reg_metrics['dataType'] = column_dtypes[column]
276+
277+ # Append the column name and its metrics as a dictionary
278+ reg_metrics['column'] = column # Add column name to the metrics dictionary
279+
280+ metrics_list.append(reg_metrics)
281+
282+ # Convert list of dictionaries to DataFrame
283+ metrics_df = pd.DataFrame(metrics_list)
284+ columns_order = ['dataType'] + [col for col in metrics_df.columns if col != 'dataType']
285+ metrics_df = metrics_df[columns_order]
286+ columns_order = ['column'] + [col for col in metrics_df.columns if col != 'column']
287+ metrics_df = metrics_df[columns_order]
272288
273289 clf_efficacy = EfficacyMetrics(task='classification', target_column="bar")
274290 clf_metrics = clf_efficacy.evaluate(df_imputed, synthetic_data)
@@ -335,6 +351,12 @@ def run():
335351 'contentType' : 'correlationSyntheticData'
336352 }]
337353 },
354+ {
355+ 'reportType': 'table',
356+ 'titleKey': 'syntheticData.efficacyMetricsTitle',
357+ 'showIndex' : False,
358+ 'data': metrics_df.to_json(orient="records"),
359+ },
338360 {
339361 'reportType': 'table',
340362 'titleKey': 'syntheticData.disclosureProtectionTitle',
0 commit comments