@@ -292,14 +292,21 @@ def run():
292292
293293 # Convert categorical variables to numerical values
294294 df_encoded = real_data.copy()
295- df_encoded['sex'] = df_encoded['sex'].astype('category').cat.codes
296- df_encoded['race1'] = df_encoded['race1'].astype('category').cat.codes
297- df_encoded['bar'] = df_encoded['bar'].astype('category').cat.codes
298-
299295 synth_df_encoded = synthetic_data.copy()
300- synth_df_encoded['sex'] = synth_df_encoded['sex'].astype('category').cat.codes
301- synth_df_encoded['race1'] = synth_df_encoded['race1'].astype('category').cat.codes
302- synth_df_encoded['bar'] = synth_df_encoded['bar'].astype('category').cat.codes
296+
297+ for column in column_dtypes:
298+ if column_dtypes[column] == 'categorical':
299+ df_encoded[column] = df_encoded[column].astype('category').cat.codes
300+ synth_df_encoded[column] = synth_df_encoded[column].astype('category').cat.codes
301+
302+ # df_encoded['sex'] = df_encoded['sex'].astype('category').cat.codes
303+ # df_encoded['race1'] = df_encoded['race1'].astype('category').cat.codes
304+ # df_encoded['bar'] = df_encoded['bar'].astype('category').cat.codes
305+
306+ # synth_df_encoded = synthetic_data.copy()
307+ # synth_df_encoded['sex'] = synth_df_encoded['sex'].astype('category').cat.codes
308+ # synth_df_encoded['race1'] = synth_df_encoded['race1'].astype('category').cat.codes
309+ # synth_df_encoded['bar'] = synth_df_encoded['bar'].astype('category').cat.codes
303310
304311 # Output some results
305312 print("Original Data (first 5 rows):", real_data.head())
@@ -312,7 +319,10 @@ def run():
312319
313320 # results = run_diagnostic(real_data, synthetic_data, target_column='gpa')
314321 # print('Results:', results)
315-
322+
323+ report = MetricsReport(real_data, synthetic_data, metadata)
324+ report_df = report.generate_report()
325+ print('report_df:', report_df)
316326
317327 # combine empty synthetic data with original data and with encoded data
318328 combined_data = pd.concat((real_data.assign(realOrSynthetic='real'), synthetic_data.assign(realOrSynthetic='synthetic')), keys=['real','synthetic'], names=['Data'])
@@ -339,22 +349,15 @@ def run():
339349 'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
340350 },
341351 {'reportType': 'univariateDistributionSyntheticData'},
342- # {
343- # 'reportType': 'table',
344- # 'titleKey': 'syntheticData.diagnosticsTitle',
345- # 'showIndex' : False,
346- # 'data': json.dumps([
347- # {
348- # 'attribute': key,
349- # 'ks_stat': values['ks_stat'],
350- # 'p_value': values['p_value']
351- # }
352- # for key, values in results['distribution_results'].items()
353- # ]),
352+ {
353+ 'reportType': 'table',
354+ 'titleKey': 'syntheticData.diagnosticsTitle',
355+ 'showIndex' : False,
356+ 'data': report_df.to_json(orient="records"),
354357 # 'postContent': json.dumps([{
355358 # 'contentType' : 'correlationSyntheticData'
356359 # }])
357- # },
360+ },
358361 {'reportType': 'bivariateDistributionSyntheticData'}
359362 ]
360363 }))
0 commit comments