Skip to content

Commit 4033a18

Browse files
authored
Merge pull request #48 from NGO-Algorithm-Audit/feature/diagnostics-for-new-python-synthpop
Feature/diagnostics for new python synthpop
2 parents ae1b5e0 + 4e3b25e commit 4033a18

File tree

3 files changed

+29
-24
lines changed

3 files changed

+29
-24
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ Run `npm run dev` to start the development server.
88
- upgrade files in public/pyodide
99
- download from https://github.com/pyodide/pyodide/releases and extract the files
1010
- download pyodide and pyodide-core
11-
- place only the needed files from pyodide in the public/pyodide folder
12-
- place all files from pyodide-core in the public/pyodide folder
11+
- place only the needed files from pyodide in the public/pyodide-[version] folder
12+
- place all files from pyodide-core in the public/pyodide-[version] folder
13+
- change the use-worker.ts file to point to the new version
1314

src/assets/synthetic-data.tsx

Lines changed: 24 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -292,14 +292,21 @@ def run():
292292
293293
# Convert categorical variables to numerical values
294294
df_encoded = real_data.copy()
295-
df_encoded['sex'] = df_encoded['sex'].astype('category').cat.codes
296-
df_encoded['race1'] = df_encoded['race1'].astype('category').cat.codes
297-
df_encoded['bar'] = df_encoded['bar'].astype('category').cat.codes
298-
299295
synth_df_encoded = synthetic_data.copy()
300-
synth_df_encoded['sex'] = synth_df_encoded['sex'].astype('category').cat.codes
301-
synth_df_encoded['race1'] = synth_df_encoded['race1'].astype('category').cat.codes
302-
synth_df_encoded['bar'] = synth_df_encoded['bar'].astype('category').cat.codes
296+
297+
for column in column_dtypes:
298+
if column_dtypes[column] == 'categorical':
299+
df_encoded[column] = df_encoded[column].astype('category').cat.codes
300+
synth_df_encoded[column] = synth_df_encoded[column].astype('category').cat.codes
301+
302+
# df_encoded['sex'] = df_encoded['sex'].astype('category').cat.codes
303+
# df_encoded['race1'] = df_encoded['race1'].astype('category').cat.codes
304+
# df_encoded['bar'] = df_encoded['bar'].astype('category').cat.codes
305+
306+
# synth_df_encoded = synthetic_data.copy()
307+
# synth_df_encoded['sex'] = synth_df_encoded['sex'].astype('category').cat.codes
308+
# synth_df_encoded['race1'] = synth_df_encoded['race1'].astype('category').cat.codes
309+
# synth_df_encoded['bar'] = synth_df_encoded['bar'].astype('category').cat.codes
303310
304311
# Output some results
305312
print("Original Data (first 5 rows):", real_data.head())
@@ -312,7 +319,10 @@ def run():
312319
313320
# results = run_diagnostic(real_data, synthetic_data, target_column='gpa')
314321
# print('Results:', results)
315-
322+
323+
report = MetricsReport(real_data, synthetic_data, metadata)
324+
report_df = report.generate_report()
325+
print('report_df:', report_df)
316326
317327
# combine empty synthetic data with original data and with encoded data
318328
combined_data = pd.concat((real_data.assign(realOrSynthetic='real'), synthetic_data.assign(realOrSynthetic='synthetic')), keys=['real','synthetic'], names=['Data'])
@@ -339,22 +349,15 @@ def run():
339349
'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
340350
},
341351
{'reportType': 'univariateDistributionSyntheticData'},
342-
# {
343-
# 'reportType': 'table',
344-
# 'titleKey': 'syntheticData.diagnosticsTitle',
345-
# 'showIndex' : False,
346-
# 'data': json.dumps([
347-
# {
348-
# 'attribute': key,
349-
# 'ks_stat': values['ks_stat'],
350-
# 'p_value': values['p_value']
351-
# }
352-
# for key, values in results['distribution_results'].items()
353-
# ]),
352+
{
353+
'reportType': 'table',
354+
'titleKey': 'syntheticData.diagnosticsTitle',
355+
'showIndex' : False,
356+
'data': report_df.to_json(orient="records"),
354357
# 'postContent': json.dumps([{
355358
# 'contentType' : 'correlationSyntheticData'
356359
# }])
357-
#},
360+
},
358361
{'reportType': 'bivariateDistributionSyntheticData'}
359362
]
360363
}))

src/components/DistributionReport.tsx

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,7 @@ export const DistributionReport = (
332332
};
333333
}
334334
);
335+
335336
return (
336337
<div key={column + column2}>
337338
<GroupBarChart
@@ -443,7 +444,7 @@ export const DistributionReport = (
443444
);
444445

445446
return (
446-
<div>
447+
<div key={column + column2}>
447448
<h2 className="text-center font-bold mt-2 text-[12px]">
448449
{column} vs {column2}
449450
</h2>

0 commit comments

Comments
 (0)