Skip to content

Commit 6b553c1

Browse files
authored
Merge pull request #56 from NGO-Algorithm-Audit/feature/additional-text-and-report-data-march-2025-part-2
Feature/additional text and report data march 2025 part 2
2 parents fa44e70 + 92ef762 commit 6b553c1

File tree

4 files changed

+35
-8
lines changed

4 files changed

+35
-8
lines changed

src/assets/synthetic-data.tsx

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ def run():
161161
missingness_dict = md_handler.detect_missingness(real_data)
162162
print("Detected Missingness Type:", missingness_dict)
163163
164-
missingness_dict_df = pd.DataFrame(missingness_dict, index=[0])
164+
missingness_dict_df = pd.DataFrame(list(missingness_dict.items()), columns=['key', 'value'])
165+
missingness_dict_df = missingness_dict_df.rename(columns={'key': 'Column', 'value': 'Missing data type'})
165166
166167
df_imputed = md_handler.apply_imputation(real_data, missingness_dict)
167168
@@ -255,20 +256,35 @@ def run():
255256
report_df = report.generate_report()
256257
print('report_df:', report_df)
257258
259+
260+
258261
# combine empty synthetic data with original data and with encoded data
259262
combined_data = pd.concat((df_imputed.assign(realOrSynthetic='real'), synthetic_data.assign(realOrSynthetic='synthetic')), keys=['real','synthetic'], names=['Data'])
260263
264+
metrics_list = []
265+
266+
# Loop through column_dtypes
261267
for column in column_dtypes:
262268
if column_dtypes[column] == 'categorical':
263269
reg_efficacy = EfficacyMetrics(task='classification', target_column=column)
264-
reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
265-
print("=== Regression Efficacy Metrics ===", column)
266-
print(reg_metrics)
267270
else:
268271
reg_efficacy = EfficacyMetrics(task='regression', target_column=column)
269-
reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
270-
print("=== Regression Efficacy Metrics ===", column)
271-
print(reg_metrics)
272+
273+
274+
reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
275+
reg_metrics['dataType'] = column_dtypes[column]
276+
277+
# Append the column name and its metrics as a dictionary
278+
reg_metrics['column'] = column # Add column name to the metrics dictionary
279+
280+
metrics_list.append(reg_metrics)
281+
282+
# Convert list of dictionaries to DataFrame
283+
metrics_df = pd.DataFrame(metrics_list)
284+
columns_order = ['dataType'] + [col for col in metrics_df.columns if col != 'dataType']
285+
metrics_df = metrics_df[columns_order]
286+
columns_order = ['column'] + [col for col in metrics_df.columns if col != 'column']
287+
metrics_df = metrics_df[columns_order]
272288
273289
clf_efficacy = EfficacyMetrics(task='classification', target_column="bar")
274290
clf_metrics = clf_efficacy.evaluate(df_imputed, synthetic_data)
@@ -335,6 +351,12 @@ def run():
335351
'contentType' : 'correlationSyntheticData'
336352
}]
337353
},
354+
{
355+
'reportType': 'table',
356+
'titleKey': 'syntheticData.efficacyMetricsTitle',
357+
'showIndex' : False,
358+
'data': metrics_df.to_json(orient="records"),
359+
},
338360
{
339361
'reportType': 'table',
340362
'titleKey': 'syntheticData.disclosureProtectionTitle',

src/components/CSVReader.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ export default function CSVReader({ onChange }: csvReader) {
5252
const { acceptedFiles, getRootProps, getInputProps } = useDropzone({
5353
onDrop,
5454
maxFiles: 1,
55+
accept: {
56+
'text/csv': ['.csv'],
57+
},
5558
});
5659

5760
if (acceptedFiles.length > 0) {
@@ -91,7 +94,7 @@ export default function CSVReader({ onChange }: csvReader) {
9194
{...getRootProps()}
9295
className="border-aaDark border-dashed border-2 cursor-pointer rounded-xl text-center p-10 lg:min-w-[400px]"
9396
>
94-
<input {...getInputProps()} multiple={false} />
97+
<input {...getInputProps()} multiple={false} accept=".csv" />
9598

9699
<FormLabel>{t('dropzoneLabel')}</FormLabel>
97100
</FormItem>

src/locales/en.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
"evaluationOfGeneratedDataTitle": "4. Evaluation of generated data",
136136
"diagnosticsReportTitle": "5. Diagnostic Report",
137137
"diagnosticsTitle": "Diagnostic Results",
138+
"efficacyMetricsTitle": "Efficacy metrics",
138139
"disclosureProtectionTitle": "Disclosure protection",
139140
"bivariateDistributionSyntheticDataTitle": "6. Bivariate distributions",
140141
"outputDataTitle": "7. Generated synthetic data",

src/locales/nl.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
"evaluationOfGeneratedDataTitle": "4. Evaluatie van gegenereerde data",
136136
"diagnosticsReportTitle": "5. Diagnostisch rapport",
137137
"diagnosticsTitle": "Diagnostische Resultaten",
138+
"efficacyMetricsTitle": "Doeltreffendheidsmetrieken",
138139
"disclosureProtectionTitle": "Disclosure protection",
139140
"bivariateDistributionSyntheticDataTitle": "6. Bivariate distributies",
140141
"outputDataTitle": "7. Output data",

0 commit comments

Comments
 (0)