Merge pull request #56 from NGO-Algorithm-Audit/feature/additional-text-and-report-data-march-2025-part-2

devhelpr · web-flow · commit 6b553c128cf9 · 2025-03-20T21:22:46.000+01:00
Feature/additional text and report data march 2025 part 2
diff --git a/src/assets/synthetic-data.tsx b/src/assets/synthetic-data.tsx
@@ -161,7 +161,8 @@ def run():
     missingness_dict = md_handler.detect_missingness(real_data)
     print("Detected Missingness Type:", missingness_dict)
 
-    missingness_dict_df = pd.DataFrame(missingness_dict, index=[0])
+    missingness_dict_df = pd.DataFrame(list(missingness_dict.items()), columns=['key', 'value'])
+    missingness_dict_df = missingness_dict_df.rename(columns={'key': 'Column', 'value': 'Missing data type'})
 
     df_imputed = md_handler.apply_imputation(real_data, missingness_dict)
     
@@ -255,20 +256,35 @@ def run():
     report_df = report.generate_report()    
     print('report_df:', report_df)
 
+    
+
     # combine empty synthetic data with original data and with encoded data 
     combined_data = pd.concat((df_imputed.assign(realOrSynthetic='real'), synthetic_data.assign(realOrSynthetic='synthetic')), keys=['real','synthetic'], names=['Data'])
 
+    metrics_list = []
+
+    # Loop through column_dtypes
     for column in column_dtypes:
         if column_dtypes[column] == 'categorical':
             reg_efficacy = EfficacyMetrics(task='classification', target_column=column)
-            reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
-            print("=== Regression Efficacy Metrics ===", column)
-            print(reg_metrics)
         else:
             reg_efficacy = EfficacyMetrics(task='regression', target_column=column)
-            reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
-            print("=== Regression Efficacy Metrics ===", column)
-            print(reg_metrics)
+        
+
+        reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
+        reg_metrics['dataType'] = column_dtypes[column]
+    
+        # Append the column name and its metrics as a dictionary
+        reg_metrics['column'] = column  # Add column name to the metrics dictionary
+        
+        metrics_list.append(reg_metrics)
+
+    # Convert list of dictionaries to DataFrame
+    metrics_df = pd.DataFrame(metrics_list)
+    columns_order = ['dataType'] + [col for col in metrics_df.columns if col != 'dataType']
+    metrics_df = metrics_df[columns_order]
+    columns_order = ['column'] + [col for col in metrics_df.columns if col != 'column']
+    metrics_df = metrics_df[columns_order]
 
     clf_efficacy = EfficacyMetrics(task='classification', target_column="bar")
     clf_metrics = clf_efficacy.evaluate(df_imputed, synthetic_data)
@@ -335,6 +351,12 @@ def run():
                     'contentType' : 'correlationSyntheticData'
                 }]
             },
+            {
+                'reportType': 'table',
+                'titleKey': 'syntheticData.efficacyMetricsTitle',
+                'showIndex' : False,
+                'data': metrics_df.to_json(orient="records"),                
+            },
             {
                 'reportType': 'table',
                 'titleKey': 'syntheticData.disclosureProtectionTitle',
diff --git a/src/components/CSVReader.tsx b/src/components/CSVReader.tsx
@@ -52,6 +52,9 @@ export default function CSVReader({ onChange }: csvReader) {
     const { acceptedFiles, getRootProps, getInputProps } = useDropzone({
         onDrop,
         maxFiles: 1,
+        accept: {
+            'text/csv': ['.csv'],
+        },
     });
 
     if (acceptedFiles.length > 0) {
@@ -91,7 +94,7 @@ export default function CSVReader({ onChange }: csvReader) {
             {...getRootProps()}
             className="border-aaDark border-dashed border-2 cursor-pointer rounded-xl text-center p-10 lg:min-w-[400px]"
         >
-            <input {...getInputProps()} multiple={false} />
+            <input {...getInputProps()} multiple={false} accept=".csv" />
 
             <FormLabel>{t('dropzoneLabel')}</FormLabel>
         </FormItem>
diff --git a/src/locales/en.json b/src/locales/en.json
@@ -135,6 +135,7 @@
         "evaluationOfGeneratedDataTitle": "4. Evaluation of generated data",
         "diagnosticsReportTitle": "5. Diagnostic Report",
         "diagnosticsTitle": "Diagnostic Results",
+        "efficacyMetricsTitle": "Efficacy metrics",
         "disclosureProtectionTitle": "Disclosure protection",
         "bivariateDistributionSyntheticDataTitle": "6. Bivariate distributions",
         "outputDataTitle": "7. Generated synthetic data",
diff --git a/src/locales/nl.json b/src/locales/nl.json
@@ -135,6 +135,7 @@
         "evaluationOfGeneratedDataTitle": "4. Evaluatie van gegenereerde data",
         "diagnosticsReportTitle": "6. Diagnostisch rapport",
         "diagnosticsTitle": "Diagnostische Resultaten",
+        "efficacyMetricsTitle": "Doeltreffendheid metrieken",
         "disclosureProtectionTitle": "Disclosure protection",
         "bivariateDistributionSyntheticDataTitle": "6. Bivariate distributies",
         "outputDataTitle": "7. Output data",