Merge pull request #24 from NGO-Algorithm-Audit/feature/content-headings-structure

devhelpr · web-flow · commit b5d3d7c25a25 · 2025-01-03T11:14:08.000+01:00
Feature/content headings structure
diff --git a/src/assets/synthetic-data.tsx b/src/assets/synthetic-data.tsx
@@ -125,9 +125,7 @@ def run():
 
     admissions_sub = admissions_df[['sex', 'race1', 'ugpa', 'bar']]
     real_data = admissions_sub.dropna()
-    setResult(json.dumps(
-            {'type': 'heading', 'data': sdgMethod}
-    ))
+    
     if isDemo: 
         setResult(json.dumps({
             'type': 'heading',
@@ -138,6 +136,11 @@ def run():
             'key': 'syntheticData.demo.description'
         }))
 
+
+    setResult(json.dumps({
+        'type': 'heading',
+        'headingKey': 'syntheticData.dataSetPreview.heading'
+    }))
     setResult(json.dumps(
         {'type': 'data-set-preview', 'data': ''}
     ))
@@ -207,27 +210,7 @@ def run():
 
     results = run_diagnostic(real_data, synthetic_data, target_column='gpa')  
     print('Results:', results)
-    setResult(json.dumps(
-        {'type': 'heading', 'data': 'Diagnostic Results:'}
-    ))
-    setResult(json.dumps({'type': 'table', 'data': json.dumps([
-        {
-            'attribute': key,
-            'ks_stat': values['ks_stat'],
-            'p_value': values['p_value']
-        }
-        for key, values in results['distribution_results'].items()
-    ])}))
-
-    setResult(json.dumps(
-        {'type': 'heading', 'data': 'Correlation difference: ' + str(results['correlation_diff']) }
-    ))
-
-    setResult(json.dumps(
-        {'type': 'heading', 'data': '5. Output data'}
-    ))
-    setResult(json.dumps({'type': 'table', 'data': synthetic_data.head().to_json(orient="records")}))
-
+    
 
     # copy dataframe and assign NaN to all values
     synth_df = real_data.copy()
@@ -247,7 +230,26 @@ def run():
         'realCorrelations': real_data.corr().to_json(orient="records"),
         'syntheticCorrelations': synthetic_data.corr().to_json(orient="records"),
         'reports' : [
-            'univariate', 'distribution', 'correlation'
+            {
+                'reportType': 'heading',
+                'headingKey': 'syntheticData.explanatoryDataAnalysisTitle'
+            },
+            {'reportType': 'univariate'},
+            {
+                'reportType': 'heading',
+                'headingKey': 'syntheticData.cartModelTitle'
+            },
+             {
+                'reportType': 'text',
+                'textKey': 'syntheticData.cartModelDescription'
+            },
+            {
+                'reportType': 'heading',
+                'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
+            },
+            {'reportType': 'distribution'},
+            {'reportType': 'correlation'},
+            
         ]
     }))
 
@@ -257,6 +259,44 @@ def run():
     #     'synthetic': synthetic_data.corr().to_json(orient="records")
     # }))
 
+    setResult(json.dumps(
+        {
+            'type': 'heading', 
+            'headingKey': 'syntheticData.diagnosticsTitle'
+        }
+    ))
+    setResult(json.dumps({'type': 'table', 
+        'showIndex' : False,
+        'data': json.dumps([
+            {
+                'attribute': key,
+                'ks_stat': values['ks_stat'],
+                'p_value': values['p_value']
+            }
+            for key, values in results['distribution_results'].items()
+        ])}))
+
+    setResult(json.dumps(
+        {
+            'type': 'heading', 
+            'headingKey': 'syntheticData.correlationDifference',
+            'params': {
+                'correlationDifference' : str(results['correlation_diff'])
+            }
+        }
+    ))
+
+    setResult(json.dumps({
+        'type': 'heading', 
+        'headingKey': 'syntheticData.outputDataTitle'        
+    }))
+
+    setResult(json.dumps({
+        'type': 'table', 
+        'showIndex': True,
+        'data': synthetic_data.head().to_json(orient="records")
+    }))
+
     return 
     
 
diff --git a/src/components/SimpleTable.tsx b/src/components/SimpleTable.tsx
@@ -12,7 +12,9 @@ import { useTranslation } from 'react-i18next';
 export default function SimpleTable({
     title,
     data,
+    showIndex,
 }: {
+    showIndex: boolean;
     title?: string;
     data: Record<string, string | number>[];
 }) {
@@ -26,6 +28,7 @@ export default function SimpleTable({
                 {title && <TableCaption>{t(title)}</TableCaption>}
                 <TableHeader>
                     <TableRow className="bg-aaLight">
+                        {showIndex && <TableHead></TableHead>}
                         {Object.keys(data[0]).map(key => (
                             <TableHead key={key} className="text-black">
                                 {key}
@@ -36,6 +39,7 @@ export default function SimpleTable({
                 <TableBody>
                     {data.map((row, i) => (
                         <TableRow key={i}>
+                            {showIndex && <TableCell>{i}</TableCell>}
                             {Object.values(row).map((value, i) => (
                                 <TableCell key={i}>{value}</TableCell>
                             ))}
diff --git a/src/components/componentMapper.tsx b/src/components/componentMapper.tsx
@@ -90,6 +90,7 @@ export default function ComponentMapper({
                                     <SimpleTable
                                         data={data.data.slice(0, 5)}
                                         title="datasetPreview"
+                                        showIndex={true}
                                     />
                                 )}
                             </Fragment>
@@ -101,6 +102,7 @@ export default function ComponentMapper({
                                 key={index}
                                 data={JSON.parse(resultItem.data)}
                                 title={t(resultItem.title)}
+                                showIndex={resultItem.showIndex ?? false}
                             />
                         );
 
@@ -167,17 +169,14 @@ export default function ComponentMapper({
                         );
 
                     case 'text':
-                        // Handle text that might need translation
-                        const textContent = resultItem.key
-                            ? t(resultItem.key, resultItem.params)
-                            : resultItem.data;
-
                         return (
                             <Markdown
                                 key={index}
                                 className="-mt-2 text-gray-800 markdown"
                             >
-                                {textContent}
+                                {resultItem.key
+                                    ? t(resultItem.key, resultItem.params)
+                                    : resultItem.data}
                             </Markdown>
                         );
                     case 'histogram': {
@@ -214,10 +213,58 @@ export default function ComponentMapper({
                         const dataTypes = JSON.parse(resultItem.dataTypes);
                         console.log('reports', resultItem.reports);
                         return (
-                            <div key={`distribution-${index}`}>
+                            <div
+                                key={`distribution-${index}`}
+                                className="flex flex-col gap-6"
+                            >
                                 {resultItem.reports.map(
-                                    (report: string, indexReport: number) => {
-                                        if (report === 'univariate') {
+                                    (
+                                        report: {
+                                            reportType: string;
+                                            headingKey?: string;
+                                            textKey?: string;
+                                            params?: Record<
+                                                string,
+                                                string | number | boolean
+                                            >;
+                                        },
+                                        indexReport: number
+                                    ) => {
+                                        if (
+                                            report.reportType === 'heading' &&
+                                            report.headingKey
+                                        ) {
+                                            return (
+                                                <h5
+                                                    key={indexReport}
+                                                    className="text-gray-800 font-semibold mb-4"
+                                                >
+                                                    {t(
+                                                        report.headingKey,
+                                                        report.params
+                                                    )}
+                                                </h5>
+                                            );
+                                        }
+                                        if (
+                                            report.reportType === 'text' &&
+                                            report.textKey
+                                        ) {
+                                            return (
+                                                <Markdown
+                                                    key={index}
+                                                    className="-mt-2 text-gray-800 markdown"
+                                                >
+                                                    {t(
+                                                        report.textKey,
+                                                        report.params
+                                                    )}
+                                                </Markdown>
+                                            );
+                                        }
+                                        if (
+                                            report.reportType === 'univariate'
+                                        ) {
                                             return (
                                                 <div
                                                     key={indexReport}
@@ -250,7 +297,9 @@ export default function ComponentMapper({
                                                 </div>
                                             );
                                         }
-                                        if (report === 'distribution') {
+                                        if (
+                                            report.reportType === 'distribution'
+                                        ) {
                                             return (
                                                 <Fragment key={indexReport}>
                                                     {realData.length === 0 ||
@@ -325,7 +374,9 @@ export default function ComponentMapper({
                                             );
                                         }
 
-                                        if (report === 'correlation') {
+                                        if (
+                                            report.reportType === 'correlation'
+                                        ) {
                                             const {
                                                 columns: realColumns,
                                                 data: convertedData,
diff --git a/src/locales/en.json b/src/locales/en.json
@@ -16,7 +16,7 @@
     "downloadButton": "Download",
     "biasSettings": {
         "exportToPDF": "Download bias detection report as pdf",
-        "exportToJSON": "Export clusters as json", 
+        "exportToJSON": "Export clusters as json",
         "form": {
             "fieldsets": {
                 "data": {
@@ -59,7 +59,7 @@
     "syntheticData": {
         "demo": {
             "heading": "Information about demo dataset",
-            "description": "A subset of the [Law School Admission Bar](https://www.kaggle.com/datasets/danofer/law-school-admissions-bar-passage)* dataset is used as a demo. Synthetic data will be generated for the following columns:\n  \n&nbsp;&nbsp;\n- sex: student gender, i.e. 1 (male), 2 (female);\n- race1: race of student, i.e., asian, black, hispanic, white, other;\n- ugpa: undergraduate GPA of student (average course grades), continous variable;\n- bar: Ground truth label indicating whether or not the student passed the bar, i.e., passed 1st time, passed 2nd time, failed, non-graduated.\n  \n&nbsp;&nbsp;\n\nThe CART method will be used  evaluate the distribution and correlation differences between the real and synthetic data.\n  \n&nbsp;&nbsp;\n\n*The original paper can be found [here](https://files.eric.ed.gov/fulltext/ED469370.pdf)\n \n&nbsp;&nbsp;\n 0. Preview of the dataset"
+            "description": "A subset of the [Law School Admission Bar](https://www.kaggle.com/datasets/danofer/law-school-admissions-bar-passage)* dataset is used as a demo. Synthetic data will be generated for the following columns:\n  \n&nbsp;&nbsp;\n- sex: student gender, i.e. 1 (male), 2 (female);\n- race1: race of student, i.e., asian, black, hispanic, white, other;\n- ugpa: undergraduate GPA of student (average course grades), continuous variable;\n- bar: Ground truth label indicating whether or not the student passed the bar, i.e., passed 1st time, passed 2nd time, failed, non-graduated.\n  \n&nbsp;&nbsp;\n\nThe CART method will be used to evaluate the distribution and correlation differences between the real and synthetic data.\n  \n&nbsp;&nbsp;\n\n*The original paper can be found [here](https://files.eric.ed.gov/fulltext/ED469370.pdf)\n \n&nbsp;&nbsp;\n"
         },
         "exportToPDF": "Download evaluation report as pdf",
         "exportToJSON": "Download synthetic data as json",
@@ -89,11 +89,21 @@
             "title": "Try it out!",
             "description": "Do you not have a dataset at hand? No worries use our demo dataset."
         },
-        "columnsInDataset": "1. Data types detection",
         "columnsInDatasetInfo": "If detected data types are incorrect, please change this locally in the dataset before attaching it again.",
         "univariateCharts": "Univariate distributions of the attached dataset",
         "synthVsReal": "Univariate distributions of synthetic vs real data",
-        "heatmapCorrelation": "Correlation matrix"
+        "heatmapCorrelation": "Correlation matrix",
+        "dataSetPreview": {
+            "heading": "0. Preview of data"
+        },
+        "columnsInDataset": "1. Data types detection",
+        "explanatoryDataAnalysisTitle": "2. Exploratory data analysis",
+        "cartModelTitle": "3. CART model",
+        "cartModelDescription": "The CART (Classification and Regression Trees) method generates synthetic data by learning patterns from real data through a decision tree that splits data into homogeneous groups based on feature values. It predicts averages for numerical data and assigns the most common category for categorical data, using these predictions to create new synthetic points.",
+        "evaluationOfGeneratedDataTitle": "4. Evaluation of generated data",
+        "outputDataTitle": "5. Output data",
+        "diagnosticsTitle": "Diagnostic Results:",
+        "correlationDifference": "Correlation difference: {{correlationDifference}}"
     },
 
     "biasAnalysis": {
diff --git a/src/locales/nl.json b/src/locales/nl.json
@@ -89,11 +89,21 @@
             "title": "Probeer het uit!",
             "description": "Heeft u geen dataset bij de hand? Geen zorgen, gebruik onze demodataset."
         },
-        "columnsInDataset": "1. Detectie van datatypes",
         "columnsInDatasetInfo": "Als de gedetecteerd data types niet correct zijn, pas dit dan lokaal aan in de dataset voordat u deze opnieuw aan de app koppelt.",
         "univariateCharts": "Univariate distributies van de gekoppelde dataset",
         "synthVsReal": "Univariate distributies in synthetische vs originele data",
-        "heatmapCorrelation": "Correlatiematrix"
+        "heatmapCorrelation": "Correlatiematrix",
+        "dataSetPreview": {
+            "heading": "0. Preview van de data"
+        },
+        "columnsInDataset": "1. Detectie van datatypes",
+        "explanatoryDataAnalysisTitle": "2. Exploratieve data-analyse",
+        "cartModelTitle": "3. CART model",
+        "cartModelDescription": "De CART-methode (Classification and Regression Trees) genereert synthetische data door patronen uit echte data te leren via een beslisboom die de data opdeelt in homogene groepen op basis van kenmerkwaarden. Voor numerieke data voorspelt de methode gemiddelden, en voor categorische data wijst het de meest voorkomende categorie toe. Deze voorspellingen worden vervolgens gebruikt om nieuwe synthetische gegevenspunten te creëren.",
+        "evaluationOfGeneratedDataTitle": "4. Evaluatie van gegenereerde data",
+        "outputDataTitle": "5. Output data",
+        "diagnosticsTitle": "Diagnostische Resultaten:",
+        "correlationDifference": "Correlatieverschil: {{correlationDifference}}"
     },
     "biasAnalysis": {
         "demo": {