Merge pull request #65 from NGO-Algorithm-Audit/feature/text-tweaks

devhelpr · web-flow · commit 13d990d10782 · 2025-03-25T16:05:34.000+01:00
Feature/text tweaks
diff --git a/src/assets/synthetic-data.tsx b/src/assets/synthetic-data.tsx
@@ -349,19 +349,25 @@ def run():
                 'reportType': 'heading',
                 'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
             },
+            {
+                'reportType': 'heading2',
+                'headingKey': 'syntheticData.distributionsTitle'
+            },
             {'reportType': 'univariateDistributionSyntheticData'},
+            {'reportType': 'bivariateDistributionSyntheticData'},
             {
-                'reportType': 'heading',
+                'reportType': 'heading2',
                 'headingKey': 'syntheticData.diagnosticsReportTitle'
             },
             {            
                 'reportType': 'table',
                 'titleKey': 'syntheticData.diagnosticsTitle',
                 'showIndex' : False,    
-                'data': report_df.to_json(orient="records"),                            
-                'postContent': [{
-                    'contentType' : 'correlationSyntheticData'
-                }]
+                'data': report_df.to_json(orient="records"),                                           
+            },
+            {            
+                'reportType': 'correlationSyntheticData',
+                'titleKey': 'syntheticData.correlationMatrixTitle',                
             },
             {
                 'reportType': 'table',
@@ -374,13 +380,7 @@ def run():
                 'titleKey': 'syntheticData.disclosureProtectionTitle',
                 'showIndex' : False,
                 'data': dp_report_df.to_json(orient="records"),                
-            },
-            {
-                'reportType': 'heading',
-                'headingKey': 'syntheticData.bivariateDistributionSyntheticDataTitle'
-            },
-            {'reportType': 'bivariateDistributionSyntheticData'},
-            
+            }                       
         ]
     }))
 
diff --git a/src/components/DistributionReport.tsx b/src/components/DistributionReport.tsx
@@ -108,6 +108,16 @@ export const DistributionReport = (
                             </h5>
                         );
                     }
+                    if (report.reportType === 'heading2' && report.headingKey) {
+                        return (
+                            <h6
+                                key={indexReport}
+                                className="text-gray-800 font-semibold mb-4"
+                            >
+                                {t(report.headingKey, report.params)}
+                            </h6>
+                        );
+                    }
                     if (report.reportType === 'text' && report.textKey) {
                         return (
                             <MarkdownWithTooltips
@@ -239,6 +249,104 @@ export const DistributionReport = (
                             </div>
                         );
                     }
+
+                    if (report.reportType === 'correlationSyntheticData') {
+                        if (!report.titleKey) {
+                            return null;
+                        }
+                        const preContent: additionalContent = report.preContent
+                            ? JSON.parse(report.preContent)
+                            : [];
+                        const postContent: additionalContent =
+                            (report.postContent as unknown as additionalContent) ??
+                            [];
+
+                        return (
+                            <div key={indexReport} className="mb-4">
+                                <Accordion
+                                    title={t(report.titleKey)}
+                                    content={
+                                        <div className="pt-[20px];">
+                                            <p>&nbsp;</p>
+                                            {preContent.map(
+                                                (content, index) => {
+                                                    if (
+                                                        content.contentType ===
+                                                        'text'
+                                                    ) {
+                                                        return (
+                                                            <MarkdownWithTooltips
+                                                                key={index}
+                                                                className="-mt-2 text-gray-800 markdown"
+                                                            >
+                                                                {t(
+                                                                    content.textKey ??
+                                                                        '',
+                                                                    content.params
+                                                                )}
+                                                            </MarkdownWithTooltips>
+                                                        );
+                                                    }
+                                                }
+                                            )}
+
+                                            <div
+                                                key={`index`}
+                                                className="grid lg:grid-cols-[50%_50%] grid-cols-[100%]"
+                                            >
+                                                <div className="col-[1] lg:col-[1]">
+                                                    <CorrelationMatrix
+                                                        title={t(
+                                                            'heatmap.realdata'
+                                                        )}
+                                                        heatmapData={createHeatmapdata(
+                                                            distributionReportProps.realCorrelations
+                                                        )}
+                                                        showLegend={false}
+                                                    />
+                                                </div>
+                                                <div className="col-[1] lg:col-[2]">
+                                                    <CorrelationMatrix
+                                                        title={t(
+                                                            'heatmap.synthData'
+                                                        )}
+                                                        heatmapData={createHeatmapdata(
+                                                            distributionReportProps.synthDataCorrelations
+                                                        )}
+                                                        showLegend={true}
+                                                    />
+                                                </div>
+                                            </div>
+
+                                            {report.postContent &&
+                                                postContent.map(
+                                                    (content, index) => {
+                                                        if (
+                                                            content.contentType ===
+                                                            'text'
+                                                        ) {
+                                                            return (
+                                                                <MarkdownWithTooltips
+                                                                    key={index}
+                                                                    className="-mt-2 text-gray-800 markdown"
+                                                                >
+                                                                    {t(
+                                                                        content.textKey ??
+                                                                            '',
+                                                                        content.params
+                                                                    )}
+                                                                </MarkdownWithTooltips>
+                                                            );
+                                                        }
+                                                    }
+                                                )}
+                                        </div>
+                                    }
+                                />
+                            </div>
+                        );
+                    }
+
                     if (
                         report.reportType === 'univariateDistributionRealData'
                     ) {
diff --git a/src/components/TooltipWrapper.tsx b/src/components/TooltipWrapper.tsx
@@ -22,7 +22,7 @@ export function TooltipWrapper({
     return (
         <span>
             {textBefore}
-            <TooltipProvider>
+            <TooltipProvider delayDuration={0}>
                 <Tooltip>
                     <TooltipTrigger asChild>
                         <span className="border-b-2 border-dashed border-gray-600 cursor-help">
diff --git a/src/locales/en.ts b/src/locales/en.ts
@@ -111,7 +111,7 @@ export const en = {
                         'By default, the CART method is used to generate synthetic data. CART generally produces higher quality synthetic data, but might not work well on datasets with categorical variables with 20+ categories. Use Gaussian Copula in those cases.',
                 },
                 nanTreatment: {
-                    title: 'NaN Values Treatment',
+                    title: 'NaN values treatment',
                     drop: 'Drop rows with NaN values',
                     impute: 'Impute NaN values',
                     tooltip:
@@ -151,13 +151,14 @@ export const en = {
         cartModelDescription:
             'The CART (Classification and Regression Trees) method generates synthetic data by learning patterns from real data through a decision tree that splits data into homogeneous groups based on feature values. It predicts averages for numerical data and assigns the most common category for categorical data, using these predictions to create new synthetic points.',
         evaluationOfGeneratedDataTitle: '4. Evaluation of generated data',
-        diagnosticsReportTitle: '5. Diagnostic Report',
+        distributionsTitle: '4.1. Distributions',
+        diagnosticsReportTitle: '4.2. Diagnostic report',
         diagnosticsTitle: 'Diagnostic Results',
+        correlationMatrixTitle: 'Correlation matrix',
         efficacyMetricsTitle: 'Efficacy metrics',
-        disclosureProtectionTitle: 'Disclosure protection',
-        bivariateDistributionSyntheticDataTitle: '6. Bivariate distributions',
-        outputDataTitle: '7. Generated synthetic data',
-        moreInfoTitle: '8. More information',
+        disclosureProtectionTitle: 'Privacy metrics',
+        outputDataTitle: '5. Generated synthetic data',
+        moreInfoTitle: '6. More information',
         correlationDifference:
             'Correlation difference: {{correlationDifference}}',
         univariateText:
diff --git a/src/locales/nl.ts b/src/locales/nl.ts
@@ -156,13 +156,14 @@ export const nl = {
         cartModelDescription:
             'De CART-methode (Classification and Regression Trees) genereert synthetische data door patronen uit echte data te leren via een beslisboom die de data opdeelt in homogene groepen op basis van kenmerkwaarden. Voor numerieke data voorspelt de methode gemiddelden, en voor categorische data wijst het de meest voorkomende categorie toe. Deze voorspellingen worden vervolgens gebruikt om nieuwe synthetische gegevenspunten te creëren.',
         evaluationOfGeneratedDataTitle: '4. Evaluatie van gegenereerde data',
-        diagnosticsReportTitle: '6. Diagnostisch rapport',
+        distributionsTitle: '4.1. Distributies',
+        diagnosticsReportTitle: '4.2. Diagnostisch rapport',
         diagnosticsTitle: 'Diagnostische Resultaten',
+        correlationMatrixTitle: 'Correlatiematrix',
         efficacyMetricsTitle: 'Doeltreffendheid metrieken',
-        disclosureProtectionTitle: 'Disclosure protection',
-        bivariateDistributionSyntheticDataTitle: '6. Bivariate distributies',
-        outputDataTitle: '7. Output data',
-        moreInfoTitle: '8. Meer informatie',
+        disclosureProtectionTitle: 'Privacymetrieken',
+        outputDataTitle: '5. Output data',
+        moreInfoTitle: '6. Meer informatie',
         correlationDifference: 'Correlatie verschil: {{correlationDifference}}',
         moreInfo:
             '&nbsp;&nbsp;\n  \n  \n  \nWil je meer weten over synthetische data?\n  \n  \n  \n- [python-synthpop op Github](https://github.com/NGO-Algorithm-Audit/python-synthpop)\n- [local-first web app op Github](https://github.com/NGO-Algorithm-Audit/local-first-web-tool/tree/main)\n- [Synthetische Data: wat, waarom en hoe?](https://royalsociety.org/-/media/policy/projects/privacy-enhancing-technologies/Synthetic_Data_Survey-24.pdf)\n- [Kennis Netwerk Synthetische Data](https://online.rijksinnovatiecommunity.nl/groups/399-kennisnetwerk-synthetischedata/welcome) (for Dutch public organizations)\n- [Synthetische data portaal van DUO](https://duo.nl/open_onderwijsdata/footer/synthetische-data.jsp)\n- [CART: synthpop resources](https://synthpop.org.uk/resources.html)\n- [Gaussian Copula - Synthetic Data Vault](https://docs.sdv.dev/sdv)',