Merge pull request #93 from NGO-Algorithm-Audit/feature/sdg-tweaks-9jun

devhelpr · web-flow · commit 68d3cf52d24e · 2025-06-09T09:27:06.000+02:00
Feature/sdg tweaks 9jun
diff --git a/src/assets/synthetic-data.tsx b/src/assets/synthetic-data.tsx
@@ -264,11 +264,13 @@ def run():
                 'reportType': 'table',
                 'titleKey': 'syntheticData.handlingMissingDataTableTitle',
                 'showIndex' : False,    
-                'data': missingness_dict_df.to_json(orient="records"),                                            
+                'data': missingness_dict_df.to_json(orient="records"),
+                'noTableBelowTable': True                                            
             },
             {
                 'reportType': 'text',
-                'textKey': 'syntheticData.missingData'
+                'textKey': 'syntheticData.missingData',
+                'noHTML': True
             },
             {
                 'reportType': 'heading',
@@ -341,12 +343,20 @@ def run():
         'headingKey': 'syntheticData.outputDataTitle'        
     }))
 
+    setResult(json.dumps({
+        'type': 'text', 
+        'key': 'syntheticData.outputDataDescription'        
+    }))
+
     setResult(json.dumps({
         'type': 'table', 
         'showIndex': True,
         'data': synthetic_data.head().to_json(orient="records")
     }))
 
+    setResult(json.dumps({
+        'type': 'export-button',
+    }))
 
     setResult(json.dumps({
         'type': 'heading',
diff --git a/src/components/DistributionReport.tsx b/src/components/DistributionReport.tsx
@@ -73,6 +73,8 @@ interface DistributionReport {
     showIndex?: boolean;
     preContent?: string;
     postContent?: string;
+    noTableBelowTable?: boolean;
+    noHTML?: boolean;
 }
 export interface DistributionReportProps {
     dataTypes: string;
@@ -124,6 +126,7 @@ export const DistributionReport = (
                             <MarkdownWithTooltips
                                 key={indexReport}
                                 className="-mt-2 text-gray-800 markdown"
+                                noHTML={report.noHTML ?? false}
                             >
                                 {t(report.textKey, report.params)}
                             </MarkdownWithTooltips>
@@ -179,6 +182,9 @@ export const DistributionReport = (
                                                 showIndex={
                                                     report.showIndex ?? false
                                                 }
+                                                noTableBelowTable={
+                                                    report.noTableBelowTable
+                                                }
                                             />
                                             {report.postContent &&
                                                 postContent.map(
diff --git a/src/components/MarkdownWithTooltips.tsx b/src/components/MarkdownWithTooltips.tsx
@@ -9,6 +9,7 @@ import rehypeRaw from 'rehype-raw';
 interface MarkdownWithTooltipsProps {
     children: string;
     className?: string;
+    noHTML?: boolean; // when truthy, rehypeRaw is left out of the rehype plugin list
 }
 
 interface CustomElementData extends ElementData {
@@ -24,12 +25,16 @@ interface CustomElement extends Element {
 export function MarkdownWithTooltips({
     children,
     className,
+    noHTML,
 }: MarkdownWithTooltipsProps) {
+    const rehypePlugins = noHTML
+        ? [rehypeInfoTooltip]
+        : [rehypeRaw, rehypeInfoTooltip];
     return (
         <Markdown
             className={className}
             remarkPlugins={[remarkInfoTooltip, remarkGfm]}
-            rehypePlugins={[rehypeRaw, rehypeInfoTooltip]}
+            rehypePlugins={rehypePlugins}
             components={{
                 // @ts-expect-error - math is a custom components
                 TooltipWrapper,
diff --git a/src/components/SimpleTable.tsx b/src/components/SimpleTable.tsx
@@ -14,11 +14,13 @@ export default function SimpleTable({
     data,
     showIndex,
     translate,
+    noTableBelowTable,
 }: {
     showIndex: boolean;
     title?: string;
     data: Record<string, string | number>[];
     translate?: boolean;
+    noTableBelowTable?: boolean;
 }) {
     const { t } = useTranslation();
     // limit data to the first 100 rows.
@@ -27,7 +29,9 @@ export default function SimpleTable({
     return (
         <div className={`bg-white border border-gray-200 ${title && 'mb-4'}`}>
             <Table className={`text-xs ${title && 'mb-4'}`}>
-                {title && <TableCaption>{t(title)}</TableCaption>}
+                {!noTableBelowTable && title && (
+                    <TableCaption>{t(title)}</TableCaption>
+                )}
                 <TableHeader>
                     <TableRow className="bg-aaLight">
                         {showIndex && <TableHead></TableHead>}
diff --git a/src/components/SyntheticDataSettings.tsx b/src/components/SyntheticDataSettings.tsx
@@ -20,6 +20,7 @@ import {
     TooltipContent,
 } from './ui/touch-tooltip';
 import Markdown from 'react-markdown';
+import { IconInfoTooltip } from './ui/info-icon-tooltip';
 
 const createFormSchema = (t: (key: string) => string) =>
     z.object({
@@ -133,7 +134,16 @@ export default function SyntheticDataSettings({
                         <legend className="-ml-1 px-1 text-sm font-medium">
                             {t('syntheticData.form.fieldset.sourceDataset')}
                         </legend>
-                        <div className="grid gap-3">
+                        <div className="relative grid gap-3 select-none">
+                            <div className="flex flex-row items-center gap-1 absolute -top-[10px] leading-0 left-4 px-1 bg-white text-sm font-medium">
+                                {t('syntheticData.form.fieldset.dataSet')}
+
+                                <IconInfoTooltip
+                                    tooltipText={t(
+                                        'syntheticData.form.fieldset.dataSetTooltip'
+                                    )}
+                                />
+                            </div>
                             <FormField
                                 control={form.control}
                                 name="file"
diff --git a/src/locales/en.ts b/src/locales/en.ts
@@ -132,6 +132,11 @@ export const en = {
             },
             fieldset: {
                 sourceDataset: 'Input',
+                dataSet: 'Dataset',
+                dataSetTooltip: `Preprocess your data such that: 
+                    - missing values are removed or replaced;
+                    - all columns (except your outcome label column) should have the same datatypes, e.g., numerical or categorical;
+                    - the outcome label column is numerical`,
                 sdgMethod: {
                     title: 'Method',
                     cart: 'CART',
@@ -231,17 +236,17 @@ For classification (when the target is categorical):
         disclosureProtectionTitle: 'Privacy metrics',
         disclosureProtectionDescription: `The disclosure protection metric measures the proportion of synthetic data points that closely resemble real data points (within a predefined threshold), posing a risk of traceability to personal data. A low 'risk\_rate' and a high 'disclosure\_protection\_rate' indicate effective protection against the unintentional exposure of personal data.`,
         outputDataTitle: '5. Download synthetic data and evaluation report',
+        outputDataDescription: 'Preview of generated synthetic data',
         moreInfoTitle: '6. More information',
         meanSquaredError:
-                    'Average squared difference between predicted and actual values, quantifying the accuracy of a model’s predictions by penalizing larger errors more heavily',
+            'Average squared difference between predicted and actual values, quantifying the accuracy of a model’s predictions by penalizing larger errors more heavily',
         meanAbsoluteError:
-                    'Average magnitude of the errors between predicted and actual values, providing a straightforward assessment of model accuracy without emphasizing large errors',
-        R2:
-                    'Quantifies how well a model’s predictions match the actual data by measuring the proportion of variance in the target variable explained by the model',
+            'Average magnitude of the errors between predicted and actual values, providing a straightforward assessment of model accuracy without emphasizing large errors',
+        R2: 'Quantifies how well a model’s predictions match the actual data by measuring the proportion of variance in the target variable explained by the model',
         accuracyScore:
-                    'Measures the proportion of correctly predicted instances out of the total instances, providing an overall assessment of a model’s performance in classification tasks',
+            'Measures the proportion of correctly predicted instances out of the total instances, providing an overall assessment of a model’s performance in classification tasks',
         weightedF1Score:
-                    'Harmonic mean of precision and recall, calculated for each class and weighted by the class’s support (number of true instances), providing a balanced performance measure for imbalanced datasets',
+            'Harmonic mean of precision and recall, calculated for each class and weighted by the class’s support (number of true instances), providing a balanced performance measure for imbalanced datasets',
         correlationDifference:
             'Correlation difference: {{correlationDifference}}',
         univariateText:
diff --git a/src/locales/nl.ts b/src/locales/nl.ts
@@ -132,6 +132,11 @@ export const nl = {
             },
             fieldset: {
                 sourceDataset: 'Input',
+                dataSet: 'Dataset',
+                dataSetTooltip: `Bereid je data voor zodat: 
+                    - missende waarden zijn verwijderd of vervangen;
+                    - alle kolommen (behalve de uitkomstlabel-kolom) dezelfde datatypes hebben, numeriek of categorisch;
+                    - de uitkomstlabel-kolom numeriek is`,
                 sdgMethod: {
                     title: 'Methode',
                     cart: 'CART',
@@ -190,17 +195,17 @@ export const nl = {
         disclosureProtectionTitle: 'Privacy metrieken',
         disclosureProtectionDescription: `De onthullings beschermings metriek meet het aandeel synthetische datapunten die te veel lijkt op echte datapunten (binnen een vooraf gedefinieerde drempelwaarde), wat een risico op herleidbaarheid naar persoonsgegevens vormt. Een lage 'risk_rate' en hoge 'disclosure_protection_rate' duidt op een goede bescherming tegen het onbedoeld prijsgeven van persoonsgegevens.`,
         outputDataTitle: '5. Download synthetische data en evaluatierapport',
+        outputDataDescription: 'Preview van gegenereerde synthetische data',
         moreInfoTitle: '6. Meer informatie',
         meanSquaredError:
-                    'Gemiddeld kwadraatverschil tussen voorspelde en werkelijke waarden, dat de nauwkeurigheid van de voorspellingen van een model kwantificeert door grotere fouten zwaarder te bestraffen',
+            'Gemiddeld kwadraatverschil tussen voorspelde en werkelijke waarden, dat de nauwkeurigheid van de voorspellingen van een model kwantificeert door grotere fouten zwaarder te bestraffen',
         meanAbsoluteError:
-                    'Gemiddelde grootte van de fouten tussen voorspelde en werkelijke waarden, die een eenvoudige beoordeling van de nauwkeurigheid van het model biedt zonder de nadruk te leggen op grote fouten',
-        R2:
-                    'Kwantificeert hoe goed de voorspellingen van een model overeenkomen met de werkelijke gegevens door het aandeel van de variantie in de doelvariabele te meten dat door het model wordt verklaard',
+            'Gemiddelde grootte van de fouten tussen voorspelde en werkelijke waarden, die een eenvoudige beoordeling van de nauwkeurigheid van het model biedt zonder de nadruk te leggen op grote fouten',
+        R2: 'Kwantificeert hoe goed de voorspellingen van een model overeenkomen met de werkelijke gegevens door het aandeel van de variantie in de doelvariabele te meten dat door het model wordt verklaard',
         accuracyScore:
-                    'Meet het aandeel correct voorspelde gevallen ten opzichte van het totaal, en geeft zo een algemene beoordeling van de prestaties van het classificatiemodel',
+            'Meet het aandeel correct voorspelde gevallen ten opzichte van het totaal, en geeft zo een algemene beoordeling van de prestaties van het classificatiemodel',
         weightedF1Score:
-                    'Het harmonisch gemiddelde van precisie en recall, berekend per klasse en gewogen naar het aantal echte gevallen per klasse, wat een metriek biedt voor datasets met ongelijke klassenverdeling',
+            'Het harmonisch gemiddelde van precisie en recall, berekend per klasse en gewogen naar het aantal echte gevallen per klasse, wat een metriek biedt voor datasets met ongelijke klassenverdeling',
         correlationDifference: 'Correlatie verschil: {{correlationDifference}}',
         moreInfo:
             '&nbsp;&nbsp;\n  \n  \n  \nWil je meer weten over synthetische data?\n  \n  \n  \n- [python-synthpop op Github](https://github.com/NGO-Algorithm-Audit/python-synthpop)\n- [local-first web app op Github](https://github.com/NGO-Algorithm-Audit/local-first-web-tool/tree/main)\n- [Synthetische Data: wat, waarom en hoe?](https://royalsociety.org/-/media/policy/projects/privacy-enhancing-technologies/Synthetic_Data_Survey-24.pdf)\n- [Kennis Netwerk Synthetische Data](https://online.rijksinnovatiecommunity.nl/groups/399-kennisnetwerk-synthetischedata/welcome) (Nederlandse organisaties)\n- [Synthetische data portaal van DUO](https://duo.nl/open_onderwijsdata/footer/synthetische-data.jsp)\n- [CART: synthpop resources](https://synthpop.org.uk/resources.html)\n- [Gaussian Copula - Synthetic Data Vault](https://docs.sdv.dev/sdv)',
diff --git a/src/routes/SyntheticData.tsx b/src/routes/SyntheticData.tsx
@@ -161,18 +161,22 @@ export default function SyntheticDataGeneration() {
                     loading && 'overflow-hidden'
                 )}
             >
-                {initialised && data.data.length > 0 && result.length > 0 && (
-                    <ExportButton
-                        buttonAlign="right"
-                        clusterInfo={clusterInfo}
+                {result.length > 0 ? (
+                    <ComponentMapper
+                        items={result}
                         data={data}
-                        handleExport={handleExport}
-                        reactToPrintFn={reactToPrintFn}
+                        exportButton={
+                            <div className="flex flex-row gap-2 hideonprint justify-start">
+                                <ExportButton
+                                    buttonAlign={'left'}
+                                    clusterInfo={clusterInfo}
+                                    reactToPrintFn={reactToPrintFn}
+                                    data={data}
+                                    handleExport={handleExport}
+                                />
+                            </div>
+                        }
                     />
-                )}
-
-                {result.length > 0 ? (
-                    <ComponentMapper items={result} data={data} />
                 ) : loading ? (
                     <LoadingState
                         loadingMessageKey={loadingMessage}
@@ -189,16 +193,6 @@ export default function SyntheticDataGeneration() {
                         <div className="flex-1" />
                     </>
                 )}
-
-                {initialised && data.data.length > 0 && result.length > 0 && (
-                    <ExportButton
-                        buttonAlign="center"
-                        clusterInfo={clusterInfo}
-                        data={data}
-                        handleExport={handleExport}
-                        reactToPrintFn={reactToPrintFn}
-                    />
-                )}
             </div>
         </main>
     );