Skip to content

Commit fa44e70

Browse files
authored
Merge pull request #55 from NGO-Algorithm-Audit/feature/ui-texts-and-data-and-info-boxes
Feature/UI texts and data and info boxes
2 parents 3ffee8a + 03a7604 commit fa44e70

File tree

6 files changed

+71
-36
lines changed

6 files changed

+71
-36
lines changed

src/assets/synthetic-data.tsx

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ def run():
161161
missingness_dict = md_handler.detect_missingness(real_data)
162162
print("Detected Missingness Type:", missingness_dict)
163163
164+
missingness_dict_df = pd.DataFrame(missingness_dict, index=[0])
165+
164166
df_imputed = md_handler.apply_imputation(real_data, missingness_dict)
165167
166168
@@ -250,7 +252,7 @@ def run():
250252
setOutputData("syntheticData", synthetic_data.to_json(orient='records'))
251253
252254
report = MetricsReport(df_imputed, synthetic_data, metadata)
253-
report_df = report.generate_report()
255+
report_df = report.generate_report()
254256
print('report_df:', report_df)
255257
256258
# combine empty synthetic data with original data and with encoded data
@@ -268,21 +270,17 @@ def run():
268270
print("=== Regression Efficacy Metrics ===", column)
269271
print(reg_metrics)
270272
271-
# reg_efficacy = EfficacyMetrics(task='regression', target_column="ugpa")
272-
# reg_metrics = reg_efficacy.evaluate(df_imputed, synthetic_data)
273-
# print("=== Regression Efficacy Metrics === UGPA")
274-
# print(reg_metrics)
275-
276273
clf_efficacy = EfficacyMetrics(task='classification', target_column="bar")
277274
clf_metrics = clf_efficacy.evaluate(df_imputed, synthetic_data)
278275
print("=== Classification Efficacy Metrics === BAR")
279276
print(clf_metrics)
280277
281-
282278
dp = DisclosureProtection(df_imputed, synthetic_data)
283279
dp_score = dp.score()
284280
dp_report = dp.report()
285281
282+
dp_report_df = pd.DataFrame(dp_report, index=[0])
283+
286284
print("=== Disclosure Protection ===")
287285
print(f"Score: {dp_score:.3f}")
288286
print("Detailed Report:", dp_report)
@@ -296,12 +294,26 @@ def run():
296294
'combined_data' : combined_data.to_json(orient="records"),
297295
'realCorrelations': df_encoded.corr().to_json(orient="records"),
298296
'synthDataCorrelations': synth_df_encoded.corr().to_json(orient="records"),
299-
'reports' : [
297+
'reports' : [
300298
{
301299
'reportType': 'heading',
302-
'headingKey': 'syntheticData.cartModelTitle' if sdgMethod == 'cart' else 'syntheticData.gaussianCopulaModelTitle'
300+
'headingKey': 'syntheticData.handlingMissingDataTitle'
301+
},
302+
{
303+
'reportType': 'text',
304+
'textKey': 'syntheticData.handlingMissingDataDescription'
303305
},
304-
{
306+
{
307+
'reportType': 'table',
308+
'titleKey': 'syntheticData.handlingMissingDataTableTitle',
309+
'showIndex' : False,
310+
'data': missingness_dict_df.to_json(orient="records"),
311+
},
312+
{
313+
'reportType': 'heading',
314+
'headingKey': 'syntheticData.cartModelTitle' if sdgMethod == 'cart' else 'syntheticData.gaussianCopulaModelTitle'
315+
},
316+
{
305317
'reportType': 'text',
306318
'textKey': 'syntheticData.cartModelDescription' if sdgMethod == 'cart' else 'syntheticData.gaussianCopulaModelDescription'
307319
},
@@ -310,6 +322,10 @@ def run():
310322
'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
311323
},
312324
{'reportType': 'univariateDistributionSyntheticData'},
325+
{
326+
'reportType': 'heading',
327+
'headingKey': 'syntheticData.diagnosticsReportTitle'
328+
},
313329
{
314330
'reportType': 'table',
315331
'titleKey': 'syntheticData.diagnosticsTitle',
@@ -319,7 +335,18 @@ def run():
319335
'contentType' : 'correlationSyntheticData'
320336
}]
321337
},
322-
{'reportType': 'bivariateDistributionSyntheticData'}
338+
{
339+
'reportType': 'table',
340+
'titleKey': 'syntheticData.disclosureProtectionTitle',
341+
'showIndex' : False,
342+
'data': dp_report_df.to_json(orient="records"),
343+
},
344+
{
345+
'reportType': 'heading',
346+
'headingKey': 'syntheticData.bivariateDistributionSyntheticDataTitle'
347+
},
348+
{'reportType': 'bivariateDistributionSyntheticData'},
349+
323350
]
324351
}))
325352
@@ -334,6 +361,12 @@ def run():
334361
'data': synthetic_data.head().to_json(orient="records")
335362
}))
336363
364+
365+
setResult(json.dumps({
366+
'type': 'heading',
367+
'headingKey': 'syntheticData.moreInfoTitle'
368+
}))
369+
337370
setResult(json.dumps({
338371
'type': 'text',
339372
'key': 'syntheticData.moreInfo'

src/components/SyntheticDataSettings.tsx

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,6 @@ export default function SyntheticDataSettings({
119119
samples: outputSamples[0],
120120
});
121121
};
122-
123122
return (
124123
<Form {...form}>
125124
<div className="h-auto md:h-full flex flex-col justify-between">
@@ -237,19 +236,6 @@ export default function SyntheticDataSettings({
237236
</div>
238237

239238
<div className="flex flex-row ml-auto gap-2 hideonprint">
240-
<Button
241-
onClick={event => {
242-
event.preventDefault();
243-
onDemoRun();
244-
return false;
245-
}}
246-
size="sm"
247-
variant={'outline'}
248-
className="gap-1.5 xl:hidden"
249-
disabled={isLoading}
250-
>
251-
{t('syntheticData.form.actions.tryItOut')}
252-
</Button>
253239
<Button
254240
type="submit"
255241
size="sm"

src/components/pyodide/use-python.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ export const usePython = <T, TExport>(emptyParams: T) => {
4040
setLoading(false);
4141
} else if (event.data.type === 'data-set') {
4242
setError(undefined);
43-
setLoading(false);
4443
} else {
4544
setError('Unknown message type');
4645
setLoading(false);
@@ -52,10 +51,13 @@ export const usePython = <T, TExport>(emptyParams: T) => {
5251

5352
const runPython = useCallback(
5453
(message: { type: 'start'; params: { parameters: T } }) => {
54+
setLoading(true);
55+
setLoadingMessage('running analysis');
56+
5557
setClusterInfo(undefined);
5658
setResult([]);
5759
setError(undefined);
58-
setLoading(true);
60+
5961
workerRef.current?.postMessage(message), [];
6062
},
6163
[]

src/locales/en.json

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,20 @@
125125
"heading": "0. Preview of real data"
126126
},
127127
"columnsInDataset": "1. Data types detection",
128+
"handlingMissingDataTitle": "2. Handling missing data",
129+
"handlingMissingDataDescription": "Handling missing data description",
130+
"handlingMissingDataTableTitle": "Columns with missing data",
128131
"_explanatoryDataAnalysisTitle": "2. Explanatory data analysis",
129-
"cartModelTitle": "2. Method: CART model",
130-
"gaussianCopulaModelTitle": "2. Method: Gaussian Copula model",
132+
"cartModelTitle": "3. Method: CART model",
133+
"gaussianCopulaModelTitle": "3. Method: Gaussian Copula model",
131134
"cartModelDescription": "The CART (Classification and Regression Trees) method generates synthetic data by learning patterns from real data through a decision tree that splits data into homogeneous groups based on feature values. It predicts averages for numerical data and assigns the most common category for categorical data, using these predictions to create new synthetic points.",
132-
"evaluationOfGeneratedDataTitle": "3. Evaluation of generated data",
133-
"outputDataTitle": "4. Generated synthetic data",
135+
"evaluationOfGeneratedDataTitle": "4. Evaluation of generated data",
136+
"diagnosticsReportTitle": "5. Diagnostic Report",
134137
"diagnosticsTitle": "Diagnostic Results",
138+
"disclosureProtectionTitle": "Disclosure protection",
139+
"bivariateDistributionSyntheticDataTitle": "6. Bivariate distributions",
140+
"outputDataTitle": "7. Generated synthetic data",
141+
"moreInfoTitle": "8. More information",
135142
"correlationDifference": "Correlation difference: {{correlationDifference}}",
136143
"univariateText": "{{samples}} synthetic data points are generated using CART. The figures below display the value frequency for each variable. The synthetic data is of high quality when the frequencies are approximately the same.",
137144
"bivariateText": "The figures below display the differences in value frequency for a combination of variables. For comparing two categorical variables, bar charts are plotted. For comparing a numerical and a categorical variable, a so-called [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is shown. For comparing two numerical variables, a [LOESS plot](https://en.wikipedia.org/wiki/Local_regression) is created. For all plots, the following holds: the synthetic data is of high quality when the shape of the distributions in the synthetic data equals the distributions in the real data.",

src/locales/nl.json

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,20 @@
125125
"heading": "0. Preview van de data"
126126
},
127127
"columnsInDataset": "1. Detectie van datatypes",
128-
"_explanatoryDataAnalysisTitle": "2. Explanatory data analyse",
128+
"handlingMissingDataTitle": "2. Handling missing data",
129+
"handlingMissingDataDescription": "Handling missing data description",
130+
"handlingMissingDataTableTitle": "Columns with missing data",
131+
"_explanatoryDataAnalysisTitle": "3. Explanatory data analyse",
129132
"cartModelTitle": "3. Methode: CART model",
130-
"gaussianCopulaModelTitle": "2. Methode: Gaussian Copula model",
133+
"gaussianCopulaModelTitle": "3. Methode: Gaussian Copula model",
131134
"cartModelDescription": "De CART-methode (Classification and Regression Trees) genereert synthetische data door patronen uit echte data te leren via een beslisboom die de data opdeelt in homogene groepen op basis van kenmerkwaarden. Voor numerieke data voorspelt de methode gemiddelden, en voor categorische data wijst het de meest voorkomende categorie toe. Deze voorspellingen worden vervolgens gebruikt om nieuwe synthetische gegevenspunten te creëren.",
132-
"evaluationOfGeneratedDataTitle": "3. Evaluatie van gegenereerde data",
133-
"outputDataTitle": "4. Output data",
135+
"evaluationOfGeneratedDataTitle": "4. Evaluatie van gegenereerde data",
136+
"diagnosticsReportTitle": "5. Diagnostisch rapport",
134137
"diagnosticsTitle": "Diagnostische Resultaten",
138+
"disclosureProtectionTitle": "Disclosure protection",
139+
"bivariateDistributionSyntheticDataTitle": "6. Bivariate distributies",
140+
"outputDataTitle": "7. Output data",
141+
"moreInfoTitle": "8. Meer informatie",
135142
"correlationDifference": "Correlatie verschil: {{correlationDifference}}",
136143
"moreInfo": "&nbsp;&nbsp;\n \n \n \nWil je meer weten over synthetische data?\n \n \n \n- [python-synthpop op Github](https://github.com/NGO-Algorithm-Audit/python-synthpop)\n- [local-first web app op Github](https://github.com/NGO-Algorithm-Audit/local-first-web-tool/tree/main)\n- [Synthetische Data: wat, waarom en hoe?](https://royalsociety.org/-/media/policy/projects/privacy-enhancing-technologies/Synthetic_Data_Survey-24.pdf)\n- [Kennis Netwerk Synthetische Data](https://online.rijksinnovatiecommunity.nl/groups/399-kennisnetwerk-synthetischedata/welcome) (for Dutch public organizations)\n- [Synthetische data portaal van DUO](https://duo.nl/open_onderwijsdata/footer/synthetische-data.jsp)\n- [CART: synthpop resources](https://synthpop.org.uk/resources.html)\n- [Gaussian Copula - Synthetic Data Vault](https://docs.sdv.dev/sdv)"
137144
},

src/routes/SyntheticData.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ export default function SyntheticDataGeneration() {
169169

170170
{result.length > 0 ? (
171171
<ComponentMapper items={result} data={data} />
172-
) : data.data.length > 0 ? null : loading ? (
172+
) : loading ? (
173173
<LoadingState
174174
loadingMessageKey={loadingMessage}
175175
initializingKey={

0 commit comments

Comments
 (0)