Skip to content

Commit 93879f2

Browse files
committed
Merge branch 'main' into JFP_edits
1 parent 496db79 commit 93879f2

File tree

6 files changed

+236
-56
lines changed

6 files changed

+236
-56
lines changed

src/assets/synthetic-data.tsx

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import warnings
77
import scipy.stats as stats
88
from scipy.stats import norm, ks_2samp
99
from sklearn.preprocessing import LabelEncoder
10-
from synthpop import MissingDataHandler, DataProcessor, CARTMethod
10+
from synthpop import MissingDataHandler, DataProcessor, CARTMethod, GaussianCopulaMethod
1111
from synthpop.metrics import (
1212
MetricsReport,
1313
EfficacyMetrics,
@@ -177,11 +177,16 @@ def run():
177177
# Preprocess the data: transforms raw data into a numerical format
178178
processed_data = processor.preprocess(df_imputed)
179179
180-
cart = CARTMethod(metadata, smoothing=True, proper=True, minibucket=5, random_state=42)
181-
cart.fit(processed_data)
182-
183-
synthetic_processed = cart.sample(samples)
184-
180+
if (sdgMethod == 'cart'):
181+
cart = CARTMethod(metadata, smoothing=True, proper=True, minibucket=5, random_state=42)
182+
cart.fit(processed_data)
183+
synthetic_processed = cart.sample(samples)
184+
185+
if (sdgMethod == 'gc'):
186+
gc = GaussianCopulaMethod(metadata)
187+
gc.fit(processed_data)
188+
synthetic_processed = gc.sample(samples)
189+
185190
print("synthetic_processed (first 5 rows):", synthetic_processed.head())
186191
187192
synthetic_data = processor.postprocess(synthetic_processed)
@@ -225,25 +230,25 @@ def run():
225230
# spop.fit(real_data, dtypes=dtypes_dict)
226231
# synthetic_data = spop.generate(k=samples)
227232
228-
if (sdgMethod == 'gc'):
233+
# if (sdgMethod == 'gc'):
229234
# Initialize synthesizer and fit it to the data
230-
synthesizer = GaussianCopulaSynthesizer()
235+
# synthesizer = GaussianCopulaSynthesizer()
231236
232237
# Handle NaN values based on the selected treatment method
233-
if nanTreatment == 'drop':
234-
df_imputed = df_imputed.dropna()
235-
elif nanTreatment == 'impute':
238+
# if nanTreatment == 'drop':
239+
# df_imputed = df_imputed.dropna()
240+
# elif nanTreatment == 'impute':
236241
# Use mean imputation for numerical columns and mode imputation for categorical columns
237-
for column in df_imputed.columns:
238-
if column_dtypes[column] == 'categorical':
239-
df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mode()[0])
240-
else:
241-
df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mean())
242+
# for column in df_imputed.columns:
243+
# if column_dtypes[column] == 'categorical':
244+
# df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mode()[0])
245+
# else:
246+
# df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mean())
242247
243-
synthesizer.fit(df_imputed)
248+
#synthesizer.fit(df_imputed)
244249
245250
# Generate synthetic data
246-
synthetic_data = synthesizer.sample(samples)
251+
# synthetic_data = synthesizer.sample(samples)
247252
248253
synth_df_decoded = synthetic_data.copy()
249254
@@ -349,38 +354,57 @@ def run():
349354
'reportType': 'heading',
350355
'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
351356
},
357+
{
358+
'reportType': 'heading2',
359+
'headingKey': 'syntheticData.distributionsTitle'
360+
},
352361
{'reportType': 'univariateDistributionSyntheticData'},
362+
{'reportType': 'bivariateDistributionSyntheticData'},
353363
{
354-
'reportType': 'heading',
364+
'reportType': 'heading2',
355365
'headingKey': 'syntheticData.diagnosticsReportTitle'
356-
},
366+
},
357367
{
358368
'reportType': 'table',
359369
'titleKey': 'syntheticData.diagnosticsTitle',
360-
'showIndex' : False,
361-
'data': report_df.to_json(orient="records"),
362-
'postContent': [{
363-
'contentType' : 'correlationSyntheticData'
364-
}]
370+
'showIndex' : False,
371+
'preContent' : [{
372+
'contentType': 'text',
373+
'textKey': 'syntheticData.diagnosticsReportDescription'
374+
}],
375+
'data': report_df.to_json(orient="records"),
376+
},
377+
{
378+
'reportType': 'correlationSyntheticData',
379+
'titleKey': 'syntheticData.correlationMatrixTitle',
380+
'preContent' : [{
381+
'contentType': 'text',
382+
'textKey': 'syntheticData.correlationMatrixDescription'
383+
}],
365384
},
366385
{
367386
'reportType': 'table',
368387
'titleKey': 'syntheticData.efficacyMetricsTitle',
369388
'showIndex' : False,
389+
'preContent' : [{
390+
'contentType': 'text',
391+
'textKey': 'syntheticData.efficacyMetricsDescription'
392+
}],
370393
'data': metrics_df.to_json(orient="records"),
371394
},
372395
{
373396
'reportType': 'table',
374397
'titleKey': 'syntheticData.disclosureProtectionTitle',
375398
'showIndex' : False,
399+
'preContent' : [{
400+
'contentType': 'text',
401+
'textKey': 'syntheticData.disclosureProtectionDescription'
402+
},{
403+
'contentType': 'text',
404+
'text': f"Score: {dp_score:.3f}"
405+
}],
376406
'data': dp_report_df.to_json(orient="records"),
377-
},
378-
{
379-
'reportType': 'heading',
380-
'headingKey': 'syntheticData.bivariateDistributionSyntheticDataTitle'
381-
},
382-
{'reportType': 'bivariateDistributionSyntheticData'},
383-
407+
}
384408
]
385409
}))
386410

src/components/DistributionReport.tsx

Lines changed: 122 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ function countCategory2ForCategory1(
5959
type additionalContent = {
6060
contentType: string;
6161
textKey?: string;
62+
text?: string;
6263
params?: Record<string, string | number | boolean>;
6364
}[];
6465

@@ -108,6 +109,16 @@ export const DistributionReport = (
108109
</h5>
109110
);
110111
}
112+
if (report.reportType === 'heading2' && report.headingKey) {
113+
return (
114+
<h6
115+
key={indexReport}
116+
className="text-gray-800 font-semibold mb-4"
117+
>
118+
{t(report.headingKey, report.params)}
119+
</h6>
120+
);
121+
}
111122
if (report.reportType === 'text' && report.textKey) {
112123
return (
113124
<MarkdownWithTooltips
@@ -127,7 +138,7 @@ export const DistributionReport = (
127138
return null;
128139
}
129140
const preContent: additionalContent = report.preContent
130-
? JSON.parse(report.preContent)
141+
? (report.preContent as unknown as additionalContent)
131142
: [];
132143
const postContent: additionalContent =
133144
(report.postContent as unknown as additionalContent) ??
@@ -151,11 +162,12 @@ export const DistributionReport = (
151162
key={index}
152163
className="-mt-2 text-gray-800 markdown"
153164
>
154-
{t(
155-
content.textKey ??
156-
'',
157-
content.params
158-
)}
165+
{content.text ??
166+
t(
167+
content.textKey ??
168+
'',
169+
content.params
170+
)}
159171
</MarkdownWithTooltips>
160172
);
161173
}
@@ -180,11 +192,12 @@ export const DistributionReport = (
180192
key={index}
181193
className="-mt-2 text-gray-800 markdown"
182194
>
183-
{t(
184-
content.textKey ??
185-
'',
186-
content.params
187-
)}
195+
{content.text ??
196+
t(
197+
content.textKey ??
198+
'',
199+
content.params
200+
)}
188201
</MarkdownWithTooltips>
189202
);
190203
} else if (
@@ -239,6 +252,104 @@ export const DistributionReport = (
239252
</div>
240253
);
241254
}
255+
256+
if (report.reportType === 'correlationSyntheticData') {
257+
if (!report.titleKey) {
258+
return null;
259+
}
260+
const preContent: additionalContent = report.preContent
261+
? (report.preContent as unknown as additionalContent)
262+
: [];
263+
const postContent: additionalContent =
264+
(report.postContent as unknown as additionalContent) ??
265+
[];
266+
267+
return (
268+
<div key={indexReport} className="mb-4">
269+
<Accordion
270+
title={t(report.titleKey)}
271+
content={
272+
<div className="pt-[20px];">
273+
<p>&nbsp;</p>
274+
{preContent.map(
275+
(content, index) => {
276+
if (
277+
content.contentType ===
278+
'text'
279+
) {
280+
return (
281+
<MarkdownWithTooltips
282+
key={index}
283+
className="-mt-2 text-gray-800 markdown"
284+
>
285+
{t(
286+
content.textKey ??
287+
'',
288+
content.params
289+
)}
290+
</MarkdownWithTooltips>
291+
);
292+
}
293+
}
294+
)}
295+
296+
<div
297+
key={`index`}
298+
className="grid lg:grid-cols-[50%_50%] grid-cols-[100%]"
299+
>
300+
<div className="col-[1] lg:col-[1]">
301+
<CorrelationMatrix
302+
title={t(
303+
'heatmap.realdata'
304+
)}
305+
heatmapData={createHeatmapdata(
306+
distributionReportProps.realCorrelations
307+
)}
308+
showLegend={false}
309+
/>
310+
</div>
311+
<div className="col-[1] lg:col-[2]">
312+
<CorrelationMatrix
313+
title={t(
314+
'heatmap.synthData'
315+
)}
316+
heatmapData={createHeatmapdata(
317+
distributionReportProps.synthDataCorrelations
318+
)}
319+
showLegend={true}
320+
/>
321+
</div>
322+
</div>
323+
324+
{report.postContent &&
325+
postContent.map(
326+
(content, index) => {
327+
if (
328+
content.contentType ===
329+
'text'
330+
) {
331+
return (
332+
<MarkdownWithTooltips
333+
key={index}
334+
className="-mt-2 text-gray-800 markdown"
335+
>
336+
{t(
337+
content.textKey ??
338+
'',
339+
content.params
340+
)}
341+
</MarkdownWithTooltips>
342+
);
343+
}
344+
}
345+
)}
346+
</div>
347+
}
348+
/>
349+
</div>
350+
);
351+
}
352+
242353
if (
243354
report.reportType === 'univariateDistributionRealData'
244355
) {

src/components/TooltipWrapper.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export function TooltipWrapper({
2222
return (
2323
<span>
2424
{textBefore}
25-
<TooltipProvider>
25+
<TooltipProvider delayDuration={0}>
2626
<Tooltip>
2727
<TooltipTrigger asChild>
2828
<span className="border-b-2 border-dashed border-gray-600 cursor-help">

src/components/componentMapper.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ export default function ComponentMapper({
128128

129129
case 'text':
130130
return (
131-
<TooltipProvider>
131+
<TooltipProvider key={index}>
132132
<MarkdownWithTooltips
133133
key={index}
134134
className="-mt-2 text-gray-800 markdown"

0 commit comments

Comments
 (0)