Skip to content

Commit d89ba42

Browse files
committed
Add nanTreatment option to synthetic data generation; implement NaN handling logic in GaussianCopulaSynthesizer and update related components for consistency
1 parent 628c998 commit d89ba42

File tree

5 files changed

+85
-1
lines changed

5 files changed

+85
-1
lines changed

src/assets/synthetic-data.tsx

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ from js import isDemo
2828
from js import sdgMethod
2929
from js import samples
3030
from js import setOutputData
31+
from js import nanTreatment
3132
3233
3334
class GaussianCopulaSynthesizer:
@@ -77,7 +78,7 @@ def run():
7778
csv_data = StringIO(data)
7879
7980
admissions_df = pd.read_csv(csv_data, index_col=False)
80-
81+
print("nanTreatment:", nanTreatment)
8182
# admissions_sub = admissions_df[['sex', 'race1', 'ugpa', 'bar']]
8283
# real_data = admissions_sub.dropna()
8384
@@ -227,6 +228,18 @@ def run():
227228
if (sdgMethod == 'gc'):
228229
# Initialize synthesizer and fit it to the data
229230
synthesizer = GaussianCopulaSynthesizer()
231+
232+
# Handle NaN values based on the selected treatment method
233+
if nanTreatment == 'drop':
234+
df_imputed = df_imputed.dropna()
235+
elif nanTreatment == 'impute':
236+
# Use mean imputation for numerical columns and mode imputation for categorical columns
237+
for column in df_imputed.columns:
238+
if column_dtypes[column] == 'categorical':
239+
df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mode()[0])
240+
else:
241+
df_imputed[column] = df_imputed[column].fillna(df_imputed[column].mean())
242+
230243
synthesizer.fit(df_imputed)
231244
232245
# Generate synthetic data

src/components/SyntheticDataSettings.tsx

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ const createFormSchema = (t: (key: string) => string) =>
2727
required_error: t('syntheticData.form.errors.csvRequired'),
2828
}),
2929
sdgMethod: z.string(),
30+
nanTreatment: z.string(),
3031
});
3132

3233
interface DemoDataColumns {
@@ -56,6 +57,7 @@ export default function SyntheticDataSettings({
5657
resolver: zodResolver(FormSchema),
5758
defaultValues: {
5859
sdgMethod: 'cart',
60+
nanTreatment: 'drop',
5961
},
6062
});
6163
const [columnsCountError, setColumnsCountError] = useState(false);
@@ -117,6 +119,7 @@ export default function SyntheticDataSettings({
117119
isDemo: false,
118120
sdgMethod: data.sdgMethod,
119121
samples: outputSamples[0],
122+
nanTreatment: data.nanTreatment,
120123
});
121124
};
122125
return (
@@ -209,6 +212,68 @@ export default function SyntheticDataSettings({
209212
</RadioGroup>
210213
)}
211214
/>
215+
216+
{form.watch('sdgMethod') === 'gc' && (
217+
<div className="mt-4">
218+
<label className="text-sm font-medium flex flex-row items-center gap-1">
219+
{t(
220+
'syntheticData.form.fieldset.nanTreatment.title'
221+
)}
222+
<TooltipProvider>
223+
<Tooltip>
224+
<TooltipTrigger
225+
onClick={event => {
226+
event.preventDefault();
227+
}}
228+
>
229+
<InfoIcon className="size-3.5" />
230+
</TooltipTrigger>
231+
<TooltipContent>
232+
<div className="whitespace-pre-wrap max-w-full w-[400px] p-2">
233+
<Markdown className="-mt-2 text-gray-800 markdown">
234+
{t(
235+
'syntheticData.form.fieldset.nanTreatment.tooltip'
236+
)}
237+
</Markdown>
238+
</div>
239+
</TooltipContent>
240+
</Tooltip>
241+
</TooltipProvider>
242+
</label>
243+
<FormField
244+
control={form.control}
245+
name="nanTreatment"
246+
render={({ field }) => (
247+
<RadioGroup
248+
onValueChange={field.onChange}
249+
defaultValue={field.value}
250+
className="flex flex-col space-y-1 mt-2"
251+
>
252+
<FormItem className="flex items-center space-x-3 space-y-0">
253+
<FormControl>
254+
<RadioGroupItem value="drop" />
255+
</FormControl>
256+
<FormLabel className="font-normal">
257+
{t(
258+
'syntheticData.form.fieldset.nanTreatment.drop'
259+
)}
260+
</FormLabel>
261+
</FormItem>
262+
<FormItem className="flex items-center space-x-3 space-y-0">
263+
<FormControl>
264+
<RadioGroupItem value="impute" />
265+
</FormControl>
266+
<FormLabel className="font-normal">
267+
{t(
268+
'syntheticData.form.fieldset.nanTreatment.impute'
269+
)}
270+
</FormLabel>
271+
</FormItem>
272+
</RadioGroup>
273+
)}
274+
/>
275+
</div>
276+
)}
212277
</div>
213278

214279
<div className="grid gap-3">

src/components/synthetic-data-interfaces/SyntheticDataParameters.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,5 @@ export interface SyntheticDataParameters {
22
isDemo: boolean;
33
sdgMethod: string;
44
samples: number;
5+
nanTreatment: string;
56
}

src/components/synthetic-data-interfaces/cluster-export.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
export interface SyntheticDataInfo {
22
isDemo: boolean;
33
sdgMethod: string;
4+
nanTreatment: string;
45
samples: number;
56
syntheticData: object;
67
date: Date;

src/routes/SyntheticData.tsx

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ export default function SyntheticDataGeneration() {
7373
isDemo: false,
7474
sdgMethod: 'gc',
7575
samples: 1000,
76+
nanTreatment: '',
7677
});
7778

7879
const params = new URLSearchParams(window.location.search);
@@ -110,6 +111,7 @@ export default function SyntheticDataGeneration() {
110111
isDemo: true,
111112
sdgMethod: 'cart',
112113
samples: 5000,
114+
nanTreatment: '',
113115
});
114116
}
115117
}, [initialised, data]);
@@ -118,6 +120,7 @@ export default function SyntheticDataGeneration() {
118120
isDemo: boolean;
119121
sdgMethod: string;
120122
samples: number;
123+
nanTreatment: string;
121124
}) => {
122125
runPython({
123126
type: 'start',
@@ -126,6 +129,7 @@ export default function SyntheticDataGeneration() {
126129
isDemo: props.isDemo,
127130
sdgMethod: props.sdgMethod,
128131
samples: props.samples,
132+
nanTreatment: props.nanTreatment,
129133
},
130134
},
131135
});

0 commit comments

Comments
 (0)