Skip to content

Commit f9d3665

Browse files
committed
2 parents 1add713 + dd123ed commit f9d3665

File tree

14 files changed

+545
-213
lines changed

14 files changed

+545
-213
lines changed

public/helper.tsx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
 * Demo component: renders a `GroupBarChart` fed with three hard-coded groups
 * (`test1`, `test2`, `test3`), each carrying a `testa` and a `testb` value.
 *
 * NOTE(review): no import for `GroupBarChart` is visible in this file —
 * confirm it is brought into scope before this component is used.
 */
export const Test = () => {
    // Builds one group entry in the shape passed to the chart's `data` prop.
    const makeGroup = (name: string, testa: number, testb: number) => ({
        name,
        values: [
            {
                name: 'testa',
                value: testa,
            },
            {
                name: 'testb',
                value: testb,
            },
        ],
    });

    const groups = [
        makeGroup('test1', 2, 3),
        makeGroup('test2', 2, 3),
        makeGroup('test3', 4, 2),
    ];

    return (
        <GroupBarChart
            yAxisLabel="count"
            data={groups}
            title="testa vs testb"
        />
    );
};

src/assets/synthetic-data.tsx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,13 @@ def run():
241241
'realCorrelations': real_data.corr().to_json(orient="records"),
242242
'syntheticCorrelations': np.abs(df_numeric.corr() - synth_df_numeric.corr()).to_json(orient="records"),
243243
'reports' : [
244-
{
245-
'reportType': 'heading',
246-
'headingKey': 'syntheticData.explanatoryDataAnalysisTitle'
247-
},
248-
{'reportType': 'univariateDistributionRealData'},
249-
{'reportType': 'bivariateDistributionRealData'},
250-
{'reportType': 'correlationRealData'},
244+
# {
245+
# 'reportType': 'heading',
246+
# 'headingKey': 'syntheticData.explanatoryDataAnalysisTitle'
247+
# },
248+
# {'reportType': 'univariateDistributionRealData'},
249+
# {'reportType': 'bivariateDistributionRealData'},
250+
# {'reportType': 'correlationRealData'},
251251
{
252252
'reportType': 'heading',
253253
'headingKey': 'syntheticData.cartModelTitle'

src/components/CSVReader.tsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ export interface csvReader {
1111
data: Record<string, string>[],
1212
stringified: string,
1313
fileName: string,
14-
demo?: boolean
14+
demo?: boolean,
15+
columnsCount?: number
1516
) => void;
1617
}
1718

@@ -30,10 +31,13 @@ export default function CSVReader({ onChange }: csvReader) {
3031
dynamicTyping: true,
3132

3233
complete: function (results) {
34+
const columns = (results?.data as unknown as [][])[0];
3335
onChange(
3436
results.data as Record<string, string>[],
3537
Papa.unparse(results.data),
36-
file.name
38+
file.name,
39+
false,
40+
Object.keys(columns).length
3741
);
3842
},
3943
error: function (error) {

src/components/DistributionReport.tsx

Lines changed: 163 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@ import ViolinChart from './graphs/ViolinChart';
1111
import GroupBarChart from './graphs/GroupBarChart';
1212
import SimpleTable from './SimpleTable';
1313

14+
/**
 * Computes, as a percentage, how many of the rows whose `column1` equals
 * `category1` also have `column2` equal to `category2`.
 *
 * Both comparisons use strict equality, so the category values must have the
 * same runtime type as the cell values they are compared against.
 *
 * @param data - Rows to scan; each row is indexed by column name.
 * @param category1 - Value `row[column1]` must equal for the row to be counted in the total.
 * @param category2 - Value `row[column2]` must additionally equal to be counted as a hit.
 * @param column1 - Name of the conditioning column.
 * @param column2 - Name of the column whose category is being counted.
 * @returns `(hits / total) * 100`, or `0` when no row matches `category1`.
 */
function countCategory2ForCategory1(
    data: Record<string, any>[],
    category1: string,
    category2: string,
    column1: string,
    column2: string
): number {
    let total = 0;
    let hits = 0;

    // Single pass: the function is invoked once per category pair, so avoid
    // building two intermediate filtered arrays on every call.
    for (const row of data) {
        if (row[column1] !== category1) {
            continue;
        }
        total += 1;
        if (row[column2] === category2) {
            hits += 1;
        }
    }

    // Guard against division by zero when the conditioning category is absent.
    if (total === 0) {
        return 0;
    }
    return (hits / total) * 100;
}
31+
1432
interface DistributionReport {
1533
reportType: string;
1634
headingKey?: string;
@@ -129,7 +147,7 @@ export const DistributionReport = (
129147
const charts = columnNames.map((column, index) => {
130148
const dataType = dataTypes[column];
131149
return columnNames.map((column2, index2) => {
132-
if (column === column2 || index >= index2) {
150+
if (column === column2 || index <= index2) {
133151
return null;
134152
}
135153
const dataType2 = dataTypes[column2];
@@ -195,11 +213,28 @@ export const DistributionReport = (
195213
return (
196214
<div key={column + column2}>
197215
<GroupBarChart
216+
yAxisLabel={t(
217+
'distribution.frequency'
218+
)}
198219
data={histogramData}
199220
title={`${column} vs ${column2}`}
200221
/>
201222
</div>
202223
);
224+
} else if (
225+
dataType === 'float' &&
226+
dataType2 === 'category'
227+
) {
228+
return (
229+
<ViolinChart
230+
key={column + column2}
231+
categoricalColumn={column2}
232+
numericColumn={column}
233+
realData={realData}
234+
syntheticData={syntheticData}
235+
comparison={false}
236+
/>
237+
);
203238
}
204239
});
205240
});
@@ -219,30 +254,133 @@ export const DistributionReport = (
219254
report.reportType ===
220255
'bivariateDistributionSyntheticData'
221256
) {
222-
const charts = columnNames.map(column => {
223-
const dataType = dataTypes[column];
224-
return columnNames.map(column2 => {
225-
if (column === column2) {
226-
return null;
227-
}
228-
const dataType2 = dataTypes[column2];
229-
if (
230-
dataType === 'float' &&
231-
dataType2 === 'category'
232-
) {
233-
return (
234-
<ViolinChart
235-
key={column + column2}
236-
categoricalColumn={column2}
237-
numericColumn={column}
238-
realData={realData}
239-
syntheticData={syntheticData}
240-
/>
241-
);
242-
}
243-
return null;
244-
});
245-
});
257+
console.log(
258+
'bivariateDistributionSyntheticData',
259+
columnNames
260+
);
261+
const charts = columnNames.map(
262+
(column, indexcolumn1) => {
263+
const dataType = dataTypes[column];
264+
return columnNames.map(
265+
(column2, indexcolumn2) => {
266+
const dataType2 = dataTypes[column2];
267+
if (indexcolumn1 >= indexcolumn2) {
268+
return null;
269+
}
270+
271+
if (
272+
dataType === 'category' &&
273+
dataType2 === 'float'
274+
) {
275+
return (
276+
<ViolinChart
277+
key={column + column2}
278+
categoricalColumn={column}
279+
numericColumn={column2}
280+
realData={realData}
281+
syntheticData={
282+
syntheticData
283+
}
284+
comparison={true}
285+
/>
286+
);
287+
} else if (
288+
dataType === 'float' &&
289+
dataType2 === 'category'
290+
) {
291+
return (
292+
<ViolinChart
293+
key={column + column2}
294+
categoricalColumn={column2}
295+
numericColumn={column}
296+
realData={realData}
297+
syntheticData={
298+
syntheticData
299+
}
300+
comparison={true}
301+
/>
302+
);
303+
} else if (
304+
dataType === 'category' &&
305+
dataType2 === 'category'
306+
) {
307+
const categories = Array.from(
308+
new Set([
309+
...realData.map(
310+
(d: any) => d[column]
311+
),
312+
])
313+
);
314+
const categories2 = Array.from(
315+
new Set([
316+
...realData.map(
317+
(d: any) => d[column2]
318+
),
319+
])
320+
);
321+
322+
return (
323+
<div>
324+
<h2 className="text-center font-bold mt-2 text-[12px]">
325+
{column} vs {column2}
326+
</h2>
327+
<div className="flex flex-row w-full overflow-auto gap-4">
328+
{categories.map(
329+
item => (
330+
<div
331+
key={item}
332+
className="flex flex-col"
333+
>
334+
<GroupBarChart
335+
colorRange={[
336+
'steelblue',
337+
'orange',
338+
]}
339+
yAxisLabel={t(
340+
'distribution.percentage'
341+
)}
342+
title={`${column} = ${item}`}
343+
data={categories2.map(
344+
item2 => ({
345+
// count : number of times where item2 appears in the data for category2 and rows where category1 = item
346+
name: `${item2}`,
347+
values: [
348+
{
349+
name: 'real',
350+
value: countCategory2ForCategory1(
351+
realData,
352+
item,
353+
item2,
354+
column,
355+
column2
356+
),
357+
},
358+
{
359+
name: 'synth',
360+
value: countCategory2ForCategory1(
361+
syntheticData,
362+
item,
363+
item2,
364+
column,
365+
column2
366+
),
367+
},
368+
],
369+
})
370+
)}
371+
/>
372+
</div>
373+
)
374+
)}
375+
</div>
376+
</div>
377+
);
378+
}
379+
return null;
380+
}
381+
);
382+
}
383+
);
246384
return (
247385
<div key={indexReport} className="mb-4">
248386
<Accordion

src/components/SyntheticDataSettings.tsx

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ interface DemoDataColumns {
2929
bar: string;
3030
}
3131

32-
export default function BiasSettings({
32+
export default function SyntheticDataSettings({
3333
onRun,
3434
onDataLoad,
3535
isLoading,
@@ -51,7 +51,7 @@ export default function BiasSettings({
5151
sdgMethod: 'gc',
5252
},
5353
});
54-
54+
const [columnsCountError, setColumnsCountError] = useState(false);
5555
const [outputSamples, setOutputSamples] = useState([1000]);
5656
const [dataKey, setDataKey] = useState<string>(new Date().toISOString());
5757
const [data, setData] = useState<{
@@ -63,13 +63,20 @@ export default function BiasSettings({
6363
const onFileLoad = (
6464
data: Record<string, string>[],
6565
stringified: string,
66-
fileName: string
66+
fileName: string,
67+
isDemo?: boolean,
68+
columnsCount?: number
6769
) => {
6870
if (stringified.length === 0) {
71+
setColumnsCountError(false);
6972
form.reset();
7073
} else {
7174
form.setValue('file', stringified);
75+
if (!isDemo && columnsCount && columnsCount > 8) {
76+
setColumnsCountError(true);
77+
}
7278
}
79+
7380
setData({ data, stringified, fileName });
7481
setDataKey(new Date().toISOString());
7582
};
@@ -89,6 +96,7 @@ export default function BiasSettings({
8996
ugpa: (row as DemoDataColumns)['ugpa'],
9097
bar: (row as DemoDataColumns)['bar'],
9198
}));
99+
92100
onDataLoad(
93101
demoData as Record<string, string>[],
94102
Papa.unparse(demoData),
@@ -125,6 +133,15 @@ export default function BiasSettings({
125133
)}
126134
/>
127135
</div>
136+
<div className="flex flex-row gap-2 justify-start">
137+
{columnsCountError && (
138+
<div className="text-red-500">
139+
{t(
140+
'syntheticData.form.errors.columnsCountError'
141+
)}
142+
</div>
143+
)}
144+
</div>
128145

129146
<div className="flex flex-col gap-3">
130147
<label className="text-sm font-medium">
@@ -211,7 +228,7 @@ export default function BiasSettings({
211228
type="submit"
212229
size="sm"
213230
className="gap-1.5"
214-
disabled={isLoading}
231+
disabled={isLoading || columnsCountError}
215232
>
216233
{!isLoading
217234
? t('syntheticData.form.actions.runGeneration')

src/components/componentMapper.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ export default function ComponentMapper({
152152
<ErrorBoundary key={index}>
153153
<GroupBarChart
154154
data={histogramData}
155+
yAxisLabel={t('distribution.frequency')}
155156
title={resultItem.title ?? ''}
156157
/>
157158
</ErrorBoundary>

0 commit comments

Comments
 (0)