Skip to content

Commit b5d3d7c

Browse files
authored
Merge pull request #24 from NGO-Algorithm-Audit/feature/content-headings-structure
Feature/content headings structure
2 parents 719c2b3 + 5f3f46d commit b5d3d7c

File tree

5 files changed

+157
-42
lines changed

5 files changed

+157
-42
lines changed

src/assets/synthetic-data.tsx

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,7 @@ def run():
125125
126126
admissions_sub = admissions_df[['sex', 'race1', 'ugpa', 'bar']]
127127
real_data = admissions_sub.dropna()
128-
setResult(json.dumps(
129-
{'type': 'heading', 'data': sdgMethod}
130-
))
128+
131129
if isDemo:
132130
setResult(json.dumps({
133131
'type': 'heading',
@@ -138,6 +136,11 @@ def run():
138136
'key': 'syntheticData.demo.description'
139137
}))
140138
139+
140+
setResult(json.dumps({
141+
'type': 'heading',
142+
'headingKey': 'syntheticData.dataSetPreview.heading'
143+
}))
141144
setResult(json.dumps(
142145
{'type': 'data-set-preview', 'data': ''}
143146
))
@@ -207,27 +210,7 @@ def run():
207210
208211
results = run_diagnostic(real_data, synthetic_data, target_column='gpa')
209212
print('Results:', results)
210-
setResult(json.dumps(
211-
{'type': 'heading', 'data': 'Diagnostic Results:'}
212-
))
213-
setResult(json.dumps({'type': 'table', 'data': json.dumps([
214-
{
215-
'attribute': key,
216-
'ks_stat': values['ks_stat'],
217-
'p_value': values['p_value']
218-
}
219-
for key, values in results['distribution_results'].items()
220-
])}))
221-
222-
setResult(json.dumps(
223-
{'type': 'heading', 'data': 'Correlation difference: ' + str(results['correlation_diff']) }
224-
))
225-
226-
setResult(json.dumps(
227-
{'type': 'heading', 'data': '5. Output data'}
228-
))
229-
setResult(json.dumps({'type': 'table', 'data': synthetic_data.head().to_json(orient="records")}))
230-
213+
231214
232215
# copy dataframe and assign NaN to all values
233216
synth_df = real_data.copy()
@@ -247,7 +230,26 @@ def run():
247230
'realCorrelations': real_data.corr().to_json(orient="records"),
248231
'syntheticCorrelations': synthetic_data.corr().to_json(orient="records"),
249232
'reports' : [
250-
'univariate', 'distribution', 'correlation'
233+
{
234+
'reportType': 'heading',
235+
'headingKey': 'syntheticData.explanatoryDataAnalysisTitle'
236+
},
237+
{'reportType': 'univariate'},
238+
{
239+
'reportType': 'heading',
240+
'headingKey': 'syntheticData.cartModelTitle'
241+
},
242+
{
243+
'reportType': 'text',
244+
'textKey': 'syntheticData.cartModelDescription'
245+
},
246+
{
247+
'reportType': 'heading',
248+
'headingKey': 'syntheticData.evaluationOfGeneratedDataTitle'
249+
},
250+
{'reportType': 'distribution'},
251+
{'reportType': 'correlation'},
252+
251253
]
252254
}))
253255
@@ -257,6 +259,44 @@ def run():
257259
# 'synthetic': synthetic_data.corr().to_json(orient="records")
258260
# }))
259261
262+
setResult(json.dumps(
263+
{
264+
'type': 'heading',
265+
'headingKey': 'syntheticData.diagnosticsTitle'
266+
}
267+
))
268+
setResult(json.dumps({'type': 'table',
269+
'showIndex' : False,
270+
'data': json.dumps([
271+
{
272+
'attribute': key,
273+
'ks_stat': values['ks_stat'],
274+
'p_value': values['p_value']
275+
}
276+
for key, values in results['distribution_results'].items()
277+
])}))
278+
279+
setResult(json.dumps(
280+
{
281+
'type': 'heading',
282+
'headingKey': 'syntheticData.correlationDifference',
283+
'params': {
284+
'correlationDifference' : str(results['correlation_diff'])
285+
}
286+
}
287+
))
288+
289+
setResult(json.dumps({
290+
'type': 'heading',
291+
'headingKey': 'syntheticData.outputDataTitle'
292+
}))
293+
294+
setResult(json.dumps({
295+
'type': 'table',
296+
'showIndex': True,
297+
'data': synthetic_data.head().to_json(orient="records")
298+
}))
299+
260300
return
261301
262302

src/components/SimpleTable.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import { useTranslation } from 'react-i18next';
1212
export default function SimpleTable({
1313
title,
1414
data,
15+
showIndex,
1516
}: {
17+
showIndex: boolean;
1618
title?: string;
1719
data: Record<string, string | number>[];
1820
}) {
@@ -26,6 +28,7 @@ export default function SimpleTable({
2628
{title && <TableCaption>{t(title)}</TableCaption>}
2729
<TableHeader>
2830
<TableRow className="bg-aaLight">
31+
{showIndex && <TableHead></TableHead>}
2932
{Object.keys(data[0]).map(key => (
3033
<TableHead key={key} className="text-black">
3134
{key}
@@ -36,6 +39,7 @@ export default function SimpleTable({
3639
<TableBody>
3740
{data.map((row, i) => (
3841
<TableRow key={i}>
42+
{showIndex && <TableCell>{i}</TableCell>}
3943
{Object.values(row).map((value, i) => (
4044
<TableCell key={i}>{value}</TableCell>
4145
))}

src/components/componentMapper.tsx

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ export default function ComponentMapper({
9090
<SimpleTable
9191
data={data.data.slice(0, 5)}
9292
title="datasetPreview"
93+
showIndex={true}
9394
/>
9495
)}
9596
</Fragment>
@@ -101,6 +102,7 @@ export default function ComponentMapper({
101102
key={index}
102103
data={JSON.parse(resultItem.data)}
103104
title={t(resultItem.title)}
105+
showIndex={resultItem.showIndex ?? false}
104106
/>
105107
);
106108

@@ -167,17 +169,14 @@ export default function ComponentMapper({
167169
);
168170

169171
case 'text':
170-
// Handle text that might need translation
171-
const textContent = resultItem.key
172-
? t(resultItem.key, resultItem.params)
173-
: resultItem.data;
174-
175172
return (
176173
<Markdown
177174
key={index}
178175
className="-mt-2 text-gray-800 markdown"
179176
>
180-
{textContent}
177+
{resultItem.key
178+
? t(resultItem.key, resultItem.params)
179+
: resultItem.data}
181180
</Markdown>
182181
);
183182
case 'histogram': {
@@ -214,10 +213,58 @@ export default function ComponentMapper({
214213
const dataTypes = JSON.parse(resultItem.dataTypes);
215214
console.log('reports', resultItem.reports);
216215
return (
217-
<div key={`distribution-${index}`}>
216+
<div
217+
key={`distribution-${index}`}
218+
className="flex flex-col gap-6"
219+
>
218220
{resultItem.reports.map(
219-
(report: string, indexReport: number) => {
220-
if (report === 'univariate') {
221+
(
222+
report: {
223+
reportType: string;
224+
headingKey?: string;
225+
textKey?: string;
226+
params?: Record<
227+
string,
228+
string | number | boolean
229+
>;
230+
},
231+
indexReport: number
232+
) => {
233+
if (
234+
report.reportType === 'heading' &&
235+
report.headingKey
236+
) {
237+
return (
238+
<h5
239+
key={indexReport}
240+
className="text-gray-800 font-semibold mb-4"
241+
>
242+
{t(
243+
report.headingKey,
244+
report.params
245+
)}
246+
</h5>
247+
);
248+
}
249+
if (
250+
report.reportType === 'text' &&
251+
report.textKey
252+
) {
253+
return (
254+
<Markdown
255+
key={index}
256+
className="-mt-2 text-gray-800 markdown"
257+
>
258+
{t(
259+
report.textKey,
260+
report.params
261+
)}
262+
</Markdown>
263+
);
264+
}
265+
if (
266+
report.reportType === 'univariate'
267+
) {
221268
return (
222269
<div
223270
key={indexReport}
@@ -250,7 +297,9 @@ export default function ComponentMapper({
250297
</div>
251298
);
252299
}
253-
if (report === 'distribution') {
300+
if (
301+
report.reportType === 'distribution'
302+
) {
254303
return (
255304
<Fragment key={indexReport}>
256305
{realData.length === 0 ||
@@ -325,7 +374,9 @@ export default function ComponentMapper({
325374
);
326375
}
327376

328-
if (report === 'correlation') {
377+
if (
378+
report.reportType === 'correlation'
379+
) {
329380
const {
330381
columns: realColumns,
331382
data: convertedData,

src/locales/en.json

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"downloadButton": "Download",
1717
"biasSettings": {
1818
"exportToPDF": "Download bias detection report as pdf",
19-
"exportToJSON": "Export clusters as json",
19+
"exportToJSON": "Export clusters as json",
2020
"form": {
2121
"fieldsets": {
2222
"data": {
@@ -59,7 +59,7 @@
5959
"syntheticData": {
6060
"demo": {
6161
"heading": "Information about demo dataset",
62-
"description": "A subset of the [Law School Admission Bar](https://www.kaggle.com/datasets/danofer/law-school-admissions-bar-passage)* dataset is used as a demo. Synthetic data will be generated for the following columns:\n \n&nbsp;&nbsp;\n- sex: student gender, i.e. 1 (male), 2 (female);\n- race1: race of student, i.e., asian, black, hispanic, white, other;\n- ugpa: undergraduate GPA of student (average course grades), continous variable;\n- bar: Ground truth label indicating whether or not the student passed the bar, i.e., passed 1st time, passed 2nd time, failed, non-graduated.\n \n&nbsp;&nbsp;\n\nThe CART method will be used evaluate the distribution and correlation differences between the real and synthetic data.\n \n&nbsp;&nbsp;\n\n*The original paper can be found [here](https://files.eric.ed.gov/fulltext/ED469370.pdf)\n \n&nbsp;&nbsp;\n 0. Preview of the dataset"
62+
"description": "A subset of the [Law School Admission Bar](https://www.kaggle.com/datasets/danofer/law-school-admissions-bar-passage)* dataset is used as a demo. Synthetic data will be generated for the following columns:\n \n&nbsp;&nbsp;\n- sex: student gender, i.e. 1 (male), 2 (female);\n- race1: race of student, i.e., asian, black, hispanic, white, other;\n- ugpa: undergraduate GPA of student (average course grades), continuous variable;\n- bar: Ground truth label indicating whether or not the student passed the bar, i.e., passed 1st time, passed 2nd time, failed, non-graduated.\n \n&nbsp;&nbsp;\n\nThe CART method will be used to evaluate the distribution and correlation differences between the real and synthetic data.\n \n&nbsp;&nbsp;\n\n*The original paper can be found [here](https://files.eric.ed.gov/fulltext/ED469370.pdf)\n \n&nbsp;&nbsp;\n"
6363
},
6464
"exportToPDF": "Download evaluation report as pdf",
6565
"exportToJSON": "Download synthetic data as json",
@@ -89,11 +89,21 @@
8989
"title": "Try it out!",
9090
"description": "Do you not have a dataset at hand? No worries use our demo dataset."
9191
},
92-
"columnsInDataset": "1. Data types detection",
9392
"columnsInDatasetInfo": "If detected data types are incorrect, please change this locally in the dataset before attaching it again.",
9493
"univariateCharts": "Univariate distributions of the attached dataset",
9594
"synthVsReal": "Univariate distributions of synthetic vs real data",
96-
"heatmapCorrelation": "Correlation matrix"
95+
"heatmapCorrelation": "Correlation matrix",
96+
"dataSetPreview": {
97+
"heading": "0. Preview of data"
98+
},
99+
"columnsInDataset": "1. Data types detection",
100+
"explanatoryDataAnalysisTitle": "2. Exploratory data analysis",
101+
"cartModelTitle": "3. CART model",
102+
"cartModelDescription": "The CART (Classification and Regression Trees) method generates synthetic data by learning patterns from real data through a decision tree that splits data into homogeneous groups based on feature values. It predicts averages for numerical data and assigns the most common category for categorical data, using these predictions to create new synthetic points.",
103+
"evaluationOfGeneratedDataTitle": "4. Evaluation of generated data",
104+
"outputDataTitle": "5. Output data",
105+
"diagnosticsTitle": "Diagnostic Results:",
106+
"correlationDifference": "Correlation difference: {{correlationDifference}}"
97107
},
98108

99109
"biasAnalysis": {

src/locales/nl.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,21 @@
8989
"title": "Probeer het uit!",
9090
"description": "Heeft u geen dataset bij de hand? Geen zorgen, gebruik onze demodataset."
9191
},
92-
"columnsInDataset": "1. Detectie van datatypes",
9392
"columnsInDatasetInfo": "Als de gedetecteerde datatypes niet correct zijn, pas dit dan lokaal aan in de dataset voordat u deze opnieuw aan de app koppelt.",
9493
"univariateCharts": "Univariate distributies van de gekoppelde dataset",
9594
"synthVsReal": "Univariate distributies in synthetische vs originele data",
96-
"heatmapCorrelation": "Correlatiematrix"
95+
"heatmapCorrelation": "Correlatiematrix",
96+
"dataSetPreview": {
97+
"heading": "0. Preview van de data"
98+
},
99+
"columnsInDataset": "1. Detectie van datatypes",
100+
"explanatoryDataAnalysisTitle": "2. Exploratieve data-analyse",
101+
"cartModelTitle": "3. CART model",
102+
"cartModelDescription": "De CART-methode (Classification and Regression Trees) genereert synthetische data door patronen uit echte data te leren via een beslisboom die de data opdeelt in homogene groepen op basis van kenmerkwaarden. Voor numerieke data voorspelt de methode gemiddelden, en voor categorische data wijst het de meest voorkomende categorie toe. Deze voorspellingen worden vervolgens gebruikt om nieuwe synthetische gegevenspunten te creëren.",
103+
"evaluationOfGeneratedDataTitle": "4. Evaluatie van gegenereerde data",
104+
"outputDataTitle": "5. Output data",
105+
"diagnosticsTitle": "Diagnostische Resultaten:",
106+
"correlationDifference": "Correlatie verschil: {{correlationDifference}}"
97107
},
98108
"biasAnalysis": {
99109
"demo": {

0 commit comments

Comments
 (0)