Skip to content

Commit f902f6e

Browse files
authored
Merge pull request #29 from NGO-Algorithm-Audit/feature/violin-tweaks
Feature/violin tweaks
2 parents 0b7ec9b + dbadfca commit f902f6e

File tree

2 files changed

+68 -29 lines changed

src/components/graphs/ViolinChart.tsx

Lines changed: 67 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ interface ViolinChartProps {
1010
}
1111

1212
const margin = { top: 30, right: 50, bottom: 60, left: 80 };
13-
const height = 380 - margin.top - margin.bottom;
13+
const height = 580 - margin.top - margin.bottom;
1414

1515
const ViolinChart = ({
1616
categoricalColumn,
@@ -87,7 +87,7 @@ const ViolinChart = ({
8787
...realData.map(d => +d[numericColumn]),
8888
...syntheticData.map(d => +d[numericColumn]),
8989
]) || 0;
90-
const paddedMaxValue = maxValue + (maxValue - minValue) * 0.1;
90+
const paddedMaxValue = maxValue + (maxValue - minValue) * 0.25;
9191

9292
const yScale = d3
9393
.scaleLinear()
@@ -101,17 +101,51 @@ const ViolinChart = ({
101101

102102
// Function to create violin path
103103
const createViolin = (values: number[], side: 'left' | 'right') => {
104-
// Create kernel density estimation
104+
// Skip if no values
105+
if (values.length === 0) return null;
106+
107+
// Calculate Scott's rule for bandwidth
108+
const std = Math.sqrt(d3.variance(values) || 0);
109+
const bw = Math.pow(4 / (3 * values.length), 1 / 5) * std;
110+
111+
// Extend the range by 2 bandwidths on each side (cut=2)
112+
const minValue = d3.min(values) || 0;
113+
const maxValue = d3.max(values) || 0;
114+
const extension = 2 * bw;
115+
const densityPoints = d3.range(
116+
minValue - extension,
117+
maxValue + extension,
118+
(maxValue - minValue + 2 * extension) / 100
119+
);
120+
121+
// Create kernel density estimation with Gaussian kernel
105122
const kde = kernelDensityEstimator(
106-
kernelEpanechnikov(0.2),
107-
yScale.ticks(50)
123+
v => kernelGaussian(v, bw),
124+
densityPoints
108125
);
109126
const density: [number, number][] = kde(values);
110-
const maxDensity = d3.max(density, d => d[1]) || 0;
127+
128+
// Scale the density values
129+
// const maxDensity = d3.max(density, d => d[1]) || 0;
130+
// const normalizedDensity = density.map(
131+
// d =>
132+
// [d[0], (d[1] / maxDensity) * bandwidth] as [
133+
// number,
134+
// number,
135+
// ]
136+
// );
137+
const totalArea = d3.sum(density, d => d[1]);
138+
const normalizedDensity = density.map(
139+
d =>
140+
[d[0], (d[1] / totalArea) * bandwidth] as [
141+
number,
142+
number,
143+
]
144+
);
111145

112146
const widthScale = d3
113147
.scaleLinear()
114-
.domain([0, maxDensity])
148+
.domain([0, d3.max(normalizedDensity, d => d[1]) || 0])
115149
.range([0, bandwidth]);
116150

117151
const area = d3
@@ -125,9 +159,9 @@ const ViolinChart = ({
125159
return side === 'left' ? 0 : width;
126160
})
127161
.y(d => yScale(d[0]))
128-
.curve(d3.curveBasis);
162+
.curve(d3.curveLinear);
129163

130-
return area(density);
164+
return area(normalizedDensity);
131165
};
132166

133167
// Kernel functions
@@ -140,12 +174,11 @@ const ViolinChart = ({
140174
};
141175
}
142176

143-
function kernelEpanechnikov(k: number) {
144-
return function (v: number) {
145-
return Math.abs((v /= k)) <= 1
146-
? (0.75 * (1 - v * v)) / k
147-
: 0;
148-
};
177+
function kernelGaussian(v: number, bandwidth: number) {
178+
return (
179+
Math.exp(-0.5 * Math.pow(v / bandwidth, 2)) /
180+
(bandwidth * Math.sqrt(2 * Math.PI))
181+
);
149182
}
150183

151184
// Calculate center position for the violin plot
@@ -179,31 +212,37 @@ const ViolinChart = ({
179212
.attr('y2', yScale(q))
180213
.attr('transform', `translate(${centerPos}, 0)`)
181214
.style('stroke', color)
182-
.style('stroke-width', 1)
215+
.style('stroke-width', 2)
183216
.style('stroke-dasharray', '3,3');
184217
});
185218
};
186219

187220
// Draw real data violin (left side)
188221
if (real.length > 0) {
189-
svg.append('path')
190-
.attr('d', createViolin(real, 'left'))
191-
.attr('transform', `translate(${centerPos}, 0)`)
192-
.style('fill', 'steelblue')
193-
.style('opacity', 0.5);
222+
const path = createViolin(real, 'left');
223+
if (path) {
224+
svg.append('path')
225+
.attr('d', path)
226+
.attr('transform', `translate(${centerPos}, 0)`)
227+
.style('fill', 'steelblue')
228+
.style('opacity', 0.5);
194229

195-
drawQuartileLines(real, 'left', 'steelblue');
230+
drawQuartileLines(real, 'left', 'steelblue');
231+
}
196232
}
197233

198234
// Draw synthetic data violin (right side)
199235
if (synthetic.length > 0) {
200-
svg.append('path')
201-
.attr('d', createViolin(synthetic, 'right'))
202-
.attr('transform', `translate(${centerPos}, 0)`)
203-
.style('fill', 'orange')
204-
.style('opacity', 0.5);
236+
const path = createViolin(synthetic, 'right');
237+
if (path) {
238+
svg.append('path')
239+
.attr('d', path)
240+
.attr('transform', `translate(${centerPos}, 0)`)
241+
.style('fill', 'orange')
242+
.style('opacity', 0.5);
205243

206-
drawQuartileLines(synthetic, 'right', 'orange');
244+
drawQuartileLines(synthetic, 'right', 'orange');
245+
}
207246
}
208247
});
209248

src/routes/SyntheticData.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ export default function SyntheticDataGeneration() {
115115
onRun({
116116
isDemo: true,
117117
sdgMethod: 'cart',
118-
samples: 1000,
118+
samples: 5000,
119119
});
120120
}
121121
}, [initialised, data]);

0 commit comments

Comments (0)