Skip to content

Commit 069cb55

Browse files
committed
feat: use histogram samples for t-test analysis
Justification: Each sample in the histogram represents a durationPerOp sample, calculated from a certain number of iterations of the function under test and the cumulative time of those runs; that is, each sample is the average execution time per execution. opsSec and opsSecPerRun are then averages of the samples, which are themselves averages. Therefore, using opsSecPerRun as t-test input inaccurately applies the calculation to an average of averages, when it is meant to be applied to a set of averages totalling a minimum of 30 samples, with 40 preferable. In other words, it is the histogram entry that represents a valid t-test sample.
1 parent f73ed33 commit 069cb55

File tree

8 files changed

+44
-36
lines changed

8 files changed

+44
-36
lines changed

.DS_Store

6 KB
Binary file not shown.

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -779,14 +779,15 @@ the two samples, which is common in benchmark scenarios.
779779

780780
### Enabling T-Test Mode
781781

782-
Enable t-test mode with `ttest: true`. This automatically sets `repeatSuite=30` to collect enough
783-
independent samples for reliable statistical analysis (per the Central Limit Theorem):
782+
Enable t-test mode with `ttest: true`. It requires at least 30 independent samples for reliable statistical analysis (per the
783+
Central Limit Theorem):
784784

785785
```js
786786
const { Suite } = require('bench-node');
787787

788788
const suite = new Suite({
789-
ttest: true, // Enables t-test and auto-sets repeatSuite=30
789+
ttest: true, // Enables t-test, which requires 30, preferably 40 samples for statistical significance
790+
minSamples: 40 // sample count is >= minSamples x repeatSuite
790791
});
791792

792793
suite

examples/.DS_Store

6 KB
Binary file not shown.

examples/statistical-significance/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ Enable t-test mode with `ttest: true`:
2020
const { Suite } = require('bench-node');
2121

2222
const suite = new Suite({
23-
ttest: true, // Automatically sets repeatSuite=30
23+
ttest: true,
24+
minSamples: 30, // minSamples x repeatSuite must be >= 30
2425
});
2526

2627
suite.add('baseline', { baseline: true }, () => {

examples/statistical-significance/node.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const { Suite } = require('../../lib');
1616
// Enable t-test mode - requires minSamples x repeatSuite >= 30 samples for each benchmark
1717
const suite = new Suite({
1818
ttest: true,
19+
minSamples: 30
1920
});
2021

2122
// Baseline: Simple array sum using for loop

lib/index.js

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ const defaultBenchOptions = {
103103
};
104104

105105
// Minimum number of samples (minSamples x repeatSuite) required for reliable t-test results
106-
const MIN_REPEAT_FOR_TTEST = 30;
106+
const MIN_SAMPLES_FOR_TTEST = 30;
107107

108108
function throwIfNoNativesSyntax() {
109109
if (process.execArgv.includes("--allow-natives-syntax") === false) {
@@ -180,8 +180,6 @@ class Suite {
180180
if (options.repeatSuite !== undefined) {
181181
validateNumber(options.repeatSuite, "options.repeatSuite", 1);
182182
repeatSuite = options.repeatSuite;
183-
} else if (this.#ttest) {
184-
repeatSuite = MIN_REPEAT_FOR_TTEST;
185183
}
186184
this.#repeatSuite = repeatSuite;
187185

@@ -231,6 +229,12 @@ class Suite {
231229
throw new Error("There is already a baseline benchmark");
232230
}
233231

232+
if (options.minSamples * options.repeatSuite < MIN_SAMPLES_FOR_TTEST) {
233+
process.emitWarning(
234+
`The benchmark "${name}" may not have enough samples to run t-test analysis. Please set minSamples x repeatSuite >= ${MIN_SAMPLES_FOR_TTEST}`,
235+
);
236+
}
237+
234238
const benchmark = new Benchmark(
235239
name,
236240
fn,

lib/utils/analyze.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ function analyze(results, sorted = true, options = {}) {
4444
}
4545

4646
if (ttest) {
47-
const resultSamples = result.opsSecPerRun;
48-
const baselineSamplesForTest = baselineResult.opsSecPerRun;
47+
const resultSamples = result.histogram?.sampleData ?? [];
48+
const baselineSamplesForTest =
49+
baselineResult.histogram?.sampleData ?? [];
4950

5051
if (
5152
baselineSamplesForTest?.length >= 30 &&

test/ttest.js

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -238,13 +238,13 @@ describe("T-Test Integration with analyze", () => {
238238
assert.strictEqual(testResult.significanceTest, undefined);
239239
});
240240

241-
it("should include significanceTest when ttest is true and opsSecPerRun >= 30", () => {
242-
// Generate 30+ opsSecPerRun samples (from repeatSuite)
243-
const baselineOpsSecPerRun = Array.from(
241+
it("should include significanceTest when ttest is true and samples >= 30", () => {
242+
// Generate 30+ samples
243+
const baselineSampleData = Array.from(
244244
{ length: 30 },
245245
(_, i) => 100 + (i % 3) - 1,
246246
);
247-
const testOpsSecPerRun = Array.from(
247+
const testSampleData = Array.from(
248248
{ length: 30 },
249249
(_, i) => 200 + (i % 3) - 1,
250250
);
@@ -254,12 +254,12 @@ describe("T-Test Integration with analyze", () => {
254254
name: "baseline",
255255
opsSec: 100,
256256
baseline: true,
257-
opsSecPerRun: baselineOpsSecPerRun,
257+
histogram: { sampleData: baselineSampleData },
258258
},
259259
{
260260
name: "test",
261261
opsSec: 200,
262-
opsSecPerRun: testOpsSecPerRun,
262+
histogram: { sampleData: testSampleData },
263263
},
264264
];
265265

@@ -272,7 +272,7 @@ describe("T-Test Integration with analyze", () => {
272272
assert.ok(typeof testResult.significanceTest.confidence === "string");
273273
});
274274

275-
it("should not include significanceTest without opsSecPerRun", () => {
275+
it("should not include significanceTest without sufficient samples", () => {
276276
const results = [
277277
{
278278
name: "baseline",
@@ -288,22 +288,22 @@ describe("T-Test Integration with analyze", () => {
288288
const analyzed = analyze(results, true, { ttest: true });
289289
const testResult = analyzed.find((r) => r.name === "test");
290290

291-
// Should not throw, and significanceTest should not be set (no opsSecPerRun)
291+
// Should not throw, and significanceTest should not be set (no samples)
292292
assert.strictEqual(testResult.significanceTest, undefined);
293293
});
294294

295-
it("should not include significanceTest when opsSecPerRun < 30", () => {
295+
it("should not include significanceTest when samples < 30", () => {
296296
const results = [
297297
{
298298
name: "baseline",
299299
opsSec: 100,
300300
baseline: true,
301-
opsSecPerRun: Array.from({ length: 10 }, () => 100),
301+
histogram: { samples: Array.from({ length: 10 }, () => 100) },
302302
},
303303
{
304304
name: "test",
305305
opsSec: 200,
306-
opsSecPerRun: Array.from({ length: 10 }, () => 200),
306+
histogram: { samples: Array.from({ length: 10 }, () => 200) },
307307
},
308308
];
309309

@@ -315,12 +315,12 @@ describe("T-Test Integration with analyze", () => {
315315
});
316316

317317
it("should detect significant difference between clearly different benchmarks", () => {
318-
// Generate 30+ opsSecPerRun with clearly different means
319-
const baselineOpsSecPerRun = Array.from(
318+
// Generate 30+ samples with clearly different means
319+
const baselineSampleData = Array.from(
320320
{ length: 30 },
321321
(_, i) => 100 + (i % 5) - 2,
322322
);
323-
const fastOpsSecPerRun = Array.from(
323+
const fastSampleData = Array.from(
324324
{ length: 30 },
325325
(_, i) => 200 + (i % 5) - 2,
326326
);
@@ -330,12 +330,12 @@ describe("T-Test Integration with analyze", () => {
330330
name: "baseline",
331331
opsSec: 100,
332332
baseline: true,
333-
opsSecPerRun: baselineOpsSecPerRun,
333+
histogram: { sampleData: baselineSampleData },
334334
},
335335
{
336336
name: "fast",
337337
opsSec: 200,
338-
opsSecPerRun: fastOpsSecPerRun,
338+
histogram: { sampleData: fastSampleData },
339339
},
340340
];
341341

@@ -348,12 +348,12 @@ describe("T-Test Integration with analyze", () => {
348348

349349
it("should not mark as significant when differences are within noise", () => {
350350
// Same benchmark run twice - should have similar results with high variance overlap
351-
// Generate 30+ opsSecPerRun with overlapping distributions
352-
const baselineOpsSecPerRun = Array.from(
351+
// Generate 30+ samples with overlapping distributions
352+
const baselineSampleData = Array.from(
353353
{ length: 30 },
354354
(_, i) => 100 + ((i % 5) - 2) * 2,
355355
);
356-
const similarOpsSecPerRun = Array.from(
356+
const similarSampleData = Array.from(
357357
{ length: 30 },
358358
(_, i) => 101 + ((i % 5) - 2) * 2,
359359
);
@@ -363,12 +363,12 @@ describe("T-Test Integration with analyze", () => {
363363
name: "baseline",
364364
opsSec: 100,
365365
baseline: true,
366-
opsSecPerRun: baselineOpsSecPerRun,
366+
histogram: { sampleData: baselineSampleData },
367367
},
368368
{
369369
name: "similar",
370370
opsSec: 101, // Very close to baseline
371-
opsSecPerRun: similarOpsSecPerRun,
371+
histogram: { sampleData: similarSampleData },
372372
},
373373
];
374374

@@ -384,18 +384,18 @@ describe("Statistical significance requires repeatSuite >= 30", () => {
384384
const { analyze } = require("../lib/utils/analyze");
385385

386386
it("should only compute significance when repeatSuite provides 30+ samples", () => {
387-
// With 30+ opsSecPerRun, significance should be computed
387+
// With 30+ samples, significance should be computed
388388
const results = [
389389
{
390390
name: "baseline",
391391
opsSec: 100,
392392
baseline: true,
393-
opsSecPerRun: Array.from({ length: 30 }, () => 100),
393+
histogram: { sampleData: Array.from({ length: 30 }, () => 100) },
394394
},
395395
{
396396
name: "test",
397397
opsSec: 200,
398-
opsSecPerRun: Array.from({ length: 30 }, () => 200),
398+
histogram: { sampleData: Array.from({ length: 30 }, () => 200) },
399399
},
400400
];
401401

@@ -406,18 +406,18 @@ describe("Statistical significance requires repeatSuite >= 30", () => {
406406
});
407407

408408
it("should not compute significance when repeatSuite < 30", () => {
409-
// With fewer than 30 opsSecPerRun, significance should not be computed
409+
// With fewer than 30 samples, significance should not be computed
410410
const results = [
411411
{
412412
name: "baseline",
413413
opsSec: 100,
414414
baseline: true,
415-
opsSecPerRun: Array.from({ length: 10 }, () => 100),
415+
histogram: { sampleData: Array.from({ length: 10 }, () => 100) },
416416
},
417417
{
418418
name: "test",
419419
opsSec: 200,
420-
opsSecPerRun: Array.from({ length: 10 }, () => 200),
420+
histogram: { sampleData: Array.from({ length: 10 }, () => 200) },
421421
},
422422
];
423423

0 commit comments

Comments
 (0)