feat: report `significant: false` when `ttest: true` is set but the sample size is too small.

jdmarshall · jdmarshall · commit a0b4160dc898 · 2026-01-04T20:12:36.000-08:00
This will help me sort out inconclusive tests without missing misconfigured
ones.

This is necessitated by the changes in the previous commit that allow
for failure instead of forcing success.
diff --git a/lib/utils/analyze.js b/lib/utils/analyze.js
@@ -63,6 +63,10 @@ function analyze(results, sorted = true, options = {}) {
 						confidence: ttestResult.confidence,
 						stars: ttestResult.stars,
 					};
+				} else {
+					result.significanceTest = {
+						significant: false
+					}
 				}
 			}
 		}
diff --git a/test/ttest.js b/test/ttest.js
@@ -272,7 +272,7 @@ describe("T-Test Integration with analyze", () => {
 		assert.ok(typeof testResult.significanceTest.confidence === "string");
 	});
 
-	it("should not include significanceTest without sufficient samples", () => {
+	it("should mark significanceTest as failed without samples", () => {
 		const results = [
 			{
 				name: "baseline",
@@ -288,8 +288,7 @@ describe("T-Test Integration with analyze", () => {
 		const analyzed = analyze(results, true, { ttest: true });
 		const testResult = analyzed.find((r) => r.name === "test");
 
-		// Should not throw, and significanceTest should not be set (no samples)
-		assert.strictEqual(testResult.significanceTest, undefined);
+		assert.deepEqual(testResult.significanceTest, { significant: false});
 	});
 
 	it("should not include significanceTest when samples < 30", () => {
@@ -310,8 +309,7 @@ describe("T-Test Integration with analyze", () => {
 		const analyzed = analyze(results, true, { ttest: true });
 		const testResult = analyzed.find((r) => r.name === "test");
 
-		// Should not throw, and significanceTest should not be set (not enough samples)
-		assert.strictEqual(testResult.significanceTest, undefined);
+		assert.deepEqual(testResult.significanceTest, { significant: false});
 	});
 
 	it("should detect significant difference between clearly different benchmarks", () => {
@@ -424,6 +422,6 @@ describe("Statistical significance requires repeatSuite >= 30", () => {
 		const analyzed = analyze(results, true, { ttest: true });
 		const testResult = analyzed.find((r) => r.name === "test");
 
-		assert.strictEqual(testResult.significanceTest, undefined);
+		assert.deepEqual(testResult.significanceTest, { significant: false});
 	});
 });

Original file line number	Diff line number	Diff line change
`@@ -63,6 +63,10 @@ function analyze(results, sorted = true, options = {}) {`
`63`	`63`	`confidence: ttestResult.confidence,`
`64`	`64`	`stars: ttestResult.stars,`
`65`	`65`	`};`
	`66`	`+ } else {`
	`67`	`+ result.significanceTest = {`
	`68`	`+ significant: false`
	`69`	`+ }`
`66`	`70`	`}`
`67`	`71`	`}`
`68`	`72`	`}`