Skip to content

Commit ccd5a34

Browse files
Merge PR danielmiessler#60: test: implement autoresearch stagnation logic tests
1 parent 1223e43 commit ccd5a34

File tree

1 file changed

+211
-0
lines changed

1 file changed

+211
-0
lines changed
Lines changed: 211 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,211 @@
1+
import { describe, test, expect } from "bun:test";
2+
3+
/**
 * One experiment record consumed by the stagnation logic in this file.
 */
export interface Experiment {
  /** Sequence number of the experiment. */
  iteration: number;
  /** Measured metric value for this experiment. */
  metric: number;
  /** Outcome of the experiment: kept, discarded (reverted), or the initial baseline. */
  status: "keep" | "discard" | "baseline";
}
8+
9+
/**
 * Least-squares slope of a value series fitted against its indices 0..n-1.
 *
 * @param keepValues - Metric values in chronological order.
 * @returns The fitted slope, or 0 when fewer than two values are supplied
 *          or the fit's denominator is zero.
 */
function calculateTrendSlope(keepValues: number[]): number {
  const n = keepValues.length;
  if (n < 2) return 0;

  let sumX = 0;
  let sumY = 0;
  let sumXY = 0;
  let sumX2 = 0;
  // The x coordinate of each point is simply its position in the series.
  for (const [x, y] of keepValues.entries()) {
    sumX += x;
    sumY += y;
    sumXY += x * y;
    sumX2 += x * x;
  }

  const denominator = n * sumX2 - sumX * sumX;
  if (denominator === 0) return 0;

  return (n * sumXY - sumX * sumY) / denominator;
}
26+
27+
/**
 * Population standard deviation (divides by n, not n - 1) of a value series.
 *
 * @param values - Numbers to measure the spread of.
 * @returns The standard deviation, or 0 when fewer than two values are supplied.
 */
function calculateStandardDeviation(values: number[]): number {
  const count = values.length;
  if (count < 2) return 0;

  const mean = values.reduce((sum, value) => sum + value, 0) / count;
  const squaredDeviationSum = values.reduce(
    (acc, value) => acc + Math.pow(value - mean, 2),
    0
  );
  return Math.sqrt(squaredDeviationSum / count);
}
33+
34+
/**
 * Decide the next action for an experiment loop from its history.
 *
 * Rules are evaluated in this order; the first match wins:
 *  1. 10 or more trailing discards                        -> "STOP"
 *  2. more than 50% discards in the last 20 experiments
 *     (only once 20 experiments exist)                    -> "STOP"
 *  3. with 10+ keep/baseline values, over the last 10 of them:
 *     sigma > 2 x |last - first|                          -> "REDUCE_AMPLITUDE"
 *     |last - first| < 1% of `remainingGap`               -> "AMPLIFY"
 *  4. once 10 experiments exist (revert rate = discards in the last 10,
 *     slope = least-squares trend of the last 10 keep/baseline values):
 *     revert rate > 50%                                   -> "STOP"
 *     |slope| < 0.0001 (flat)                             -> "AMPLIFY"
 *     slope < 0                                           -> "STOP"
 *     slope > 0, revert rate 30%..50%                     -> "REDUCE_AMPLITUDE"
 *     slope > 0, revert rate < 30%                        -> "CONTINUE"
 *  5. 5 or more trailing discards                         -> "AMPLIFY"
 *  6. otherwise                                           -> "CONTINUE"
 *
 * @param experiments - Experiment history in chronological order.
 * @param remainingGap - Remaining distance to the target metric; the plateau
 *                       rule compares net change against 1% of this value.
 * @returns The recommended action.
 */
export function evaluateTrajectory(
  experiments: Experiment[],
  remainingGap: number
): "CONTINUE" | "AMPLIFY" | "STOP" | "REDUCE_AMPLITUDE" {
  const SHORT_WINDOW = 10;
  const LONG_WINDOW = 20;
  const FLAT_SLOPE_EPSILON = 0.0001;

  const isDiscard = (e: Experiment): boolean => e.status === "discard";
  const discardRate = (window: Experiment[]): number =>
    window.filter(isDiscard).length / window.length;

  // Count the unbroken run of discards at the end of the history.
  let consecutiveDiscards = 0;
  for (let i = experiments.length - 1; i >= 0 && isDiscard(experiments[i]); i--) {
    consecutiveDiscards++;
  }

  // Hard stagnation threshold.
  if (consecutiveDiscards >= 10) return "STOP";

  // Long-window revert rate; only meaningful once the window is full.
  const last20 = experiments.slice(-LONG_WINDOW);
  if (last20.length >= LONG_WINDOW && discardRate(last20) > 0.5) return "STOP";

  const last10 = experiments.slice(-SHORT_WINDOW);
  const hasFullWindow = last10.length >= SHORT_WINDOW;
  const revertRate = hasFullWindow ? discardRate(last10) : 0;

  // Baseline counts as a kept value for trend purposes.
  const keepValues = experiments
    .filter((e) => e.status === "keep" || e.status === "baseline")
    .slice(-SHORT_WINDOW)
    .map((e) => e.metric);

  // calculateTrendSlope already returns 0 for fewer than two values.
  const slope = calculateTrendSlope(keepValues);

  // Oscillation and plateau checks both need a full window of keep-values
  // (which also implies at least 10 experiments overall).
  if (keepValues.length >= SHORT_WINDOW) {
    const netChange = Math.abs(keepValues[keepValues.length - 1] - keepValues[0]);
    if (calculateStandardDeviation(keepValues) > 2 * netChange) {
      return "REDUCE_AMPLITUDE";
    }
    if (netChange < 0.01 * remainingGap) {
      return "AMPLIFY";
    }
  }

  // Structural (L3) decision table; requires a full window of experiments.
  if (hasFullWindow) {
    // A critical revert rate stops the run whatever the trend is.
    if (revertRate > 0.5) return "STOP";

    // Flat must be tested before negative: otherwise any slope in
    // (-epsilon, 0) - e.g. rounding noise on a level series - is treated
    // as a negative trend and the lower half of the tolerance band is dead.
    if (Math.abs(slope) < FLAT_SLOPE_EPSILON) return "AMPLIFY";

    if (slope < 0) return "STOP";

    if (slope > 0) {
      return revertRate >= 0.3 ? "REDUCE_AMPLITUDE" : "CONTINUE";
    }
    // Only a NaN slope (non-finite metrics) reaches this point.
  }

  // NOTE(review): with 10+ experiments the table above returns for every
  // finite slope, so this rule effectively applies only to shorter histories.
  // The original author was unsure whether it should run before the L3
  // table - confirm against the spec.
  if (consecutiveDiscards >= 5) return "AMPLIFY";

  return "CONTINUE";
}
107+
108+
// Behavioral tests for evaluateTrajectory: stagnation thresholds, the
// long-window revert rate, oscillation, plateau, and the L3 decision table.
describe("Stagnation Logic", () => {
  // Build `n` discarded experiments that all share the same metric.
  const createDiscards = (n: number, startMetric = 100): Experiment[] =>
    Array.from({ length: n }, (_, i): Experiment => ({
      iteration: i,
      metric: startMetric,
      status: "discard",
    }));

  // Build `n` kept experiments whose metric changes by `step` each iteration.
  const createKeeps = (n: number, startMetric = 100, step = 1): Experiment[] =>
    Array.from({ length: n }, (_, i): Experiment => ({
      iteration: i,
      metric: startMetric + i * step,
      status: "keep",
    }));

  test("Thresholds: 5 discards trigger AMPLIFY, 10 discards trigger STOP", () => {
    expect(evaluateTrajectory(createDiscards(4), 100)).toBe("CONTINUE");
    expect(evaluateTrajectory(createDiscards(5), 100)).toBe("AMPLIFY");
    expect(evaluateTrajectory(createDiscards(9), 100)).toBe("AMPLIFY");
    expect(evaluateTrajectory(createDiscards(10), 100)).toBe("STOP");
  });

  test("Amplify NOT reset counter: explicit text", () => {
    // The history grows in place; AMPLIFY must keep firing from the 5th
    // through the 9th trailing discard, and the 10th must yield STOP.
    const exps = createDiscards(4);
    expect(evaluateTrajectory(exps, 100)).toBe("CONTINUE");

    exps.push({ iteration: 4, metric: 100, status: "discard" });
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    exps.push({ iteration: 5, metric: 100, status: "discard" });
    exps.push({ iteration: 6, metric: 100, status: "discard" });
    exps.push({ iteration: 7, metric: 100, status: "discard" });
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    exps.push({ iteration: 8, metric: 100, status: "discard" });
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    exps.push({ iteration: 9, metric: 100, status: "discard" });
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("Additional signals: revert rate > 50% over last 20 experiments returns STOP", () => {
    // 11 discards followed by 9 keeps: 55% reverts with no trailing discards.
    const exps: Experiment[] = [];
    for (let i = 0; i < 20; i++) {
      exps.push({
        iteration: i,
        metric: 100 + (i % 2 === 0 ? i : 0),
        status: i < 11 ? "discard" : "keep",
      });
    }
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("Oscillation: sigma > 2x net change returns REDUCE_AMPLITUDE", () => {
    // Large swings between 100 and 150, but only a net change of 5.
    const exps: Experiment[] = [
      { iteration: 1, metric: 100, status: "keep" },
      { iteration: 2, metric: 150, status: "keep" },
      { iteration: 3, metric: 100, status: "keep" },
      { iteration: 4, metric: 150, status: "keep" },
      { iteration: 5, metric: 100, status: "keep" },
      { iteration: 6, metric: 150, status: "keep" },
      { iteration: 7, metric: 100, status: "keep" },
      { iteration: 8, metric: 150, status: "keep" },
      { iteration: 9, metric: 100, status: "keep" },
      { iteration: 10, metric: 105, status: "keep" },
    ];
    expect(evaluateTrajectory(exps, 100)).toBe("REDUCE_AMPLITUDE");
  });

  test("Plateau: delta < 1% of remaining gap for 10 iterations returns AMPLIFY", () => {
    const exps: Experiment[] = [...createKeeps(10, 100, 0.005)];
    expect(evaluateTrajectory(exps, 10)).toBe("AMPLIFY");
  });

  test("L3 Non-contradiction: negative trend returns STOP", () => {
    const exps = createKeeps(10, 100, -1);
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("L3 Non-contradiction: flat trend returns AMPLIFY", () => {
    const exps = createKeeps(10, 100, 0);
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");
  });

  test("L3 Non-contradiction: positive trend, low revert rate returns CONTINUE", () => {
    const exps = createKeeps(10, 100, 1);
    expect(evaluateTrajectory(exps, 100)).toBe("CONTINUE");
  });

  test("L3 Non-contradiction: positive trend, 30-50% revert rate returns REDUCE_AMPLITUDE", () => {
    // 4 discards then 6 rising keeps: 40% revert rate with a positive slope.
    const exps: Experiment[] = [];
    for (let i = 0; i < 10; i++) {
      exps.push({
        iteration: i,
        metric: 100 + i,
        status: i < 4 ? "discard" : "keep",
      });
    }
    expect(evaluateTrajectory(exps, 100)).toBe("REDUCE_AMPLITUDE");
  });
});

0 commit comments

Comments
 (0)