|
| 1 | +import { describe, test, expect } from "bun:test"; |
| 2 | + |
/**
 * One recorded experiment run, as consumed by the trajectory evaluation
 * in this file.
 */
export interface Experiment {
  /** Iteration number attached to this run. */
  iteration: number;

  /** Metric value recorded for this run. */
  metric: number;

  /**
   * Outcome of the run. "discard" is counted as a revert; "keep" and
   * "baseline" both contribute their metric to the trend calculations.
   */
  status:
    | "keep"
    | "discard"
    | "baseline";
}
| 8 | + |
/**
 * Least-squares slope of `keepValues` regressed against their positions
 * (0, 1, 2, ...).
 *
 * @param keepValues - Values in chronological order.
 * @returns The fitted slope, or 0 when fewer than two values are supplied
 *   or the regression denominator is zero.
 */
function calculateTrendSlope(keepValues: number[]): number {
  const count = keepValues.length;
  if (count < 2) return 0;

  let xTotal = 0;
  let yTotal = 0;
  let xyTotal = 0;
  let xSquaredTotal = 0;
  for (const [position, value] of keepValues.entries()) {
    xTotal += position;
    yTotal += value;
    xyTotal += position * value;
    xSquaredTotal += position * position;
  }

  // Standard closed form: (n*Sxy - Sx*Sy) / (n*Sxx - Sx^2).
  const denom = count * xSquaredTotal - xTotal * xTotal;
  return denom === 0 ? 0 : (count * xyTotal - xTotal * yTotal) / denom;
}
| 26 | + |
/**
 * Population standard deviation (divides by N, not N - 1) of `values`.
 *
 * @param values - Samples to measure.
 * @returns The standard deviation, or 0 when fewer than two values are given.
 */
function calculateStandardDeviation(values: number[]): number {
  const count = values.length;
  if (count < 2) return 0;

  let total = 0;
  values.forEach((sample) => {
    total += sample;
  });
  const average = total / count;

  let squaredDeviationTotal = 0;
  values.forEach((sample) => {
    squaredDeviationTotal += Math.pow(sample - average, 2);
  });

  return Math.sqrt(squaredDeviationTotal / count);
}
| 33 | + |
/**
 * Decides how an optimisation loop should proceed, given its experiment
 * history. Rules are evaluated in order and the first match wins:
 *
 *  1. 10 or more trailing consecutive discards -> "STOP".
 *  2. At least 20 experiments and more than 50% discards among the last 20
 *     -> "STOP".
 *  3. At least 10 keep/baseline values and the population sigma of the last
 *     10 exceeds twice their net change (|last - first|)
 *     -> "REDUCE_AMPLITUDE".
 *  4. At least 10 keep/baseline values and that net change is below 1% of
 *     `remainingGap` -> "AMPLIFY".
 *  5. At least 10 experiments (and a non-NaN slope): a trend / revert-rate
 *     table over the last 10 experiments:
 *       - revert rate > 50%                  -> "STOP"
 *       - flat trend (|slope| < 1e-4)        -> "AMPLIFY"
 *       - negative trend                     -> "STOP"
 *       - positive trend, revert rate >= 30% -> "REDUCE_AMPLITUDE"
 *       - positive trend, revert rate < 30%  -> "CONTINUE"
 *  6. 5 or more trailing consecutive discards -> "AMPLIFY".
 *  7. Otherwise -> "CONTINUE".
 *
 * Rule 5 always returns once it applies, so rule 6 only decides when fewer
 * than 10 experiments exist or the slope is NaN.
 *
 * @param experiments - Experiment history, oldest first.
 * @param remainingGap - Remaining distance to the target, in metric units.
 * @returns The recommended action.
 */
export function evaluateTrajectory(
  experiments: Experiment[],
  remainingGap: number
): "CONTINUE" | "AMPLIFY" | "STOP" | "REDUCE_AMPLITUDE" {
  // Slopes within +/- this tolerance are treated as "flat".
  const FLAT_SLOPE_EPSILON = 0.0001;
  const isDiscard = (e: Experiment): boolean => e.status === "discard";

  // Length of the discard streak at the end of the history.
  let consecutiveDiscards = 0;
  for (let i = experiments.length - 1; i >= 0 && isDiscard(experiments[i]); i--) {
    consecutiveDiscards++;
  }

  // Hard stagnation threshold.
  if (consecutiveDiscards >= 10) return "STOP";

  // Revert rate over the last 20 experiments; only judged on a full window.
  const last20 = experiments.slice(-20);
  if (last20.length >= 20 && last20.filter(isDiscard).length / last20.length > 0.5) {
    return "STOP";
  }

  // Metrics of the (up to) 10 most recent accepted runs.
  const last10KeepValues = experiments
    .filter((e) => e.status === "keep" || e.status === "baseline")
    .slice(-10)
    .map((e) => e.metric);

  // calculateTrendSlope already returns 0 for fewer than two values.
  const slope = calculateTrendSlope(last10KeepValues);

  if (last10KeepValues.length >= 10) {
    const netChange = Math.abs(
      last10KeepValues[last10KeepValues.length - 1] - last10KeepValues[0]
    );

    // High oscillation: spread dwarfs the actual progress made.
    if (calculateStandardDeviation(last10KeepValues) > 2 * netChange) {
      return "REDUCE_AMPLITUDE";
    }

    // Plateau: progress is negligible relative to what is still missing.
    if (netChange < 0.01 * remainingGap) {
      return "AMPLIFY";
    }
  }

  // Structural (L3) checks need a full window of 10 experiments. A NaN slope
  // (e.g. from NaN metrics) matches no trend class and falls through to the
  // discard-streak rule below.
  const last10 = experiments.slice(-10);
  if (last10.length >= 10 && !Number.isNaN(slope)) {
    const revertRate = last10.filter(isDiscard).length / last10.length;

    // Critical revert rate stops the run regardless of trend.
    if (revertRate > 0.5) return "STOP";

    // The flat band must be tested BEFORE the sign of the slope, otherwise
    // slightly negative slopes inside the tolerance are misread as a
    // negative trend.
    if (Math.abs(slope) < FLAT_SLOPE_EPSILON) return "AMPLIFY";

    if (slope < 0) return "STOP";

    // Positive trend: revertRate is within [0, 0.5] at this point.
    return revertRate >= 0.3 ? "REDUCE_AMPLITUDE" : "CONTINUE";
  }

  // Soft stagnation threshold.
  if (consecutiveDiscards >= 5) return "AMPLIFY";

  return "CONTINUE";
}
| 107 | + |
// Test suite for evaluateTrajectory: stagnation thresholds, the additional
// revert-rate / oscillation / plateau signals, and the L3 trend table.
describe("Stagnation Logic", () => {
  // Builds `n` discarded experiments, all with the same metric.
  const createDiscards = (n: number, startMetric = 100): Experiment[] =>
    Array.from({ length: n }, (_, i): Experiment => ({
      iteration: i,
      metric: startMetric,
      status: "discard",
    }));

  // Builds `n` kept experiments whose metric moves by `step` per iteration.
  const createKeeps = (n: number, startMetric = 100, step = 1): Experiment[] =>
    Array.from({ length: n }, (_, i): Experiment => ({
      iteration: i,
      metric: startMetric + i * step,
      status: "keep",
    }));

  test("Thresholds: 5 discards trigger AMPLIFY, 10 discards trigger STOP", () => {
    expect(evaluateTrajectory(createDiscards(4), 100)).toBe("CONTINUE");
    expect(evaluateTrajectory(createDiscards(5), 100)).toBe("AMPLIFY");
    expect(evaluateTrajectory(createDiscards(9), 100)).toBe("AMPLIFY");
    expect(evaluateTrajectory(createDiscards(10), 100)).toBe("STOP");
  });

  test("Amplify NOT reset counter: explicit text", () => {
    const exps = createDiscards(4);
    // Appends one more discard, numbered contiguously after the existing ones.
    const pushDiscard = (): void => {
      exps.push({ iteration: exps.length, metric: 100, status: "discard" });
    };
    expect(evaluateTrajectory(exps, 100)).toBe("CONTINUE");

    pushDiscard(); // 5 consecutive discards
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    pushDiscard();
    pushDiscard();
    pushDiscard(); // 8 consecutive discards: still amplifying, not reset
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    pushDiscard(); // 9
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");

    pushDiscard(); // 10
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("Additional signals: revert rate > 50% over last 20 experiments returns STOP", () => {
    // 11 discards followed by 9 keeps: 55% reverts, no trailing discard streak.
    const exps: Experiment[] = [];
    for (let i = 0; i < 20; i++) {
      exps.push({
        iteration: i,
        metric: 100 + (i % 2 === 0 ? i : 0),
        status: i < 11 ? "discard" : "keep",
      });
    }
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("Oscillation: sigma > 2x net change returns REDUCE_AMPLITUDE", () => {
    // Large swings between 100 and 150, but a net change of only 5.
    const metrics = [100, 150, 100, 150, 100, 150, 100, 150, 100, 105];
    const exps = metrics.map(
      (metric, i): Experiment => ({ iteration: i + 1, metric, status: "keep" })
    );
    expect(evaluateTrajectory(exps, 100)).toBe("REDUCE_AMPLITUDE");
  });

  test("Plateau: delta < 1% of remaining gap for 10 iterations returns AMPLIFY", () => {
    // Net change 0.045 against a gap of 10 (1% = 0.1).
    const exps = createKeeps(10, 100, 0.005);
    expect(evaluateTrajectory(exps, 10)).toBe("AMPLIFY");
  });

  test("L3 Non-contradiction: negative trend returns STOP", () => {
    const exps = createKeeps(10, 100, -1);
    expect(evaluateTrajectory(exps, 100)).toBe("STOP");
  });

  test("L3 Non-contradiction: flat trend returns AMPLIFY", () => {
    // Only 9 keep values, so the oscillation and plateau rules (which need 10)
    // are skipped and the result comes from the L3 flat-trend branch itself.
    const exps: Experiment[] = [
      { iteration: 0, metric: 100, status: "discard" },
      ...createKeeps(9, 100, 0).map((e) => ({ ...e, iteration: e.iteration + 1 })),
    ];
    expect(evaluateTrajectory(exps, 100)).toBe("AMPLIFY");
  });

  test("L3 Non-contradiction: positive trend, low revert rate returns CONTINUE", () => {
    const exps = createKeeps(10, 100, 1);
    expect(evaluateTrajectory(exps, 100)).toBe("CONTINUE");
  });

  test("L3 Non-contradiction: positive trend, 30-50% revert rate returns REDUCE_AMPLITUDE", () => {
    // 4 discards then 6 rising keeps: 40% revert rate, positive slope.
    const exps: Experiment[] = [];
    for (let i = 0; i < 10; i++) {
      exps.push({
        iteration: i,
        metric: 100 + i,
        status: i < 4 ? "discard" : "keep",
      });
    }
    expect(evaluateTrajectory(exps, 100)).toBe("REDUCE_AMPLITUDE");
  });
});
0 commit comments