Skip to content

Commit 1813145

Browse files
committed
Protect current agentic turn from eviction in tryFit/tryFitStable
Root cause of infinite tool-call loop: tryFit walked backwards from the end of the message array filling the raw budget. As an agentic turn accumulated tool-call steps (each a separate assistant message with the same parentID as the current user message), the cumulative token cost could exceed rawBudget — causing tryFit to set a cutoff that dropped earlier steps from the same turn. The model then saw only the most recent step(s) + the original user request, had no memory of prior work, and re-issued the same tool call — infinite loop. Fix: currentTurnStart() identifies the last user message index. tryFit now: 1. Slices the current turn (last user msg + all following assistants) 2. Computes currentTurnTokens upfront — this budget is always reserved 3. Returns null (escalate) if the current turn alone exceeds rawBudget 4. Fills remaining budget with older messages in the backward scan 5. Marks current-turn messages as strip-protected in tool-output stripping tryFitStable inherits the fix: its cache-hit path includes all messages from the pinned index to end (which includes current-turn messages), and its cache-miss path delegates to tryFit. Added 3 tests covering: current-turn steps always included in compressed window, eviction of older messages to make room, and layer escalation when current turn exceeds the layer's raw budget.
1 parent 94f5037 commit 1813145

File tree

3 files changed

+194
-17
lines changed

3 files changed

+194
-17
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "opencode-lore",
3-
"version": "0.2.4",
3+
"version": "0.2.5",
44
"type": "module",
55
"license": "MIT",
66
"description": "Three-tier memory architecture for OpenCode — distillation, not summarization",

src/gradient.ts

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,23 @@ export function estimateMessages(messages: MessageWithParts[]): number {
926926
return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
927927
}
928928

929+
// Identify the current agentic turn: the last user message plus all subsequent
930+
// assistant messages that share its ID as parentID. These messages form an atomic
931+
// unit — the model must see all of them or it will lose track of its own prior
932+
// tool calls and re-issue them in an infinite loop.
933+
function currentTurnStart(messages: MessageWithParts[]): number {
934+
// Find the last user message
935+
let lastUserIdx = -1;
936+
for (let i = messages.length - 1; i >= 0; i--) {
937+
if (messages[i].info.role === "user") {
938+
lastUserIdx = i;
939+
break;
940+
}
941+
}
942+
if (lastUserIdx === -1) return 0; // no user message — treat all as current turn
943+
return lastUserIdx;
944+
}
945+
929946
function tryFit(input: {
930947
messages: MessageWithParts[];
931948
prefix: MessageWithParts[];
@@ -939,32 +956,49 @@ function tryFit(input: {
939956
if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
940957
return null;
941958

942-
// Walk backwards through messages, accumulating tokens within raw budget
943-
let rawTokens = 0;
944-
let cutoff = input.messages.length;
959+
// Identify the current turn (last user message + all following assistant messages).
960+
// These are always included — they must never be evicted. If they alone exceed the
961+
// raw budget, escalate to the next layer (which strips tool outputs to reduce size).
962+
const turnStart = currentTurnStart(input.messages);
963+
const currentTurn = input.messages.slice(turnStart);
964+
const currentTurnTokens = currentTurn.reduce((s, m) => s + estimateMessage(m), 0);
965+
966+
if (currentTurnTokens > input.rawBudget) {
967+
// Current turn alone exceeds budget — can't fit even with everything else dropped.
968+
// Signal failure so the caller escalates to the next layer (tool-output stripping).
969+
return null;
970+
}
971+
972+
// Walk backwards through older messages (before the current turn),
973+
// filling the remaining budget after reserving space for the current turn.
974+
const olderMessages = input.messages.slice(0, turnStart);
975+
const remainingBudget = input.rawBudget - currentTurnTokens;
976+
let olderTokens = 0;
977+
let cutoff = olderMessages.length; // default: include none of the older messages
945978
const protectedTurns = input.protectedTurns ?? 0;
946-
let turns = 0;
947979

948-
for (let i = input.messages.length - 1; i >= 0; i--) {
949-
const msg = input.messages[i];
950-
if (msg.info.role === "user") turns++;
980+
for (let i = olderMessages.length - 1; i >= 0; i--) {
981+
const msg = olderMessages[i];
951982
const tokens = estimateMessage(msg);
952-
if (rawTokens + tokens > input.rawBudget) {
983+
if (olderTokens + tokens > remainingBudget) {
953984
cutoff = i + 1;
954985
break;
955986
}
956-
rawTokens += tokens;
987+
olderTokens += tokens;
957988
if (i === 0) cutoff = 0;
958989
}
959990

960-
const raw = input.messages.slice(cutoff);
961-
// Must keep at least 1 raw message — otherwise this layer fails
962-
if (!raw.length) return null;
991+
const rawMessages = [...olderMessages.slice(cutoff), ...currentTurn];
992+
const rawTokens = olderTokens + currentTurnTokens;
963993

964-
// Apply system-reminder stripping + optional tool output stripping
965-
const processed = raw.map((msg, idx) => {
966-
const fromEnd = raw.length - idx;
994+
// Apply system-reminder stripping + optional tool output stripping.
995+
// The current turn (end of rawMessages) is always "protected" — never stripped.
996+
const currentTurnSet = new Set(currentTurn.map((m) => m.info.id));
997+
const processed = rawMessages.map((msg, idx) => {
998+
const fromEnd = rawMessages.length - idx;
999+
const isCurrentTurn = currentTurnSet.has(msg.info.id);
9671000
const isProtected =
1001+
isCurrentTurn ||
9681002
input.strip === "none" ||
9691003
(input.strip === "old-tools" && fromEnd <= protectedTurns * 2);
9701004
const parts = isProtected

test/gradient.test.ts

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { describe, test, expect, beforeAll, afterAll } from "bun:test";
1+
import { describe, test, expect, beforeAll, beforeEach, afterAll } from "bun:test";
22
import { db, close, ensureProject } from "../src/db";
33
import {
44
transform,
@@ -471,3 +471,146 @@ describe("gradient — exact token tracking (proactive layer 0)", () => {
471471
expect(result.layer).toBeGreaterThanOrEqual(1);
472472
});
473473
});
474+
475+
// Helper: make an assistant message that is a "sibling step" in an agentic
476+
// tool-call loop — same parentID as the last user message.
477+
function makeStep(
478+
id: string,
479+
parentUserID: string,
480+
text: string,
481+
sessionID = "grad-sess",
482+
): { info: Message; parts: Part[] } {
483+
const info: Message = {
484+
id,
485+
sessionID,
486+
role: "assistant",
487+
time: { created: Date.now() },
488+
parentID: parentUserID,
489+
modelID: "claude-sonnet-4-20250514",
490+
providerID: "anthropic",
491+
mode: "build",
492+
path: { cwd: "/test", root: "/test" },
493+
cost: 0,
494+
tokens: {
495+
input: 100,
496+
output: 50,
497+
reasoning: 0,
498+
cache: { read: 0, write: 0 },
499+
},
500+
};
501+
return {
502+
info,
503+
parts: [
504+
{
505+
id: `part-${id}`,
506+
sessionID,
507+
messageID: id,
508+
type: "text",
509+
text,
510+
time: { start: Date.now(), end: Date.now() },
511+
},
512+
],
513+
};
514+
}
515+
516+
describe("gradient — current turn protection (agentic tool-call loop)", () => {
517+
const SESSION = "turn-protect-sess";
518+
519+
beforeEach(() => {
520+
resetCalibration();
521+
resetPrefixCache();
522+
resetRawWindowCache();
523+
// Small context to make overflow happen with fewer messages
524+
setModelLimits({ context: 5_000, output: 1_000 });
525+
calibrate(0, 0); // zero overhead
526+
ensureProject(PROJECT);
527+
});
528+
529+
test("all current-turn agentic steps are included in the compressed window", () => {
530+
// context=10000, output=2000, maxInput=8000, rawBudget ≈ 5600
531+
// Old messages: 40 × 600 chars ≈ 6000 tokens — exceeds rawBudget alone
532+
const oldMsgs = Array.from({ length: 40 }, (_, i) =>
533+
makeMsg(`old-${i}`, i % 2 === 0 ? "user" : "assistant", "X".repeat(600), SESSION),
534+
);
535+
// Current turn: user + 4 agentic steps × 400 chars ≈ 450 tokens — must all be kept
536+
const currentUser = makeMsg("cur-user", "user", "do the thing", SESSION);
537+
const steps = Array.from({ length: 4 }, (_, i) =>
538+
makeStep(`step-${i}`, "cur-user", "tool result " + "Y".repeat(380), SESSION),
539+
);
540+
const messages = [...oldMsgs, currentUser, ...steps];
541+
542+
const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
543+
544+
// Should be in gradient mode (too many messages to fit raw)
545+
expect(result.layer).toBeGreaterThanOrEqual(1);
546+
547+
// The current user message must be in the window
548+
const ids = result.messages.map((m) => m.info.id);
549+
expect(ids).toContain("cur-user");
550+
551+
// All 4 steps must be in the window — none dropped
552+
for (let i = 0; i < 4; i++) {
553+
expect(ids).toContain(`step-${i}`);
554+
}
555+
});
556+
557+
test("current turn steps are not evicted even when budget is tight", () => {
558+
// context=10000, output=2000, maxInput=8000, rawBudget ≈ 5600
559+
// Old messages: 50 × 600 chars ≈ 7500 tokens — way over budget alone
560+
// Current turn: user + 8 steps × 400 chars ≈ 850 tokens — must all be kept
561+
const oldMsgs = Array.from({ length: 50 }, (_, i) =>
562+
makeMsg(`tight-old-${i}`, i % 2 === 0 ? "user" : "assistant", "Z".repeat(600), SESSION),
563+
);
564+
const currentUser = makeMsg("tight-user", "user", "go", SESSION);
565+
const steps = Array.from({ length: 8 }, (_, i) =>
566+
makeStep(`tight-step-${i}`, "tight-user", "R".repeat(400), SESSION),
567+
);
568+
const messages = [...oldMsgs, currentUser, ...steps];
569+
570+
const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
571+
expect(result.layer).toBeGreaterThanOrEqual(1);
572+
573+
const ids = result.messages.map((m) => m.info.id);
574+
// All 8 steps must be present
575+
for (let i = 0; i < 8; i++) {
576+
expect(ids).toContain(`tight-step-${i}`);
577+
}
578+
// Old messages should be partially evicted (some dropped to make room)
579+
const oldCount = ids.filter((id) => id.startsWith("tight-old-")).length;
580+
const totalOld = 50;
581+
expect(oldCount).toBeLessThan(totalOld);
582+
});
583+
584+
test("layer escalates when current turn alone exceeds raw budget", () => {
585+
// Current turn is massive — 8 steps × 2000 chars each ≈ 4000 tokens
586+
// rawBudget at layer 1 ≈ 5600 tokens — the current turn just fits,
587+
// but with layer 2's tighter budget it should escalate.
588+
// Use a tiny context to make the math work.
589+
setModelLimits({ context: 3_000, output: 500 });
590+
calibrate(0, 0);
591+
592+
const currentUser = makeMsg("huge-user", "user", "massive task", SESSION);
593+
// ~800 chars each ≈ 200 tokens per step, 8 steps = ~1600 tokens
594+
// rawBudget at layer 1 ≈ (3000-500) * 0.7 ≈ 1750 tokens → fits
595+
// rawBudget at layer 2 ≈ (3000-500) * 0.5 ≈ 1250 tokens → escalates
596+
const steps = Array.from({ length: 8 }, (_, i) =>
597+
makeStep(`huge-step-${i}`, "huge-user", "W".repeat(500), SESSION),
598+
);
599+
// Fill with old messages to force gradient mode
600+
const oldMsgs = Array.from({ length: 20 }, (_, i) =>
601+
makeMsg(`huge-old-${i}`, i % 2 === 0 ? "user" : "assistant", "V".repeat(200), SESSION),
602+
);
603+
const messages = [...oldMsgs, currentUser, ...steps];
604+
605+
const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
606+
607+
// Must be in gradient mode
608+
expect(result.layer).toBeGreaterThanOrEqual(1);
609+
610+
// Current turn steps must always be present regardless of layer
611+
const ids = result.messages.map((m) => m.info.id);
612+
for (let i = 0; i < 8; i++) {
613+
expect(ids).toContain(`huge-step-${i}`);
614+
}
615+
});
616+
});

0 commit comments

Comments
 (0)