Protect current agentic turn from eviction in tryFit/tryFitStable

BYK · BYK · commit 18131458fa93 · 2026-02-25T07:14:58.000Z
Root cause of infinite tool-call loop: tryFit walked backwards from the
end of the message array filling the raw budget. As an agentic turn
accumulated tool-call steps (each a separate assistant message with the
same parentID as the current user message), the cumulative token cost
could exceed rawBudget — causing tryFit to set a cutoff that dropped
earlier steps from the same turn. The model then saw only the most recent
step(s) + the original user request, had no memory of prior work, and
re-issued the same tool call — infinite loop.

Fix: currentTurnStart() identifies the last user message index. tryFit
now:
1. Slices the current turn (last user msg + all following assistants)
2. Computes currentTurnTokens upfront — this budget is always reserved
3. Returns null (escalate) if the current turn alone exceeds rawBudget
4. Fills remaining budget with older messages in the backward scan
5. Marks current-turn messages as strip-protected in tool-output stripping

tryFitStable inherits the fix: its cache-hit path includes all messages
from the pinned index to end (which includes current-turn messages), and
its cache-miss path delegates to tryFit.

Added 3 tests covering: current-turn steps always included in compressed
window, eviction of older messages to make room, and layer escalation
when current turn exceeds the layer's raw budget.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "opencode-lore",
-  "version": "0.2.4",
+  "version": "0.2.5",
   "type": "module",
   "license": "MIT",
   "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
diff --git a/src/gradient.ts b/src/gradient.ts
@@ -926,6 +926,23 @@ export function estimateMessages(messages: MessageWithParts[]): number {
   return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
 }
 
+// Identify the current agentic turn: the last user message plus all subsequent
+// assistant messages that share its ID as parentID. These messages form an atomic
+// unit — the model must see all of them or it will lose track of its own prior
+// tool calls and re-issue them in an infinite loop.
+function currentTurnStart(messages: MessageWithParts[]): number {
+  // Find the last user message
+  let lastUserIdx = -1;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i].info.role === "user") {
+      lastUserIdx = i;
+      break;
+    }
+  }
+  if (lastUserIdx === -1) return 0; // no user message — treat all as current turn
+  return lastUserIdx;
+}
+
 function tryFit(input: {
   messages: MessageWithParts[];
   prefix: MessageWithParts[];
@@ -939,32 +956,49 @@ function tryFit(input: {
   if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
     return null;
 
-  // Walk backwards through messages, accumulating tokens within raw budget
-  let rawTokens = 0;
-  let cutoff = input.messages.length;
+  // Identify the current turn (last user message + all following assistant messages).
+  // These are always included — they must never be evicted. If they alone exceed the
+  // raw budget, escalate to the next layer (which strips tool outputs to reduce size).
+  const turnStart = currentTurnStart(input.messages);
+  const currentTurn = input.messages.slice(turnStart);
+  const currentTurnTokens = currentTurn.reduce((s, m) => s + estimateMessage(m), 0);
+
+  if (currentTurnTokens > input.rawBudget) {
+    // Current turn alone exceeds budget — can't fit even with everything else dropped.
+    // Signal failure so the caller escalates to the next layer (tool-output stripping).
+    return null;
+  }
+
+  // Walk backwards through older messages (before the current turn),
+  // filling the remaining budget after reserving space for the current turn.
+  const olderMessages = input.messages.slice(0, turnStart);
+  const remainingBudget = input.rawBudget - currentTurnTokens;
+  let olderTokens = 0;
+  let cutoff = olderMessages.length; // default: include none of the older messages
   const protectedTurns = input.protectedTurns ?? 0;
-  let turns = 0;
 
-  for (let i = input.messages.length - 1; i >= 0; i--) {
-    const msg = input.messages[i];
-    if (msg.info.role === "user") turns++;
+  for (let i = olderMessages.length - 1; i >= 0; i--) {
+    const msg = olderMessages[i];
     const tokens = estimateMessage(msg);
-    if (rawTokens + tokens > input.rawBudget) {
+    if (olderTokens + tokens > remainingBudget) {
       cutoff = i + 1;
       break;
     }
-    rawTokens += tokens;
+    olderTokens += tokens;
     if (i === 0) cutoff = 0;
   }
 
-  const raw = input.messages.slice(cutoff);
-  // Must keep at least 1 raw message — otherwise this layer fails
-  if (!raw.length) return null;
+  const rawMessages = [...olderMessages.slice(cutoff), ...currentTurn];
+  const rawTokens = olderTokens + currentTurnTokens;
 
-  // Apply system-reminder stripping + optional tool output stripping
-  const processed = raw.map((msg, idx) => {
-    const fromEnd = raw.length - idx;
+  // Apply system-reminder stripping + optional tool output stripping.
+  // The current turn (end of rawMessages) is always "protected" — never stripped.
+  const currentTurnSet = new Set(currentTurn.map((m) => m.info.id));
+  const processed = rawMessages.map((msg, idx) => {
+    const fromEnd = rawMessages.length - idx;
+    const isCurrentTurn = currentTurnSet.has(msg.info.id);
     const isProtected =
+      isCurrentTurn ||
       input.strip === "none" ||
       (input.strip === "old-tools" && fromEnd <= protectedTurns * 2);
     const parts = isProtected
diff --git a/test/gradient.test.ts b/test/gradient.test.ts
@@ -1,4 +1,4 @@
-import { describe, test, expect, beforeAll, afterAll } from "bun:test";
+import { describe, test, expect, beforeAll, beforeEach, afterAll } from "bun:test";
 import { db, close, ensureProject } from "../src/db";
 import {
   transform,
@@ -471,3 +471,146 @@ describe("gradient — exact token tracking (proactive layer 0)", () => {
     expect(result.layer).toBeGreaterThanOrEqual(1);
   });
 });
+
+// Helper: make an assistant message that is a "sibling step" in an agentic
+// tool-call loop — same parentID as the last user message.
+function makeStep(
+  id: string,
+  parentUserID: string,
+  text: string,
+  sessionID = "grad-sess",
+): { info: Message; parts: Part[] } {
+  const info: Message = {
+    id,
+    sessionID,
+    role: "assistant",
+    time: { created: Date.now() },
+    parentID: parentUserID,
+    modelID: "claude-sonnet-4-20250514",
+    providerID: "anthropic",
+    mode: "build",
+    path: { cwd: "/test", root: "/test" },
+    cost: 0,
+    tokens: {
+      input: 100,
+      output: 50,
+      reasoning: 0,
+      cache: { read: 0, write: 0 },
+    },
+  };
+  return {
+    info,
+    parts: [
+      {
+        id: `part-${id}`,
+        sessionID,
+        messageID: id,
+        type: "text",
+        text,
+        time: { start: Date.now(), end: Date.now() },
+      },
+    ],
+  };
+}
+
+describe("gradient — current turn protection (agentic tool-call loop)", () => {
+  const SESSION = "turn-protect-sess";
+
+  beforeEach(() => {
+    resetCalibration();
+    resetPrefixCache();
+    resetRawWindowCache();
+    // Small context to make overflow happen with fewer messages
+    setModelLimits({ context: 5_000, output: 1_000 });
+    calibrate(0, 0); // zero overhead
+    ensureProject(PROJECT);
+  });
+
+  test("all current-turn agentic steps are included in the compressed window", () => {
+    // context=10000, output=2000, maxInput=8000, rawBudget ≈ 5600
+    // Old messages: 40 × 600 chars ≈ 6000 tokens — exceeds rawBudget alone
+    const oldMsgs = Array.from({ length: 40 }, (_, i) =>
+      makeMsg(`old-${i}`, i % 2 === 0 ? "user" : "assistant", "X".repeat(600), SESSION),
+    );
+    // Current turn: user + 4 agentic steps × 400 chars ≈ 450 tokens — must all be kept
+    const currentUser = makeMsg("cur-user", "user", "do the thing", SESSION);
+    const steps = Array.from({ length: 4 }, (_, i) =>
+      makeStep(`step-${i}`, "cur-user", "tool result " + "Y".repeat(380), SESSION),
+    );
+    const messages = [...oldMsgs, currentUser, ...steps];
+
+    const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
+
+    // Should be in gradient mode (too many messages to fit raw)
+    expect(result.layer).toBeGreaterThanOrEqual(1);
+
+    // The current user message must be in the window
+    const ids = result.messages.map((m) => m.info.id);
+    expect(ids).toContain("cur-user");
+
+    // All 4 steps must be in the window — none dropped
+    for (let i = 0; i < 4; i++) {
+      expect(ids).toContain(`step-${i}`);
+    }
+  });
+
+  test("current turn steps are not evicted even when budget is tight", () => {
+    // context=10000, output=2000, maxInput=8000, rawBudget ≈ 5600
+    // Old messages: 50 × 600 chars ≈ 7500 tokens — way over budget alone
+    // Current turn: user + 8 steps × 400 chars ≈ 850 tokens — must all be kept
+    const oldMsgs = Array.from({ length: 50 }, (_, i) =>
+      makeMsg(`tight-old-${i}`, i % 2 === 0 ? "user" : "assistant", "Z".repeat(600), SESSION),
+    );
+    const currentUser = makeMsg("tight-user", "user", "go", SESSION);
+    const steps = Array.from({ length: 8 }, (_, i) =>
+      makeStep(`tight-step-${i}`, "tight-user", "R".repeat(400), SESSION),
+    );
+    const messages = [...oldMsgs, currentUser, ...steps];
+
+    const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
+    expect(result.layer).toBeGreaterThanOrEqual(1);
+
+    const ids = result.messages.map((m) => m.info.id);
+    // All 8 steps must be present
+    for (let i = 0; i < 8; i++) {
+      expect(ids).toContain(`tight-step-${i}`);
+    }
+    // Old messages should be partially evicted (some dropped to make room)
+    const oldCount = ids.filter((id) => id.startsWith("tight-old-")).length;
+    const totalOld = 50;
+    expect(oldCount).toBeLessThan(totalOld);
+  });
+
+  test("layer escalates when current turn alone exceeds raw budget", () => {
+    // Current turn is massive — 8 steps × 2000 chars each ≈ 4000 tokens
+    // rawBudget at layer 1 ≈ 5600 tokens — the current turn just fits,
+    // but with layer 2's tighter budget it should escalate.
+    // Use a tiny context to make the math work.
+    setModelLimits({ context: 3_000, output: 500 });
+    calibrate(0, 0);
+
+    const currentUser = makeMsg("huge-user", "user", "massive task", SESSION);
+    // ~800 chars each ≈ 200 tokens per step, 8 steps = ~1600 tokens
+    // rawBudget at layer 1 ≈ (3000-500) * 0.7 ≈ 1750 tokens → fits
+    // rawBudget at layer 2 ≈ (3000-500) * 0.5 ≈ 1250 tokens → escalates
+    const steps = Array.from({ length: 8 }, (_, i) =>
+      makeStep(`huge-step-${i}`, "huge-user", "W".repeat(500), SESSION),
+    );
+    // Fill with old messages to force gradient mode
+    const oldMsgs = Array.from({ length: 20 }, (_, i) =>
+      makeMsg(`huge-old-${i}`, i % 2 === 0 ? "user" : "assistant", "V".repeat(200), SESSION),
+    );
+    const messages = [...oldMsgs, currentUser, ...steps];
+
+    const result = transform({ messages, projectPath: PROJECT, sessionID: SESSION });
+
+    // Must be in gradient mode
+    expect(result.layer).toBeGreaterThanOrEqual(1);
+
+    // Current turn steps must always be present regardless of layer
+    const ids = result.messages.map((m) => m.info.id);
+    for (let i = 0; i < 8; i++) {
+      expect(ids).toContain(`huge-step-${i}`);
+    }
+  });
+});

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "opencode-lore",`
`3`		`- "version": "0.2.4",`
	`3`	`+ "version": "0.2.5",`
`4`	`4`	`"type": "module",`
`5`	`5`	`"license": "MIT",`
`6`	`6`	`"description": "Three-tier memory architecture for OpenCode — distillation, not summarization",`