Commit 1cb90cc

Fix calibration using DB message count instead of compressed window count
After a compressed turn (layers 1-4), calibrate() was called with withParts.length — the total number of messages in the DB — instead of the number of messages actually sent to the model (the compressed window). On the next turn, newMsgCount = dbCount - dbCount ≈ 0, so expectedInput ≈ lastKnownInput (the compressed prompt size, e.g. 114K). Since 114K < maxInput (168K), layer 0 fires and sends all messages uncompressed → overflow (405K on a 200K-limit model).

Fix: transform() now sets lastTransformedCount = result.messages.length via a thin public wrapper around the renamed transformInner(). The event handler uses getLastTransformedCount() for calibration instead of withParts.length. On layer 0 these are equal; on layers 1-4, the delta on the next turn is now computed relative to the compressed window.
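The failure mode can be sketched with a toy version of the next-turn input estimate. All names and numbers below are hypothetical stand-ins; the real calibrate() in src/gradient.ts keeps more state than this.

```typescript
// Toy model of the expected-input estimate (hypothetical names/numbers;
// the real calibrate() in src/gradient.ts keeps more state).
function expectedInput(
  lastKnownInput: number, // actual input tokens of the previous turn
  lastCount: number,      // message count recorded by the previous calibrate()
  currentCount: number,   // messages now in the DB
  avgTokensPerMsg: number,
): number {
  const newMsgCount = currentCount - lastCount;
  return lastKnownInput + newMsgCount * avgTokensPerMsg;
}

const maxInput = 168_000;

// Bug: after a compressed turn, calibrate() recorded the full DB count
// (400), so the next turn sees only 2 "new" messages and the estimate
// stays near the compressed prompt size -> layer 0 fires and overflows.
const buggy = expectedInput(114_000, 400, 402, 800); // 115_600 < maxInput

// Fix: calibrate() records the compressed window count (40), so the next
// turn sees 362 messages beyond that window -> layer 0 is skipped.
const fixed = expectedInput(114_000, 40, 402, 800); // 403_600 > maxInput
```

With the bug, the estimate stays under the limit and layer 0 passes everything through; with the fix, the same session correctly trips compression on the next turn.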
1 parent 6af2b88 commit 1cb90cc

3 files changed (+39, −3)


package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 {
   "name": "opencode-lore",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "type": "module",
   "license": "MIT",
   "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
```

src/gradient.ts

Lines changed: 32 additions & 1 deletion
```diff
@@ -51,6 +51,18 @@ let lastKnownLtm = 0;
 let lastKnownSessionID: string | null = null;
 let lastKnownMessageCount = 0;
 
+// Number of messages in the most recent transform() output — i.e. how many
+// messages were actually sent to the model. On layer 0 this equals the full
+// session length. On layers 1-4 it equals the compressed window size.
+// Calibration must use this count (not the total DB message count) so that
+// the delta on the next turn reflects only messages added since the last
+// compressed window, not since the last DB snapshot.
+let lastTransformedCount = 0;
+
+export function getLastTransformedCount(): number {
+  return lastTransformedCount;
+}
+
 // --- Force escalation ---
 // Set when the API returns "prompt is too long" — forces the transform to skip
 // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -139,6 +151,7 @@ export function resetCalibration() {
   lastKnownLtm = 0;
   lastKnownSessionID = null;
   lastKnownMessageCount = 0;
+  lastTransformedCount = 0;
   forceMinLayer = 0;
 }
 
@@ -691,7 +704,7 @@ export function needsUrgentDistillation(): boolean {
   return v;
 }
 
-export function transform(input: {
+function transformInner(input: {
   messages: MessageWithParts[];
   projectPath: string;
   sessionID?: string;
@@ -890,6 +903,24 @@ export function transform(input: {
   };
 }
 
+// Public wrapper: records the compressed message count for calibration.
+// Calibration needs to know how many messages were SENT to the model (the
+// compressed window), not the total DB count. On layer 0 these are equal;
+// on layers 1-4 the compressed window is smaller, and the delta on the next
+// turn must be computed relative to the compressed count — otherwise the
+// expected input on the next turn is anchored to the compressed input token
+// count but the "new messages" delta is computed against the full DB count,
+// making newMsgCount ≈ 0 and causing layer 0 passthrough on an overflowing session.
+export function transform(input: {
+  messages: MessageWithParts[];
+  projectPath: string;
+  sessionID?: string;
+}): TransformResult {
+  const result = transformInner(input);
+  lastTransformedCount = result.messages.length;
+  return result;
+}
+
 // Compute our message-only estimate for a set of messages (for calibration use)
 export function estimateMessages(messages: MessageWithParts[]): number {
   return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
```
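The public-wrapper pattern used here can be sketched standalone. This is a minimal sketch with simplified types: Message and Result stand in for the real MessageWithParts and TransformResult, and the "compression" is a toy slice that keeps only the last two messages.

```typescript
// Minimal sketch of the public-wrapper pattern (simplified types; Message
// and Result stand in for the real MessageWithParts and TransformResult).
type Message = { text: string };
type Result = { messages: Message[] };

let lastTransformedCount = 0;

export function getLastTransformedCount(): number {
  return lastTransformedCount;
}

// Private implementation: a toy "compression" that keeps only the last
// two messages, standing in for layers 1-4.
function transformInner(input: { messages: Message[] }): Result {
  return { messages: input.messages.slice(-2) };
}

// Public wrapper: the single entry point, so the output size is recorded
// before the result reaches any caller.
export function transform(input: { messages: Message[] }): Result {
  const result = transformInner(input);
  lastTransformedCount = result.messages.length;
  return result;
}
```

After transform() runs on five messages, getLastTransformedCount() reports 2 (the compressed window), not 5 (the full input). Because the recording lives in the wrapper rather than in each caller, no call site can forget to update the count.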

src/index.ts

Lines changed: 6 additions & 1 deletion
```diff
@@ -14,6 +14,7 @@ import {
   setLtmTokens,
   getLtmBudget,
   setForceMinLayer,
+  getLastTransformedCount,
 } from "./gradient";
 import { formatKnowledge } from "./prompt";
 import { createRecallTool } from "./reflect";
@@ -219,7 +220,11 @@ export const LorePlugin: Plugin = async (ctx) => {
       const msgEstimate = estimateMessages(withParts);
       const actualInput =
         msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
-      calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
+      // Use the compressed message count (from the last transform output),
+      // not the total DB count. On layer 0 these are equal. On layers 1-4,
+      // the model only saw the compressed window — calibrate must track that
+      // count so the next turn's delta is computed correctly.
+      calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount() || withParts.length);
     }
   }
 }
```
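One subtlety in the calibrate() call: getLastTransformedCount() returns 0 before any transform has run (and after a calibration reset), and in TypeScript `0 || n` evaluates to n, so calibration falls back to the full DB count in that case. A sketch of just that fallback, using a hypothetical helper (the real call inlines the expression):

```typescript
// Fallback used in the calibrate() call: 0 is falsy, so before any
// transform has run (recorded count 0) the full DB count is used instead.
// countForCalibration is a hypothetical helper for illustration.
function countForCalibration(lastTransformed: number, dbCount: number): number {
  return lastTransformed || dbCount;
}
```

So countForCalibration(0, 57) falls back to 57, while countForCalibration(40, 402) keeps the recorded compressed-window count of 40.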

0 commit comments
