Skip to content

Commit 6af2b88

Browse files
committed
Fix uncalibrated overflow and trailing-message prefill error
- gradient: on the first turn (no calibration data), apply a 1.5x safety multiplier to tryFit's output before accepting a layer. chars/4 estimates undercount by up to 1.8x on sessions with large tool outputs, causing tryFit to pack too many messages — the window is estimated to fit but actually overflows the API limit. With the multiplier, the gradient escalates to the next layer until the estimated total * 1.5 fits within maxInput. Once calibrated (exact API counts available), the multiplier is not applied.
- index: remove the 'break on tool parts' check from the trailing-message drop loop. The break left trailing assistant messages with tool parts at the end of the compressed window, causing 'conversation must end with a user message' prefill errors. Always drop trailing non-user messages regardless of tool content — a hard API error is worse than the model re-invoking a tool.
1 parent 2438a07 commit 6af2b88

File tree

3 files changed

+29
-9
lines changed

3 files changed

+29
-9
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "opencode-lore",
3-
"version": "0.2.1",
3+
"version": "0.2.2",
44
"type": "module",
55
"license": "MIT",
66
"description": "Three-tier memory architecture for OpenCode — distillation, not summarization",

src/gradient.ts

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -722,8 +722,27 @@ export function transform(input: {
722722
const maxInput = contextLimit - outputReserved;
723723
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
724724

725+
// True when we have real API token data from a previous turn in this session.
726+
// When false (first turn / session change), chars/4 estimates can undercount by
727+
// up to 1.8x — so tryFit output must be validated with a safety multiplier before
728+
// being used, to prevent sending an apparently-fitting window that actually overflows.
729+
const calibrated = lastKnownInput > 0 && sid === lastKnownSessionID;
730+
731+
// On uncalibrated turns, apply this multiplier to tryFit's estimated total to
732+
// approximate the real token count. 1.5 is conservative but not so aggressive
733+
// that it forces layer 4 on modestly-sized sessions.
734+
const UNCALIBRATED_SAFETY = 1.5;
735+
736+
// Returns true if the tryFit result is safe to use: either we have calibrated
737+
// data (exact) or the estimated total * safety factor fits within maxInput.
738+
function fitsWithSafetyMargin(result: { totalTokens: number } | null): boolean {
739+
if (!result) return false;
740+
if (calibrated) return true;
741+
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
742+
}
743+
725744
let expectedInput: number;
726-
if (lastKnownInput > 0 && sid === lastKnownSessionID) {
745+
if (calibrated) {
727746
// Exact approach: prior API count + estimate of only the new messages.
728747
const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
729748
const newMsgTokens = newMsgCount > 0
@@ -793,7 +812,7 @@ export function transform(input: {
793812
rawBudget,
794813
strip: "none",
795814
});
796-
if (layer1) return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };
815+
if (fitsWithSafetyMargin(layer1)) return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
797816
}
798817

799818
// Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
@@ -812,9 +831,9 @@ export function transform(input: {
812831
strip: "old-tools",
813832
protectedTurns: 2,
814833
});
815-
if (layer2) {
834+
if (fitsWithSafetyMargin(layer2)) {
816835
urgentDistillation = true;
817-
return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
836+
return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
818837
}
819838
}
820839

@@ -833,9 +852,9 @@ export function transform(input: {
833852
rawBudget: Math.floor(usable * 0.55),
834853
strip: "all-tools",
835854
});
836-
if (layer3) {
855+
if (fitsWithSafetyMargin(layer3)) {
837856
urgentDistillation = true;
838-
return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
857+
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
839858
}
840859

841860
// Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.

src/index.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,13 @@ export const LorePlugin: Plugin = async (ctx) => {
389389
// Layer 0 means all messages fit within the context budget — leave them alone
390390
// so the append-only sequence stays intact for prompt caching.
391391
if (result.layer > 0) {
392+
// The API requires the conversation to end with a user message.
393+
// Always drop trailing non-user messages — even assistant messages with
394+
// tool parts. A hard API error is worse than the model re-invoking a tool.
392395
while (
393396
result.messages.length > 0 &&
394397
result.messages.at(-1)!.info.role !== "user"
395398
) {
396-
const last = result.messages.at(-1)!;
397-
if (last.parts.some((p) => p.type === "tool")) break;
398399
const dropped = result.messages.pop()!;
399400
console.error(
400401
"[lore] WARN: dropping trailing",

0 commit comments

Comments
 (0)