Skip to content

Commit 88a509b

Browse files
committed
Fix overflow recovery and chunked compaction
- session.error: remove the session.summarize() call on overflow — it sent all messages to the model, causing the overflow→compact→overflow stuck loop. Now it just distills and sets forceMinLayer(2); gradient layers 2–4 handle compression on the next turn.
- compacting hook: run chunked distillation first, inject the pre-computed summaries into output.context, and instruct the model to consolidate those summaries rather than re-reading the raw messages. This prevents overflow during /compact on large sessions.
- distillation.loadForSession(): new export to read back distillation rows without going through gradient internals.
- session.error: broaden error detection to match both the error.data.message and error.message shapes; add more overflow message patterns; add diagnostic logging to show the actual error shape in stderr.
1 parent db1ec68 commit 88a509b

File tree

2 files changed

+80
-30
lines changed

2 files changed

+80
-30
lines changed

src/distillation.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,32 @@ export type Distillation = {
117117
created_at: number;
118118
};
119119

120+
/** Load all distillations for a session, oldest first. */
121+
export function loadForSession(
122+
projectPath: string,
123+
sessionID: string,
124+
): Distillation[] {
125+
const pid = ensureProject(projectPath);
126+
const rows = db()
127+
.query(
128+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC",
129+
)
130+
.all(pid, sessionID) as Array<{
131+
id: string;
132+
project_id: string;
133+
session_id: string;
134+
observations: string;
135+
source_ids: string;
136+
generation: number;
137+
token_count: number;
138+
created_at: number;
139+
}>;
140+
return rows.map((r) => ({
141+
...r,
142+
source_ids: JSON.parse(r.source_ids) as string[],
143+
}));
144+
}
145+
120146
function storeDistillation(input: {
121147
projectPath: string;
122148
sessionID: string;

src/index.ts

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -233,43 +233,44 @@ export const LorePlugin: Plugin = async (ctx) => {
233233
// 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
234234
// 2. Force distillation to capture all temporal data before compaction
235235
// 3. Trigger compaction so the session recovers without user intervention
236-
const error = (event.properties as Record<string, unknown>).error as
237-
| { name?: string; data?: { message?: string } }
236+
const rawError = (event.properties as Record<string, unknown>).error;
237+
// Diagnostic: log the full error shape so we can verify our detection matches
238+
console.error("[lore] session.error received:", JSON.stringify(rawError, null, 2));
239+
240+
const error = rawError as
241+
| { name?: string; message?: string; data?: { message?: string } }
238242
| undefined;
243+
// Match both shapes: error.data.message (APIError wrapper) and error.message (direct)
244+
const errorMessage = error?.data?.message ?? error?.message ?? "";
239245
const isPromptTooLong =
240-
error?.name === "APIError" &&
241-
typeof error?.data?.message === "string" &&
242-
(error.data.message.includes("prompt is too long") ||
243-
error.data.message.includes("context length exceeded") ||
244-
error.data.message.includes("maximum context length"));
246+
typeof errorMessage === "string" &&
247+
(errorMessage.includes("prompt is too long") ||
248+
errorMessage.includes("context length exceeded") ||
249+
errorMessage.includes("maximum context length") ||
250+
errorMessage.includes("ContextWindowExceededError") ||
251+
errorMessage.includes("too many tokens"));
252+
253+
console.error(
254+
`[lore] session.error isPromptTooLong=${isPromptTooLong} (name=${error?.name}, message=${errorMessage.substring(0, 120)})`,
255+
);
245256

246257
if (isPromptTooLong) {
247258
const sessionID = (event.properties as Record<string, unknown>).sessionID as
248259
| string
249260
| undefined;
250261
console.error(
251-
`[lore] detected 'prompt too long' error — forcing distillation + compaction (session: ${sessionID?.substring(0, 16)})`,
262+
`[lore] detected 'prompt too long' error — forcing distillation + layer escalation (session: ${sessionID?.substring(0, 16)})`,
252263
);
253264
// Force layer 2 on next transform — layers 0 and 1 were already too large.
265+
// The gradient at layers 2-4 will compress the context enough for the next turn.
266+
// Do NOT call session.summarize() here — it sends all messages to the model,
267+
// which would overflow again and create a stuck compaction loop.
254268
setForceMinLayer(2);
255269

256270
if (sessionID) {
257-
// Force distillation to capture all undistilled messages before
258-
// compaction replaces the session message history.
271+
// Force distillation to capture all undistilled messages into the temporal
272+
// store so they're preserved even if the session is later compacted manually.
259273
await backgroundDistill(sessionID, true);
260-
261-
// Trigger compaction automatically — the compacting hook will inject
262-
// Lore's custom distillation-aware prompt.
263-
try {
264-
const sessions = await ctx.client.session.list();
265-
const session = sessions.data?.find((s) => s.id.startsWith(sessionID));
266-
if (session) {
267-
// providerID/modelID are optional — omit to use the session's current model
268-
await ctx.client.session.summarize({ path: { id: session.id } });
269-
}
270-
} catch (e) {
271-
console.error("[lore] auto-compaction failed:", e);
272-
}
273274
}
274275
}
275276
}
@@ -410,17 +411,25 @@ export const LorePlugin: Plugin = async (ctx) => {
410411
}
411412
},
412413

413-
// Replace compaction prompt with distillation-aware prompt when manual /compact is used.
414-
// Also force distillation first so all temporal data is captured before compaction
415-
// replaces the session message history.
414+
// Replace compaction prompt with distillation-aware prompt when /compact is used.
415+
// Strategy: run chunked distillation first so all messages are captured in segments
416+
// that each fit within the model's context, then inject the pre-computed summaries
417+
// as context so the model consolidates them rather than re-reading all raw messages.
418+
// This prevents the overflow→compaction→overflow stuck loop.
416419
"experimental.session.compacting": async (input, output) => {
417-
// Force distillation to capture any undistilled messages. This is critical:
418-
// compaction will replace all messages with a summary, so we must persist
419-
// everything to Lore's temporal store before that happens.
420+
// Chunked distillation: split all undistilled messages into segments that each
421+
// fit within the model's context window and distill them independently.
422+
// This is safe even when the full session exceeds the context limit.
420423
if (input.sessionID && activeSessions.has(input.sessionID)) {
421424
await backgroundDistill(input.sessionID, true);
422425
}
423426

427+
// Load all distillation summaries produced for this session (oldest first).
428+
// These are the chunked observations — the model will consolidate them.
429+
const distillations = input.sessionID
430+
? distillation.loadForSession(projectPath, input.sessionID)
431+
: [];
432+
424433
const entries = ltm.forProject(projectPath, config().crossProject);
425434
const knowledge = entries.length
426435
? formatKnowledge(
@@ -432,9 +441,24 @@ export const LorePlugin: Plugin = async (ctx) => {
432441
)
433442
: "";
434443

444+
// Inject each distillation chunk as a context string so the model has access
445+
// to pre-computed summaries. Even if the raw messages overflow context, these
446+
// summaries are compact and will fit.
447+
if (distillations.length > 0) {
448+
output.context.push(
449+
`## Lore Pre-computed Session Summaries\n\nThe following ${distillations.length} summary chunk(s) were pre-computed from the conversation history. Use these as the authoritative source — do not re-summarize the raw messages above if they conflict.\n\n` +
450+
distillations
451+
.map(
452+
(d, i) =>
453+
`### Chunk ${i + 1}${d.generation > 0 ? " (consolidated)" : ""}\n${d.observations}`,
454+
)
455+
.join("\n\n"),
456+
);
457+
}
458+
435459
output.prompt = `You are creating a distilled memory summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
436460
437-
Structure your response as follows:
461+
${distillations.length > 0 ? "Lore has pre-computed chunked summaries of the session history (injected above as context). Consolidate those summaries into a single coherent narrative. Do NOT re-read or re-summarize the raw conversation messages — trust the pre-computed summaries.\n\n" : ""}Structure your response as follows:
438462
439463
## Session History
440464

0 commit comments

Comments (0)