-
Notifications
You must be signed in to change notification settings - Fork 41
Fix storage growth from oversized conversations and recording artifacts #1061
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a7b2e5d
3d2d816
95c530f
150e790
1d55bcd
2bbb547
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -622,36 +622,50 @@ export async function shrinkMessagesForLLM(opts: ShrinkOptions): Promise<ShrinkR | |
|
|
||
| let messages = [...opts.messages] | ||
| let tokens = estimateTokensFromMessages(messages) | ||
| const initialTokens = tokens | ||
|
|
||
| if (isDebugLLM()) { | ||
| logLLM("ContextBudget: initial", { providerId, model, maxTokens, targetTokens, estTokens: tokens, count: messages.length }) | ||
| } | ||
|
|
||
| if (tokens <= targetTokens) { | ||
| return { messages, appliedStrategies: applied, estTokensBefore: tokens, estTokensAfter: tokens, maxTokens } | ||
| return { messages, appliedStrategies: applied, estTokensBefore: initialTokens, estTokensAfter: tokens, maxTokens } | ||
| } | ||
|
|
||
| // Tier 0: Aggressive truncation of very large tool responses (>5000 chars) | ||
| // This happens BEFORE summarization to avoid expensive LLM calls on huge payloads | ||
| const AGGRESSIVE_TRUNCATE_THRESHOLD = 5000 | ||
| const tokensBeforeAggressiveTruncate = tokens | ||
| for (let i = 0; i < messages.length; i++) { | ||
| const msg = messages[i] | ||
| if (msg.role === "user" && msg.content && msg.content.length > AGGRESSIVE_TRUNCATE_THRESHOLD) { | ||
| // Check if this looks like a tool result (contains JSON arrays/objects) | ||
| if (msg.content.includes('"url":') || msg.content.includes('"id":')) { | ||
| // Truncate aggressively and add note | ||
| messages[i] = { | ||
| ...msg, | ||
| content: msg.content.substring(0, AGGRESSIVE_TRUNCATE_THRESHOLD) + | ||
| '\n\n... (truncated ' + (msg.content.length - AGGRESSIVE_TRUNCATE_THRESHOLD) + | ||
| ' characters for context management. Key information preserved above.)' | ||
| } | ||
| applied.push("aggressive_truncate") | ||
| tokens = estimateTokensFromMessages(messages) | ||
| if (tokens <= targetTokens) { | ||
| if (isDebugLLM()) logLLM("ContextBudget: after aggressive_truncate", { estTokens: tokens }) | ||
| return { messages, appliedStrategies: applied, estTokensBefore: tokens, estTokensAfter: tokens, maxTokens } | ||
| } | ||
| if (!msg.content || msg.content.length <= AGGRESSIVE_TRUNCATE_THRESHOLD) { | ||
| continue | ||
| } | ||
|
|
||
| // Heuristic: aggressively truncate oversized tool payloads first. | ||
| // JSON-key heuristics are only applied to non-user messages to avoid | ||
| // accidentally truncating user prompts that happen to contain JSON. | ||
| const looksLikeToolPayload = | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Severity: medium 🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage. |
||
| msg.role === "tool" || | ||
| (msg.role !== "user" && ( | ||
| msg.content.includes('"stdout"') || | ||
| msg.content.includes('"stderr"') || | ||
| msg.content.includes('"url":') || | ||
| msg.content.includes('"id":') | ||
| )) | ||
|
|
||
| if (looksLikeToolPayload) { | ||
| messages[i] = { | ||
| ...msg, | ||
| content: msg.content.substring(0, AGGRESSIVE_TRUNCATE_THRESHOLD) + | ||
| '\n\n... (truncated ' + (msg.content.length - AGGRESSIVE_TRUNCATE_THRESHOLD) + | ||
| ' characters for context management. Key information preserved above.)' | ||
| } | ||
| applied.push("aggressive_truncate") | ||
| tokens = estimateTokensFromMessages(messages) | ||
| if (tokens <= targetTokens) { | ||
| if (isDebugLLM()) logLLM("ContextBudget: after aggressive_truncate", { estTokens: tokens }) | ||
| return { messages, appliedStrategies: applied, estTokensBefore: tokensBeforeAggressiveTruncate, estTokensAfter: tokens, maxTokens } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -692,7 +706,7 @@ export async function shrinkMessagesForLLM(opts: ShrinkOptions): Promise<ShrinkR | |
|
|
||
| if (tokens <= targetTokens) { | ||
| if (isDebugLLM()) logLLM("ContextBudget: after summarize", { estTokens: tokens }) | ||
| return { messages, appliedStrategies: applied, estTokensBefore: tokens, estTokensAfter: tokens, maxTokens } | ||
| return { messages, appliedStrategies: applied, estTokensBefore: initialTokens, estTokensAfter: tokens, maxTokens } | ||
| } | ||
|
|
||
| // Tier 2: Remove middle messages (keep system, first user, last N) | ||
|
|
@@ -760,7 +774,7 @@ export async function shrinkMessagesForLLM(opts: ShrinkOptions): Promise<ShrinkR | |
|
|
||
| if (tokens <= targetTokens) { | ||
| if (isDebugLLM()) logLLM("ContextBudget: after drop_middle", { estTokens: tokens, kept: messages.length }) | ||
| return { messages, appliedStrategies: applied, estTokensBefore: tokens, estTokensAfter: tokens, maxTokens, toolResultsSummarized } | ||
| return { messages, appliedStrategies: applied, estTokensBefore: initialTokens, estTokensAfter: tokens, maxTokens, toolResultsSummarized } | ||
| } | ||
|
|
||
| // Tier 3: Minimal system prompt | ||
|
|
@@ -780,6 +794,5 @@ export async function shrinkMessagesForLLM(opts: ShrinkOptions): Promise<ShrinkR | |
|
|
||
| if (isDebugLLM()) logLLM("ContextBudget: after minimal_system_prompt", { estTokens: tokens }) | ||
|
|
||
| return { messages, appliedStrategies: applied, estTokensBefore: tokens, estTokensAfter: tokens, maxTokens, toolResultsSummarized } | ||
| return { messages, appliedStrategies: applied, estTokensBefore: initialTokens, estTokensAfter: tokens, maxTokens, toolResultsSummarized } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clampMaxOutputChars() enforces a minimum of 1,000 chars, but the tool schema description only documents the default and max cap; callers trying to aggressively limit output (e.g., for storage control) won’t be able to set values below 1,000 as implied. Consider documenting the minimum behavior to avoid an API-contract mismatch. Severity: low
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.