diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index 6fccb8f..0000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(cat:*)", - "Bash(for f in ~/.local/share/opencode/storage/part/*/*)", - "Bash(do grep -l \"\"type\"\":\"\"reasoning\"\" $f)", - "Bash(done)", - "WebSearch", - "WebFetch(domain:ai-sdk.dev)", - "Bash(npm run typecheck:*)" - ], - "deny": [], - "ask": [] - } -} diff --git a/.gitignore b/.gitignore index 358c623..b1417a4 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,10 @@ Thumbs.db # OpenCode .opencode/ +AGENTS.md + +# Claude +.claude/ # Tests (local development only) tests/ diff --git a/lib/core/janitor.ts b/lib/core/janitor.ts index 5667c21..e4661ce 100644 --- a/lib/core/janitor.ts +++ b/lib/core/janitor.ts @@ -11,6 +11,7 @@ import { sendUnifiedNotification, type NotificationContext } from "../ui/notification" +import { findCurrentAgent } from "../ui/display-utils" export interface SessionStats { totalToolsPruned: number @@ -21,7 +22,7 @@ export interface SessionStats { export interface GCStats { tokensCollected: number - toolsDeduped: number + toolsGCd: number // Tools garbage collected (deduped outputs + error-pruned inputs) } export interface PruningResult { @@ -181,7 +182,7 @@ async function runWithStrategies( totalToolsPruned: currentStats.totalToolsPruned + finalNewlyPrunedIds.length, totalTokensSaved: currentStats.totalTokensSaved + tokensSaved, totalGCTokens: currentStats.totalGCTokens + (gcPending?.tokensCollected ?? 0), - totalGCTools: currentStats.totalGCTools + (gcPending?.toolsDeduped ?? 0) + totalGCTools: currentStats.totalGCTools + (gcPending?.toolsGCd ?? 0) } state.stats.set(sessionID, sessionStats) @@ -205,7 +206,7 @@ async function runWithStrategies( if (finalNewlyPrunedIds.length === 0) { if (notificationSent) { - logger.info("janitor", `GC-only notification: ~${formatTokenCount(gcPending?.tokensCollected ?? 0)} tokens from ${gcPending?.toolsDeduped ?? 0} deduped tools`, { + logger.info("janitor", `GC-only notification: ~${formatTokenCount(gcPending?.tokensCollected ?? 0)} tokens from ${gcPending?.toolsGCd ?? 0} GC'd tools`, { trigger: options.trigger }) } @@ -230,7 +231,7 @@ async function runWithStrategies( } if (gcPending) { logMeta.gcTokens = gcPending.tokensCollected - logMeta.gcTools = gcPending.toolsDeduped + logMeta.gcTools = gcPending.toolsGCd } logger.info("janitor", `Pruned ${prunedCount}/${candidateCount} tools, ${keptCount} kept (~${formatTokenCount(tokensSaved)} tokens)`, logMeta) @@ -437,16 +438,7 @@ export function parseMessages( return { toolCallIds, toolOutputs, toolMetadata } } -function findCurrentAgent(messages: any[]): string | undefined { - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i] - const info = msg.info - if (info?.role === 'user') { - return info.agent || 'build' - } - } - return undefined -} + // ============================================================================ // Helpers diff --git a/lib/core/strategies/error-pruning.ts b/lib/core/strategies/error-pruning.ts new file mode 100644 index 0000000..55b2805 --- /dev/null +++ b/lib/core/strategies/error-pruning.ts @@ -0,0 +1,56 @@ +import { extractParameterKey } from "../../ui/display-utils" +import type { PruningStrategy, StrategyResult, ToolMetadata } from "./types" + +/** + * Minimum number of recent tool calls to protect from error pruning. + * Tools older than this threshold will have their inputs pruned if they errored. + */ +const MIN_AGE_THRESHOLD = 5 + +/** + * Error pruning strategy - prunes tool inputs (arguments) for tools that + * resulted in an error, provided they are older than MIN_AGE_THRESHOLD. + * + * This helps clean up failed attempts (like bad edits, file not found, etc.) + * while keeping recent errors visible for the model to learn from. + */ +export const errorPruningStrategy: PruningStrategy = { + name: "error-pruning", + + detect( + toolMetadata: Map, + unprunedIds: string[], + protectedTools: string[] + ): StrategyResult { + const prunedIds: string[] = [] + const details = new Map() + + // Don't prune the last N tool calls - model may still be iterating + if (unprunedIds.length <= MIN_AGE_THRESHOLD) { + return { prunedIds, details } + } + + const pruneableIds = unprunedIds.slice(0, -MIN_AGE_THRESHOLD) + const protectedToolsLower = protectedTools.map(t => t.toLowerCase()) + + for (const id of pruneableIds) { + const meta = toolMetadata.get(id) + if (!meta) continue + + // Skip protected tools + if (protectedToolsLower.includes(meta.tool.toLowerCase())) continue + + // Check if this tool errored + if (meta.status === "error") { + prunedIds.push(id) + details.set(id, { + toolName: meta.tool, + parameterKey: extractParameterKey(meta), + reason: `error: ${meta.error || "unknown error"}` + }) + } + } + + return { prunedIds, details } + } +} diff --git a/lib/core/strategies/index.ts b/lib/core/strategies/index.ts index c6c9128..cc09e27 100644 --- a/lib/core/strategies/index.ts +++ b/lib/core/strategies/index.ts @@ -4,14 +4,15 @@ import type { PruningStrategy, StrategyResult, ToolMetadata } from "./types" import { deduplicationStrategy } from "./deduplication" +import { errorPruningStrategy } from "./error-pruning" export type { PruningStrategy, StrategyResult, ToolMetadata, StrategyDetail } from "./types" /** All available strategies */ const ALL_STRATEGIES: PruningStrategy[] = [ deduplicationStrategy, + errorPruningStrategy, // Future strategies will be added here: - // errorPruningStrategy, // writeReadStrategy, // partialReadStrategy, ] @@ -24,31 +25,25 @@ export interface RunStrategiesResult { } /** - * Run all enabled strategies and collect pruned IDs. + * Run all GC strategies and collect pruned IDs. + * All strategies in ALL_STRATEGIES are always enabled (garbage collection). * * @param toolMetadata - Map of tool call ID to metadata * @param unprunedIds - Tool call IDs not yet pruned (chronological order) * @param protectedTools - Tool names that should never be pruned - * @param enabledStrategies - Strategy names to run (defaults to all) */ export function runStrategies( toolMetadata: Map, unprunedIds: string[], - protectedTools: string[], - enabledStrategies?: string[] + protectedTools: string[] ): RunStrategiesResult { const byStrategy = new Map() const allPrunedIds = new Set() - // Filter to enabled strategies (or all if not specified) - const strategies = enabledStrategies - ? ALL_STRATEGIES.filter(s => enabledStrategies.includes(s.name)) - : ALL_STRATEGIES - // Track which IDs are still available for each strategy let remainingIds = unprunedIds - for (const strategy of strategies) { + for (const strategy of ALL_STRATEGIES) { const result = strategy.detect(toolMetadata, remainingIds, protectedTools) if (result.prunedIds.length > 0) { diff --git a/lib/core/strategies/types.ts b/lib/core/strategies/types.ts index a013a0d..11f3047 100644 --- a/lib/core/strategies/types.ts +++ b/lib/core/strategies/types.ts @@ -6,6 +6,8 @@ export interface ToolMetadata { tool: string parameters?: any + status?: "pending" | "running" | "completed" | "error" + error?: string } export interface StrategyResult { diff --git a/lib/fetch-wrapper/formats/bedrock.ts b/lib/fetch-wrapper/formats/bedrock.ts index 2aaedc6..99dd498 100644 --- a/lib/fetch-wrapper/formats/bedrock.ts +++ b/lib/fetch-wrapper/formats/bedrock.ts @@ -117,6 +117,39 @@ export const bedrockFormat: FormatDescriptor = { return replaced }, + replaceToolInput(data: any[], toolId: string, prunedMessage: string, _state: PluginState): boolean { + const toolIdLower = toolId.toLowerCase() + let replaced = false + + for (let i = 0; i < data.length; i++) { + const m = data[i] + + // Bedrock format: assistant message with toolUse blocks in content + if (m.role === 'assistant' && Array.isArray(m.content)) { + let messageModified = false + const newContent = m.content.map((block: any) => { + if (block.toolUse && block.toolUse.toolUseId?.toLowerCase() === toolIdLower) { + messageModified = true + return { + ...block, + toolUse: { + ...block.toolUse, + input: { _pruned: prunedMessage } + } + } + } + return block + }) + if (messageModified) { + data[i] = { ...m, content: newContent } + replaced = true + } + } + } + + return replaced + }, + hasToolOutputs(data: any[]): boolean { for (const m of data) { if (m.role === 'user' && Array.isArray(m.content)) { diff --git a/lib/fetch-wrapper/formats/gemini.ts b/lib/fetch-wrapper/formats/gemini.ts index c1c0feb..6cf7680 100644 --- a/lib/fetch-wrapper/formats/gemini.ts +++ b/lib/fetch-wrapper/formats/gemini.ts @@ -160,6 +160,63 @@ export const geminiFormat: FormatDescriptor = { return replaced }, + replaceToolInput(data: any[], toolId: string, prunedMessage: string, state: PluginState): boolean { + let positionMapping: Map | undefined + for (const [_sessionId, mapping] of state.googleToolCallMapping) { + if (mapping && mapping.size > 0) { + positionMapping = mapping + break + } + } + + if (!positionMapping) { + return false + } + + const toolIdLower = toolId.toLowerCase() + const toolPositionCounters = new Map() + let replaced = false + + for (let i = 0; i < data.length; i++) { + const content = data[i] + if (!Array.isArray(content.parts)) continue + + let contentModified = false + const newParts = content.parts.map((part: any) => { + // Gemini format: functionCall blocks in model content + if (part.functionCall) { + const funcName = part.functionCall.name?.toLowerCase() + if (funcName) { + const currentIndex = toolPositionCounters.get(funcName) || 0 + toolPositionCounters.set(funcName, currentIndex + 1) + + const positionKey = `${funcName}:${currentIndex}` + const mappedToolId = positionMapping!.get(positionKey) + + if (mappedToolId?.toLowerCase() === toolIdLower) { + contentModified = true + replaced = true + return { + ...part, + functionCall: { + ...part.functionCall, + args: { _pruned: prunedMessage } + } + } + } + } + } + return part + }) + + if (contentModified) { + data[i] = { ...content, parts: newParts } + } + } + + return replaced + }, + hasToolOutputs(data: any[]): boolean { return data.some((content: any) => Array.isArray(content.parts) && diff --git a/lib/fetch-wrapper/formats/openai-chat.ts b/lib/fetch-wrapper/formats/openai-chat.ts index 2ac3793..cec5517 100644 --- a/lib/fetch-wrapper/formats/openai-chat.ts +++ b/lib/fetch-wrapper/formats/openai-chat.ts @@ -114,6 +114,55 @@ export const openaiChatFormat: FormatDescriptor = { return replaced }, + replaceToolInput(data: any[], toolId: string, prunedMessage: string, _state: PluginState): boolean { + const toolIdLower = toolId.toLowerCase() + let replaced = false + + for (let i = 0; i < data.length; i++) { + const m = data[i] + + // OpenAI Chat format: assistant message with tool_calls array + if (m.role === 'assistant' && Array.isArray(m.tool_calls)) { + let messageModified = false + const newToolCalls = m.tool_calls.map((tc: any) => { + if (tc.id?.toLowerCase() === toolIdLower) { + messageModified = true + return { + ...tc, + function: { + ...tc.function, + arguments: JSON.stringify({ _pruned: prunedMessage }) + } + } + } + return tc + }) + if (messageModified) { + data[i] = { ...m, tool_calls: newToolCalls } + replaced = true + } + } + + // Anthropic format (via OpenAI Chat): tool_use blocks in assistant content + if (m.role === 'assistant' && Array.isArray(m.content)) { + let messageModified = false + const newContent = m.content.map((part: any) => { + if (part.type === 'tool_use' && part.id?.toLowerCase() === toolIdLower) { + messageModified = true + return { ...part, input: { _pruned: prunedMessage } } + } + return part + }) + if (messageModified) { + data[i] = { ...m, content: newContent } + replaced = true + } + } + } + + return replaced + }, + hasToolOutputs(data: any[]): boolean { for (const m of data) { if (m.role === 'tool') return true diff --git a/lib/fetch-wrapper/formats/openai-responses.ts b/lib/fetch-wrapper/formats/openai-responses.ts index 6b84891..360ac0c 100644 --- a/lib/fetch-wrapper/formats/openai-responses.ts +++ b/lib/fetch-wrapper/formats/openai-responses.ts @@ -86,6 +86,22 @@ export const openaiResponsesFormat: FormatDescriptor = { return replaced }, + replaceToolInput(data: any[], toolId: string, prunedMessage: string, _state: PluginState): boolean { + const toolIdLower = toolId.toLowerCase() + let replaced = false + + for (let i = 0; i < data.length; i++) { + const item = data[i] + // OpenAI Responses format: function_call items with call_id and arguments + if (item.type === 'function_call' && item.call_id?.toLowerCase() === toolIdLower) { + data[i] = { ...item, arguments: JSON.stringify({ _pruned: prunedMessage }) } + replaced = true + } + } + + return replaced + }, + hasToolOutputs(data: any[]): boolean { return data.some((item: any) => item.type === 'function_call_output') }, diff --git a/lib/fetch-wrapper/gc-tracker.ts b/lib/fetch-wrapper/gc-tracker.ts index 950a21a..d9b2219 100644 --- a/lib/fetch-wrapper/gc-tracker.ts +++ b/lib/fetch-wrapper/gc-tracker.ts @@ -10,19 +10,77 @@ export function accumulateGCStats( ): void { if (prunedIds.length === 0) return - const toolOutputs = extractToolOutputsFromBody(body, prunedIds) + // Filter out IDs that have already been counted + const newIds = prunedIds.filter(id => !state.gcCountedIds.has(id.toLowerCase())) + if (newIds.length === 0) return + + const toolOutputs = extractToolOutputsFromBody(body, newIds) const tokensCollected = estimateTokensFromOutputs(toolOutputs) - const existing = state.gcPending.get(sessionId) ?? { tokensCollected: 0, toolsDeduped: 0 } + const existing = state.gcPending.get(sessionId) ?? { tokensCollected: 0, toolsGCd: 0 } + + state.gcPending.set(sessionId, { + tokensCollected: existing.tokensCollected + tokensCollected, + toolsGCd: existing.toolsGCd + newIds.length + }) + + // Mark these IDs as counted + for (const id of newIds) { + state.gcCountedIds.add(id.toLowerCase()) + } + + logger.debug("gc-tracker", "Accumulated GC stats (outputs)", { + sessionId: sessionId.substring(0, 8), + outputsDeduped: newIds.length, + tokensThisCycle: tokensCollected, + pendingTotal: state.gcPending.get(sessionId) + }) +} + +/** + * Accumulate GC stats for pruned tool inputs. + * Uses state.toolParameters (from OpenCode API) instead of parsing LLM request body. + */ +export function accumulateGCInputStats( + state: PluginState, + sessionId: string, + prunedIds: string[], + logger: Logger +): void { + if (prunedIds.length === 0) return + + // Filter out IDs that have already been counted + const newIds = prunedIds.filter(id => !state.gcCountedIds.has(id.toLowerCase())) + if (newIds.length === 0) return + + // Get input sizes from state.toolParameters (populated from OpenCode API) + let totalChars = 0 + for (const id of newIds) { + const entry = state.toolParameters.get(id.toLowerCase()) + if (entry?.parameters) { + const paramStr = typeof entry.parameters === 'string' + ? entry.parameters + : JSON.stringify(entry.parameters) + totalChars += paramStr.length + } + } + const tokensCollected = Math.round(totalChars / 4) + + const existing = state.gcPending.get(sessionId) ?? { tokensCollected: 0, toolsGCd: 0 } state.gcPending.set(sessionId, { tokensCollected: existing.tokensCollected + tokensCollected, - toolsDeduped: existing.toolsDeduped + prunedIds.length + toolsGCd: existing.toolsGCd + newIds.length }) - logger.debug("gc-tracker", "Accumulated GC stats", { + // Mark these IDs as counted + for (const id of newIds) { + state.gcCountedIds.add(id.toLowerCase()) + } + + logger.debug("gc-tracker", "Accumulated GC stats (inputs)", { sessionId: sessionId.substring(0, 8), - newlyDeduped: prunedIds.length, + inputsPruned: newIds.length, tokensThisCycle: tokensCollected, pendingTotal: state.gcPending.get(sessionId) }) diff --git a/lib/fetch-wrapper/handler.ts b/lib/fetch-wrapper/handler.ts index ce7a476..034264c 100644 --- a/lib/fetch-wrapper/handler.ts +++ b/lib/fetch-wrapper/handler.ts @@ -3,8 +3,11 @@ import { type PluginState, ensureSessionRestored } from "../state" import type { Logger } from "../logger" import { buildPrunableToolsList, buildEndInjection } from "./prunable-list" import { syncToolCache } from "../state/tool-cache" +import { runStrategies, type ToolMetadata } from "../core/strategies" +import { accumulateGCStats, accumulateGCInputStats } from "./gc-tracker" const PRUNED_CONTENT_MESSAGE = '[Output removed to save context - information superseded or no longer needed]' +const PRUNED_INPUT_MESSAGE = '[Input removed - tool execution failed]' function getMostRecentActiveSession(allSessions: any): any | undefined { const activeSessions = allSessions.data?.filter((s: any) => !s.parentID) || [] @@ -114,6 +117,79 @@ export async function handleFormat( const { allSessions, allPrunedIds } = await getAllPrunedIds(ctx.client, ctx.state, ctx.logger) + // Run automatic strategies (error-pruning, deduplication) to find tools to prune + const toolIds = Array.from(ctx.state.toolParameters.keys()) + const alreadyPruned = sessionId ? (ctx.state.prunedIds.get(sessionId) ?? []) : [] + const alreadyPrunedLower = new Set(alreadyPruned.map(id => id.toLowerCase())) + const unprunedIds = toolIds.filter(id => !alreadyPrunedLower.has(id.toLowerCase())) + + // Build metadata map for ALL tools (error-pruning needs to see all tools, + // even if output was user-pruned, since input pruning is independent) + const toolMetadata = new Map() + for (const id of toolIds) { + const entry = ctx.state.toolParameters.get(id) + if (entry) { + toolMetadata.set(id, { + tool: entry.tool, + parameters: entry.parameters, + status: entry.status, + error: entry.error + }) + } + } + + // Run strategies with unprunedIds - deduplication only sees non-user-pruned tools, + // error-pruning filters internally based on status (and sees all via toolMetadata) + const strategyResult = runStrategies(toolMetadata, unprunedIds, ctx.config.protectedTools) + + // Track error-pruned IDs separately (they get input replacement, not output) + const errorPrunedIds = new Set() + const errorStrategyResult = strategyResult.byStrategy.get("error-pruning") + if (errorStrategyResult && errorStrategyResult.prunedIds.length > 0) { + for (const id of errorStrategyResult.prunedIds) { + errorPrunedIds.add(id.toLowerCase()) + } + ctx.logger.info("fetch", `Error pruning: ${errorStrategyResult.prunedIds.length} failed tool inputs`, { + count: errorStrategyResult.prunedIds.length + }) + } + + // Replace error tool INPUTS (arguments sent to the tool) + let inputReplacedCount = 0 + for (const prunedId of errorPrunedIds) { + if (format.replaceToolInput(data, prunedId, PRUNED_INPUT_MESSAGE, ctx.state)) { + inputReplacedCount++ + } + } + + if (inputReplacedCount > 0) { + ctx.logger.info("fetch", `Replaced error tool inputs (${format.name})`, { + replaced: inputReplacedCount + }) + modified = true + + // Track GC tokens for error-pruned inputs + if (sessionId) { + accumulateGCInputStats(ctx.state, sessionId, Array.from(errorPrunedIds), ctx.logger) + } + } + + // Merge deduplication results into output replacement set + const dedupResult = strategyResult.byStrategy.get("deduplication") + if (dedupResult && dedupResult.prunedIds.length > 0) { + for (const id of dedupResult.prunedIds) { + allPrunedIds.add(id.toLowerCase()) + } + ctx.logger.info("fetch", `Deduplication: ${dedupResult.prunedIds.length} redundant tool outputs`, { + count: dedupResult.prunedIds.length + }) + + // Track GC tokens for deduplicated outputs + if (sessionId) { + accumulateGCStats(ctx.state, sessionId, dedupResult.prunedIds, body, ctx.logger) + } + } + if (allPrunedIds.size === 0) { return { modified, body } } diff --git a/lib/fetch-wrapper/types.ts b/lib/fetch-wrapper/types.ts index 7ea1f83..eeaaed2 100644 --- a/lib/fetch-wrapper/types.ts +++ b/lib/fetch-wrapper/types.ts @@ -17,6 +17,7 @@ export interface FormatDescriptor { injectPrunableList(data: any[], injection: string): boolean extractToolOutputs(data: any[], state: PluginState): ToolOutput[] replaceToolOutput(data: any[], toolId: string, prunedMessage: string, state: PluginState): boolean + replaceToolInput(data: any[], toolId: string, prunedMessage: string, state: PluginState): boolean hasToolOutputs(data: any[]): boolean getLogMetadata(data: any[], replacedCount: number, inputUrl: string): Record } diff --git a/lib/pruning-tool.ts b/lib/pruning-tool.ts index 89f9a03..bac0c8a 100644 --- a/lib/pruning-tool.ts +++ b/lib/pruning-tool.ts @@ -6,6 +6,7 @@ import { resetToolTrackerCount } from "./fetch-wrapper/tool-tracker" import { isSubagentSession } from "./hooks" import { getActualId } from "./state/id-mapping" import { formatPruningResultForTool, sendUnifiedNotification, type NotificationContext } from "./ui/notification" +import { findCurrentAgent } from "./ui/display-utils" import { ensureSessionRestored } from "./state" import { saveSessionState } from "./state/persistence" import type { Logger } from "./logger" @@ -136,19 +137,6 @@ export function createPruningTool( }) } -/** - * Finds the current agent from messages (same logic as janitor.ts). - */ -function findCurrentAgent(messages: any[]): string | undefined { - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i] - const info = msg.info - if (info?.role === 'user') { - return info.agent || 'build' - } - } - return undefined -} /** * Calculates approximate tokens saved by pruning the given tool call IDs. diff --git a/lib/state/index.ts b/lib/state/index.ts index a3c2584..62fae0a 100644 --- a/lib/state/index.ts +++ b/lib/state/index.ts @@ -6,6 +6,7 @@ export interface PluginState { prunedIds: Map stats: Map gcPending: Map + gcCountedIds: Set // Track which tools have already been counted for GC tokens toolParameters: Map model: Map googleToolCallMapping: Map> @@ -34,6 +35,7 @@ export function createPluginState(): PluginState { prunedIds: new Map(), stats: new Map(), gcPending: new Map(), + gcCountedIds: new Set(), toolParameters: new Map(), model: new Map(), googleToolCallMapping: new Map(), diff --git a/lib/ui/display-utils.ts b/lib/ui/display-utils.ts index 6e4e9e2..47cef7d 100644 --- a/lib/ui/display-utils.ts +++ b/lib/ui/display-utils.ts @@ -71,3 +71,18 @@ export function extractParameterKey(metadata: { tool: string, parameters?: any } } return paramStr.substring(0, 50) } + +/** + * Finds the current agent from messages by looking for the most recent user message. + * Used by janitor and pruning-tool to determine notification context. + */ +export function findCurrentAgent(messages: any[]): string | undefined { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + const info = msg.info + if (info?.role === 'user') { + return info.agent || 'build' + } + } + return undefined +} diff --git a/lib/ui/notification.ts b/lib/ui/notification.ts index d9aec9c..c75a0d9 100644 --- a/lib/ui/notification.ts +++ b/lib/ui/notification.ts @@ -56,7 +56,7 @@ export async function sendUnifiedNotification( agent?: string ): Promise { const hasAiPruning = data.aiPrunedCount > 0 - const hasGcActivity = data.gcPending && data.gcPending.toolsDeduped > 0 + const hasGcActivity = data.gcPending && data.gcPending.toolsGCd > 0 if (!hasAiPruning && !hasGcActivity) { return false