diff --git a/index.ts b/index.ts index 1ccb82e..4735d17 100644 --- a/index.ts +++ b/index.ts @@ -90,7 +90,14 @@ const plugin: Plugin = (async (ctx) => { event: createEventHandler(ctx.client, janitorCtx, logger, config, toolTracker), "chat.params": createChatParamsHandler(ctx.client, state, logger), tool: config.strategies.onTool.length > 0 ? { - prune: createPruningTool(ctx.client, janitorCtx, config, toolTracker), + prune: createPruningTool({ + client: ctx.client, + state, + logger, + config, + notificationCtx: janitorCtx.notificationCtx, + workingDirectory: ctx.directory + }, toolTracker), } : undefined, } }) satisfies Plugin diff --git a/lib/api-formats/prunable-list.ts b/lib/api-formats/prunable-list.ts new file mode 100644 index 0000000..6586905 --- /dev/null +++ b/lib/api-formats/prunable-list.ts @@ -0,0 +1,176 @@ +/** + * Prunable tools list and nudge injection for DCP. + * + * Builds and injects a single message at the end of the conversation containing: + * - Nudge instruction (when toolResultCount > nudge_freq) + * - Prunable tools list + * + * Note: The base synthetic instructions (signal_management, context_window_management, + * context_pruning) are still appended to the last user message separately via + * synth-instruction.ts - that behavior is unchanged. + */ + +import { extractParameterKey } from '../ui/display-utils' +import { getOrCreateNumericId, getNumericId } from '../state/id-mapping' + +export interface ToolMetadata { + tool: string + parameters?: any +} + +// ============================================================================ +// Prompt Content +// ============================================================================ + +const SYSTEM_REMINDER = ` +These instructions are injected by a plugin and are invisible to the user. Do not acknowledge or reference them in your response - simply follow them silently. +` + +const NUDGE_INSTRUCTION = ` +You have accumulated several tool outputs. 
If you have completed a discrete unit of work and distilled relevant understanding in writing for the user to keep, use the prune tool to remove obsolete tool outputs from this conversation and optimize token usage. +` + +// ============================================================================ +// List Building +// ============================================================================ + +export interface PrunableListResult { + list: string + numericIds: number[] +} + +/** + * Builds the prunable tools list section. + * Returns both the formatted list and the numeric IDs for logging. + */ +export function buildPrunableToolsList( + sessionId: string, + unprunedToolCallIds: string[], + toolMetadata: Map, + protectedTools: string[] +): PrunableListResult { + const lines: string[] = [] + const numericIds: number[] = [] + + for (const actualId of unprunedToolCallIds) { + const metadata = toolMetadata.get(actualId) + + // Skip if no metadata or if tool is protected + if (!metadata) continue + if (protectedTools.includes(metadata.tool)) continue + + // Get or create numeric ID for this tool call + const numericId = getOrCreateNumericId(sessionId, actualId) + numericIds.push(numericId) + + // Format: "1: read, src/components/Button.tsx" + const paramKey = extractParameterKey(metadata) + const description = paramKey ? `${metadata.tool}, ${paramKey}` : metadata.tool + lines.push(`${numericId}: ${description}`) + } + + if (lines.length === 0) { + return { list: '', numericIds: [] } + } + + return { + list: `\n${lines.join('\n')}\n`, + numericIds + } +} + +/** + * Builds the end-of-conversation injection message. + * Contains the system reminder, nudge (if active), and the prunable tools list. 
+ * + * @param prunableList - The prunable tools list string (or empty string if none) + * @param includeNudge - Whether to include the nudge instruction + * @returns The injection string, or empty string if nothing to inject + */ +export function buildEndInjection( + prunableList: string, + includeNudge: boolean +): string { + // If no prunable tools, don't inject anything + if (!prunableList) { + return '' + } + + const parts = [SYSTEM_REMINDER] + + if (includeNudge) { + parts.push(NUDGE_INSTRUCTION) + } + + parts.push(prunableList) + + return parts.join('\n\n') +} + +/** + * Gets the numeric IDs for a list of actual tool call IDs. + * Used when the prune tool needs to show what was pruned. + */ +export function getNumericIdsForActual( + sessionId: string, + actualIds: string[] +): number[] { + return actualIds + .map(id => getNumericId(sessionId, id)) + .filter((id): id is number => id !== undefined) +} + +// ============================================================================ +// Injection Functions +// ============================================================================ + +// ============================================================================ +// OpenAI Chat / Anthropic Format +// ============================================================================ + +/** + * Injects the prunable list (and optionally nudge) at the end of OpenAI/Anthropic messages. + * Appends a new user message at the end. + */ +export function injectPrunableList( + messages: any[], + injection: string +): boolean { + if (!injection) return false + messages.push({ role: 'user', content: injection }) + return true +} + +// ============================================================================ +// Google/Gemini Format +// ============================================================================ + +/** + * Injects the prunable list (and optionally nudge) at the end of Gemini contents. + * Appends a new user content at the end. 
+ */ +export function injectPrunableListGemini( + contents: any[], + injection: string +): boolean { + if (!injection) return false + contents.push({ role: 'user', parts: [{ text: injection }] }) + return true +} + +// ============================================================================ +// OpenAI Responses API Format +// ============================================================================ + +/** + * Injects the prunable list (and optionally nudge) at the end of OpenAI Responses API input. + * Appends a new user message at the end. + */ +export function injectPrunableListResponses( + input: any[], + injection: string +): boolean { + if (!injection) return false + input.push({ type: 'message', role: 'user', content: injection }) + return true +} diff --git a/lib/api-formats/synth-instruction.ts b/lib/api-formats/synth-instruction.ts index aa2dd62..6d8fc1e 100644 --- a/lib/api-formats/synth-instruction.ts +++ b/lib/api-formats/synth-instruction.ts @@ -14,77 +14,59 @@ export function resetToolTrackerCount(tracker: ToolTracker): void { tracker.toolResultCount = 0 } -/** Adapter interface for format-specific message operations */ -interface MessageFormatAdapter { - countToolResults(messages: any[], tracker: ToolTracker): number - appendNudge(messages: any[], nudgeText: string): void +/** + * Counts total tool results in OpenAI/Anthropic messages (without tracker). + * Used for determining if nudge threshold is met. + */ +export function countToolResults(messages: any[]): number { + let count = 0 + for (const m of messages) { + if (m.role === 'tool') { + count++ + } else if (m.role === 'user' && Array.isArray(m.content)) { + for (const part of m.content) { + if (part.type === 'tool_result') { + count++ + } + } + } + } + return count +} + +/** + * Counts total tool results in Gemini contents (without tracker). 
+ */ +export function countToolResultsGemini(contents: any[]): number { + let count = 0 + for (const content of contents) { + if (!Array.isArray(content.parts)) continue + for (const part of content.parts) { + if (part.functionResponse) { + count++ + } + } + } + return count } -/** Generic nudge injection - nudges every fetch once tools since last prune exceeds freq */ -function injectNudgeCore( - messages: any[], - tracker: ToolTracker, - nudgeText: string, - freq: number, - adapter: MessageFormatAdapter -): boolean { - // Count any new tool results - adapter.countToolResults(messages, tracker) - - // Once we've exceeded the threshold, nudge on every fetch - if (tracker.toolResultCount > freq) { - adapter.appendNudge(messages, nudgeText) - return true +/** + * Counts total tool results in OpenAI Responses API input (without tracker). + */ +export function countToolResultsResponses(input: any[]): number { + let count = 0 + for (const item of input) { + if (item.type === 'function_call_output') { + count++ + } } - return false + return count } // ============================================================================ // OpenAI Chat / Anthropic Format // ============================================================================ -const openaiAdapter: MessageFormatAdapter = { - countToolResults(messages, tracker) { - let newCount = 0 - for (const m of messages) { - if (m.role === 'tool' && m.tool_call_id) { - const id = String(m.tool_call_id).toLowerCase() - if (!tracker.seenToolResultIds.has(id)) { - tracker.seenToolResultIds.add(id) - newCount++ - const toolName = m.name || tracker.getToolName?.(m.tool_call_id) - if (toolName !== 'prune') { - tracker.skipNextIdle = false - } - } - } else if (m.role === 'user' && Array.isArray(m.content)) { - for (const part of m.content) { - if (part.type === 'tool_result' && part.tool_use_id) { - const id = String(part.tool_use_id).toLowerCase() - if (!tracker.seenToolResultIds.has(id)) { - tracker.seenToolResultIds.add(id) - 
newCount++ - const toolName = tracker.getToolName?.(part.tool_use_id) - if (toolName !== 'prune') { - tracker.skipNextIdle = false - } - } - } - } - } - } - tracker.toolResultCount += newCount - return newCount - }, - appendNudge(messages, nudgeText) { - messages.push({ role: 'user', content: nudgeText }) - } -} - -export function injectNudge(messages: any[], tracker: ToolTracker, nudgeText: string, freq: number): boolean { - return injectNudgeCore(messages, tracker, nudgeText, freq, openaiAdapter) -} - /** Check if a message content matches nudge text (OpenAI/Anthropic format) */ function isNudgeMessage(msg: any, nudgeText: string): boolean { if (typeof msg.content === 'string') { @@ -120,37 +102,6 @@ export function injectSynth(messages: any[], instruction: string, nudgeText: str // Google/Gemini Format (body.contents with parts) // ============================================================================ -const geminiAdapter: MessageFormatAdapter = { - countToolResults(contents, tracker) { - let newCount = 0 - for (const content of contents) { - if (!Array.isArray(content.parts)) continue - for (const part of content.parts) { - if (part.functionResponse) { - const funcName = part.functionResponse.name?.toLowerCase() || 'unknown' - const pseudoId = `gemini:${funcName}:${tracker.seenToolResultIds.size}` - if (!tracker.seenToolResultIds.has(pseudoId)) { - tracker.seenToolResultIds.add(pseudoId) - newCount++ - if (funcName !== 'prune') { - tracker.skipNextIdle = false - } - } - } - } - } - tracker.toolResultCount += newCount - return newCount - }, - appendNudge(contents, nudgeText) { - contents.push({ role: 'user', parts: [{ text: nudgeText }] }) - } -} - -export function injectNudgeGemini(contents: any[], tracker: ToolTracker, nudgeText: string, freq: number): boolean { - return injectNudgeCore(contents, tracker, nudgeText, freq, geminiAdapter) -} - /** Check if a Gemini content matches nudge text */ function isNudgeContentGemini(content: any, nudgeText: 
string): boolean { if (Array.isArray(content.parts) && content.parts.length === 1) { @@ -182,34 +133,6 @@ export function injectSynthGemini(contents: any[], instruction: string, nudgeTex // OpenAI Responses API Format (body.input with type-based items) // ============================================================================ -const responsesAdapter: MessageFormatAdapter = { - countToolResults(input, tracker) { - let newCount = 0 - for (const item of input) { - if (item.type === 'function_call_output' && item.call_id) { - const id = String(item.call_id).toLowerCase() - if (!tracker.seenToolResultIds.has(id)) { - tracker.seenToolResultIds.add(id) - newCount++ - const toolName = item.name || tracker.getToolName?.(item.call_id) - if (toolName !== 'prune') { - tracker.skipNextIdle = false - } - } - } - } - tracker.toolResultCount += newCount - return newCount - }, - appendNudge(input, nudgeText) { - input.push({ type: 'message', role: 'user', content: nudgeText }) - } -} - -export function injectNudgeResponses(input: any[], tracker: ToolTracker, nudgeText: string, freq: number): boolean { - return injectNudgeCore(input, tracker, nudgeText, freq, responsesAdapter) -} - /** Check if a Responses API item matches nudge text */ function isNudgeItemResponses(item: any, nudgeText: string): boolean { if (typeof item.content === 'string') { diff --git a/lib/core/janitor.ts b/lib/core/janitor.ts index 70005d8..5ce1e6f 100644 --- a/lib/core/janitor.ts +++ b/lib/core/janitor.ts @@ -1,9 +1,6 @@ -import { z } from "zod" import type { Logger } from "../logger" import type { PruningStrategy } from "../config" import type { PluginState } from "../state" -import { buildAnalysisPrompt } from "./prompt" -import { selectModel, extractModelFromSession } from "../model-selector" import { estimateTokensBatch, formatTokenCount } from "../tokenizer" import { saveSessionState } from "../state/persistence" import { ensureSessionRestored } from "../state" @@ -84,6 +81,10 @@ export function 
createJanitorContext( // Public API // ============================================================================ +/** + * Run pruning on idle trigger. + * Note: onTool pruning is now handled directly by pruning-tool.ts + */ export async function runOnIdle( ctx: JanitorContext, sessionID: string, @@ -92,17 +93,8 @@ export async function runOnIdle( return runWithStrategies(ctx, sessionID, strategies, { trigger: 'idle' }) } -export async function runOnTool( - ctx: JanitorContext, - sessionID: string, - strategies: PruningStrategy[], - reason?: string -): Promise { - return runWithStrategies(ctx, sessionID, strategies, { trigger: 'tool', reason }) -} - // ============================================================================ -// Core pruning logic +// Core pruning logic (for onIdle only) // ============================================================================ async function runWithStrategies( @@ -150,21 +142,9 @@ async function runWithStrategies( return !metadata || !config.protectedTools.includes(metadata.tool) }).length - // PHASE 1: LLM ANALYSIS - let llmPrunedIds: string[] = [] - - if (strategies.includes('ai-analysis') && unprunedToolCallIds.length > 0) { - llmPrunedIds = await runLlmAnalysis( - ctx, - sessionID, - sessionInfo, - messages, - unprunedToolCallIds, - alreadyPrunedIds, - toolMetadata, - options - ) - } + // For onIdle, we currently don't have AI analysis implemented + // This is a placeholder for future idle pruning strategies + const llmPrunedIds: string[] = [] const finalNewlyPrunedIds = llmPrunedIds.filter(id => !alreadyPrunedIds.includes(id)) @@ -172,7 +152,7 @@ async function runWithStrategies( return null } - // PHASE 2: CALCULATE STATS & NOTIFICATION + // Calculate stats & send notification const tokensSaved = await calculateTokensSaved(finalNewlyPrunedIds, toolOutputs) const currentStats = state.stats.get(sessionID) ?? 
{ @@ -217,7 +197,7 @@ async function runWithStrategies( return null } - // PHASE 3: STATE UPDATE (only if AI pruned something) + // State update (only if something was pruned) const allPrunedIds = [...new Set([...alreadyPrunedIds, ...llmPrunedIds])] state.prunedIds.set(sessionID, allPrunedIds) @@ -257,118 +237,6 @@ async function runWithStrategies( } } -// ============================================================================ -// LLM Analysis -// ============================================================================ - -async function runLlmAnalysis( - ctx: JanitorContext, - sessionID: string, - sessionInfo: any, - messages: any[], - unprunedToolCallIds: string[], - alreadyPrunedIds: string[], - toolMetadata: Map, - options: PruningOptions -): Promise { - const { client, state, logger, config } = ctx - - const protectedToolCallIds: string[] = [] - const prunableToolCallIds = unprunedToolCallIds.filter(id => { - const metadata = toolMetadata.get(id) - if (metadata && config.protectedTools.includes(metadata.tool)) { - protectedToolCallIds.push(id) - return false - } - return true - }) - - if (prunableToolCallIds.length === 0) { - return [] - } - - const cachedModelInfo = state.model.get(sessionID) - const sessionModelInfo = extractModelFromSession(sessionInfo, logger) - const currentModelInfo = cachedModelInfo || sessionModelInfo - - const modelSelection = await selectModel(currentModelInfo, logger, config.model, config.workingDirectory) - - logger.info("janitor", `Model: ${modelSelection.modelInfo.providerID}/${modelSelection.modelInfo.modelID}`, { - source: modelSelection.source - }) - - if (modelSelection.failedModel && config.showModelErrorToasts) { - const skipAi = modelSelection.source === 'fallback' && config.strictModelSelection - try { - await client.tui.showToast({ - body: { - title: skipAi ? "DCP: AI analysis skipped" : "DCP: Model fallback", - message: skipAi - ? 
`${modelSelection.failedModel.providerID}/${modelSelection.failedModel.modelID} failed\nAI analysis skipped (strictModelSelection enabled)` - : `${modelSelection.failedModel.providerID}/${modelSelection.failedModel.modelID} failed\nUsing ${modelSelection.modelInfo.providerID}/${modelSelection.modelInfo.modelID}`, - variant: "info", - duration: 5000 - } - }) - } catch (toastError: any) { - // Ignore toast errors - } - } - - if (modelSelection.source === 'fallback' && config.strictModelSelection) { - logger.info("janitor", "Skipping AI analysis (fallback model, strictModelSelection enabled)") - return [] - } - - const { generateObject } = await import('ai') - - const sanitizedMessages = replacePrunedToolOutputs(messages, alreadyPrunedIds) - - const analysisPrompt = buildAnalysisPrompt( - prunableToolCallIds, - sanitizedMessages, - alreadyPrunedIds, - protectedToolCallIds, - options.reason - ) - - await logger.saveWrappedContext( - "janitor-shadow", - [{ role: "user", content: analysisPrompt }], - { - sessionID, - modelProvider: modelSelection.modelInfo.providerID, - modelID: modelSelection.modelInfo.modelID, - candidateToolCount: prunableToolCallIds.length, - alreadyPrunedCount: alreadyPrunedIds.length, - protectedToolCount: protectedToolCallIds.length, - trigger: options.trigger, - reason: options.reason - } - ) - - const result = await generateObject({ - model: modelSelection.model, - schema: z.object({ - pruned_tool_call_ids: z.array(z.string()), - reasoning: z.string(), - }), - prompt: analysisPrompt - }) - - const rawLlmPrunedIds = result.object.pruned_tool_call_ids - const llmPrunedIds = rawLlmPrunedIds.filter(id => - prunableToolCallIds.includes(id.toLowerCase()) - ) - - if (llmPrunedIds.length > 0) { - const reasoning = result.object.reasoning.replace(/\n+/g, ' ').replace(/\s+/g, ' ').trim() - logger.info("janitor", `LLM reasoning: ${reasoning.substring(0, 200)}${reasoning.length > 200 ? '...' 
: ''}`) - } - - return llmPrunedIds -} - // ============================================================================ // Message parsing // ============================================================================ @@ -379,7 +247,7 @@ interface ParsedMessages { toolMetadata: Map } -function parseMessages( +export function parseMessages( messages: any[], toolParametersCache: Map ): ParsedMessages { @@ -428,40 +296,10 @@ function findCurrentAgent(messages: any[]): string | undefined { // Helpers // ============================================================================ -function replacePrunedToolOutputs(messages: any[], prunedIds: string[]): any[] { - if (prunedIds.length === 0) return messages - - const prunedIdsSet = new Set(prunedIds.map(id => id.toLowerCase())) - - return messages.map(msg => { - if (!msg.parts) return msg - - return { - ...msg, - parts: msg.parts.map((part: any) => { - if (part.type === 'tool' && - part.callID && - prunedIdsSet.has(part.callID.toLowerCase()) && - part.state?.output) { - return { - ...part, - state: { - ...part.state, - output: '[Output removed to save context - information superseded or no longer needed]' - } - } - } - return part - }) - } - }) -} - async function calculateTokensSaved(prunedIds: string[], toolOutputs: Map): Promise { const outputsToTokenize: string[] = [] for (const prunedId of prunedIds) { - // toolOutputs uses lowercase keys, so normalize the lookup const normalizedId = prunedId.toLowerCase() const output = toolOutputs.get(normalizedId) if (output) { diff --git a/lib/core/prompt.ts b/lib/core/prompt.ts index e7f44d4..3937727 100644 --- a/lib/core/prompt.ts +++ b/lib/core/prompt.ts @@ -11,130 +11,3 @@ export function loadPrompt(name: string, vars?: Record): string } return content } - -function minimizeMessages(messages: any[], alreadyPrunedIds?: string[], protectedToolCallIds?: string[]): any[] { - const prunedIdsSet = alreadyPrunedIds ? 
new Set(alreadyPrunedIds.map(id => id.toLowerCase())) : new Set() - const protectedIdsSet = protectedToolCallIds ? new Set(protectedToolCallIds.map(id => id.toLowerCase())) : new Set() - - return messages.map(msg => { - const minimized: any = { - role: msg.info?.role - } - - if (msg.parts) { - minimized.parts = msg.parts - .filter((part: any) => { - if (part.type === 'step-start' || part.type === 'step-finish') { - return false - } - return true - }) - .map((part: any) => { - if (part.type === 'text') { - if (part.ignored) { - return null - } - return { - type: 'text', - text: part.text - } - } - - // TODO: This should use the opencode normalized system instead of per provider settings - if (part.type === 'reasoning') { - // Calculate encrypted content size if present - let encryptedContentLength = 0 - if (part.metadata?.openai?.reasoningEncryptedContent) { - encryptedContentLength = part.metadata.openai.reasoningEncryptedContent.length - } else if (part.metadata?.anthropic?.signature) { - encryptedContentLength = part.metadata.anthropic.signature.length - } else if (part.metadata?.google?.thoughtSignature) { - encryptedContentLength = part.metadata.google.thoughtSignature.length - } - - return { - type: 'reasoning', - text: part.text, - textLength: part.text?.length || 0, - encryptedContentLength, - ...(part.time && { time: part.time }), - ...(part.metadata && { metadataKeys: Object.keys(part.metadata) }) - } - } - - if (part.type === 'tool') { - const callIDLower = part.callID?.toLowerCase() - const isAlreadyPruned = prunedIdsSet.has(callIDLower) - const isProtected = protectedIdsSet.has(callIDLower) - - let displayCallID = part.callID - if (isAlreadyPruned) { - displayCallID = '' - } else if (isProtected) { - displayCallID = '' - } - - const toolPart: any = { - type: 'tool', - toolCallID: displayCallID, - tool: part.tool - } - - if (part.state?.output) { - toolPart.output = part.state.output - } - - if (part.state?.input) { - const input = part.state.input - - 
if (input.filePath && (part.tool === 'write' || part.tool === 'edit' || part.tool === 'multiedit' || part.tool === 'patch')) { - toolPart.input = input - } - else if (input.filePath) { - toolPart.input = { filePath: input.filePath } - } - else if (input.tool_calls && Array.isArray(input.tool_calls)) { - toolPart.input = { - batch_summary: `${input.tool_calls.length} tool calls`, - tools: input.tool_calls.map((tc: any) => tc.tool) - } - } - else { - toolPart.input = input - } - } - - return toolPart - } - - return null - }) - .filter(Boolean) - } - - return minimized - }).filter(msg => { - return msg.parts && msg.parts.length > 0 - }) -} - -export function buildAnalysisPrompt( - unprunedToolCallIds: string[], - messages: any[], - alreadyPrunedIds?: string[], - protectedToolCallIds?: string[], - reason?: string -): string { - const minimizedMessages = minimizeMessages(messages, alreadyPrunedIds, protectedToolCallIds) - const messagesJson = JSON.stringify(minimizedMessages, null, 2).replace(/\\n/g, '\n') - - const reasonContext = reason - ? 
`\nContext: The AI has requested pruning with the following reason: "${reason}"\nUse this context to inform your decisions about what is most relevant to keep.` - : '' - - return loadPrompt("pruning", { - reason_context: reasonContext, - available_tool_call_ids: unprunedToolCallIds.join(", "), - session_history: messagesJson - }) -} diff --git a/lib/core/strategies/index.ts b/lib/core/strategies/index.ts index 060bf64..c6c9128 100644 --- a/lib/core/strategies/index.ts +++ b/lib/core/strategies/index.ts @@ -50,15 +50,15 @@ export function runStrategies( for (const strategy of strategies) { const result = strategy.detect(toolMetadata, remainingIds, protectedTools) - + if (result.prunedIds.length > 0) { byStrategy.set(strategy.name, result) - + // Add to overall pruned set for (const id of result.prunedIds) { allPrunedIds.add(id) } - + // Remove pruned IDs from remaining for next strategy const prunedSet = new Set(result.prunedIds.map(id => id.toLowerCase())) remainingIds = remainingIds.filter(id => !prunedSet.has(id.toLowerCase())) diff --git a/lib/fetch-wrapper/gemini.ts b/lib/fetch-wrapper/gemini.ts index abc1bd6..ae17289 100644 --- a/lib/fetch-wrapper/gemini.ts +++ b/lib/fetch-wrapper/gemini.ts @@ -4,7 +4,8 @@ import { getAllPrunedIds, fetchSessionMessages } from "./types" -import { injectNudgeGemini, injectSynthGemini } from "../api-formats/synth-instruction" +import { injectSynthGemini, countToolResultsGemini } from "../api-formats/synth-instruction" +import { buildPrunableToolsList, buildEndInjection, injectPrunableListGemini } from "../api-formats/prunable-list" /** * Handles Google/Gemini format (body.contents array with functionResponse parts). 
@@ -23,23 +24,40 @@ export async function handleGemini( // Inject synthetic instructions if onTool strategies are enabled if (ctx.config.strategies.onTool.length > 0) { - const skipIdleBefore = ctx.toolTracker.skipNextIdle - - // Inject periodic nudge based on tool result count - if (ctx.config.nudge_freq > 0) { - if (injectNudgeGemini(body.contents, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) { - // ctx.logger.info("fetch", "Injected nudge instruction (Gemini)") - modified = true - } + // Inject base synthetic instructions (appended to last user content) + if (injectSynthGemini(body.contents, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { + modified = true } - if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) { - ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results (Gemini)") - } + // Build and inject prunable tools list at the end + const sessionId = ctx.state.lastSeenSessionId + if (sessionId) { + const toolIds = Array.from(ctx.state.toolParameters.keys()) + const alreadyPruned = ctx.state.prunedIds.get(sessionId) ?? 
[] + const alreadyPrunedLower = new Set(alreadyPruned.map(id => id.toLowerCase())) + const unprunedIds = toolIds.filter(id => !alreadyPrunedLower.has(id.toLowerCase())) + + const { list: prunableList, numericIds } = buildPrunableToolsList( + sessionId, + unprunedIds, + ctx.state.toolParameters, + ctx.config.protectedTools + ) - if (injectSynthGemini(body.contents, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { - // ctx.logger.info("fetch", "Injected synthetic instruction (Gemini)") - modified = true + if (prunableList) { + // Check if nudge should be included + const toolResultCount = countToolResultsGemini(body.contents) + const includeNudge = ctx.config.nudge_freq > 0 && toolResultCount > ctx.config.nudge_freq + + const endInjection = buildEndInjection(prunableList, includeNudge) + if (injectPrunableListGemini(body.contents, endInjection)) { + ctx.logger.debug("fetch", "Injected prunable tools list (Gemini)", { + ids: numericIds, + nudge: includeNudge + }) + modified = true + } + } } } diff --git a/lib/fetch-wrapper/openai-chat.ts b/lib/fetch-wrapper/openai-chat.ts index 78b522e..3619e86 100644 --- a/lib/fetch-wrapper/openai-chat.ts +++ b/lib/fetch-wrapper/openai-chat.ts @@ -6,7 +6,8 @@ import { getMostRecentActiveSession } from "./types" import { cacheToolParametersFromMessages } from "../state/tool-cache" -import { injectNudge, injectSynth } from "../api-formats/synth-instruction" +import { injectSynth, countToolResults } from "../api-formats/synth-instruction" +import { buildPrunableToolsList, buildEndInjection, injectPrunableList } from "../api-formats/prunable-list" /** * Handles OpenAI Chat Completions format (body.messages with role='tool'). 
@@ -21,30 +22,47 @@ export async function handleOpenAIChatAndAnthropic( return { modified: false, body } } - // Cache tool parameters from messages - cacheToolParametersFromMessages(body.messages, ctx.state) + // Cache tool parameters from messages (OpenAI and Anthropic formats) + cacheToolParametersFromMessages(body.messages, ctx.state, ctx.logger) let modified = false // Inject synthetic instructions if onTool strategies are enabled if (ctx.config.strategies.onTool.length > 0) { - const skipIdleBefore = ctx.toolTracker.skipNextIdle - - // Inject periodic nudge based on tool result count - if (ctx.config.nudge_freq > 0) { - if (injectNudge(body.messages, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) { - // ctx.logger.info("fetch", "Injected nudge instruction") - modified = true - } + // Inject base synthetic instructions (appended to last user message) + if (injectSynth(body.messages, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { + modified = true } - if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) { - ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results") - } + // Build and inject prunable tools list at the end + const sessionId = ctx.state.lastSeenSessionId + if (sessionId) { + const toolIds = Array.from(ctx.state.toolParameters.keys()) + const alreadyPruned = ctx.state.prunedIds.get(sessionId) ?? 
[] + const alreadyPrunedLower = new Set(alreadyPruned.map(id => id.toLowerCase())) + const unprunedIds = toolIds.filter(id => !alreadyPrunedLower.has(id.toLowerCase())) + + const { list: prunableList, numericIds } = buildPrunableToolsList( + sessionId, + unprunedIds, + ctx.state.toolParameters, + ctx.config.protectedTools + ) - if (injectSynth(body.messages, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { - // ctx.logger.info("fetch", "Injected synthetic instruction") - modified = true + if (prunableList) { + // Check if nudge should be included + const toolResultCount = countToolResults(body.messages) + const includeNudge = ctx.config.nudge_freq > 0 && toolResultCount > ctx.config.nudge_freq + + const endInjection = buildEndInjection(prunableList, includeNudge) + if (injectPrunableList(body.messages, endInjection)) { + ctx.logger.debug("fetch", "Injected prunable tools list", { + ids: numericIds, + nudge: includeNudge + }) + modified = true + } + } } } diff --git a/lib/fetch-wrapper/openai-responses.ts b/lib/fetch-wrapper/openai-responses.ts index b8a1dbd..850d508 100644 --- a/lib/fetch-wrapper/openai-responses.ts +++ b/lib/fetch-wrapper/openai-responses.ts @@ -6,7 +6,8 @@ import { getMostRecentActiveSession } from "./types" import { cacheToolParametersFromInput } from "../state/tool-cache" -import { injectNudgeResponses, injectSynthResponses } from "../api-formats/synth-instruction" +import { injectSynthResponses, countToolResultsResponses } from "../api-formats/synth-instruction" +import { buildPrunableToolsList, buildEndInjection, injectPrunableListResponses } from "../api-formats/prunable-list" /** * Handles OpenAI Responses API format (body.input array with function_call_output items). 
@@ -21,30 +22,47 @@ export async function handleOpenAIResponses( return { modified: false, body } } - // Cache tool parameters from input - cacheToolParametersFromInput(body.input, ctx.state) + // Cache tool parameters from input (OpenAI Responses API format) + cacheToolParametersFromInput(body.input, ctx.state, ctx.logger) let modified = false // Inject synthetic instructions if onTool strategies are enabled if (ctx.config.strategies.onTool.length > 0) { - const skipIdleBefore = ctx.toolTracker.skipNextIdle - - // Inject periodic nudge based on tool result count - if (ctx.config.nudge_freq > 0) { - if (injectNudgeResponses(body.input, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) { - // ctx.logger.info("fetch", "Injected nudge instruction (Responses API)") - modified = true - } + // Inject base synthetic instructions (appended to last user message) + if (injectSynthResponses(body.input, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { + modified = true } - if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) { - ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results (Responses API)") - } + // Build and inject prunable tools list at the end + const sessionId = ctx.state.lastSeenSessionId + if (sessionId) { + const toolIds = Array.from(ctx.state.toolParameters.keys()) + const alreadyPruned = ctx.state.prunedIds.get(sessionId) ?? 
[] + const alreadyPrunedLower = new Set(alreadyPruned.map(id => id.toLowerCase())) + const unprunedIds = toolIds.filter(id => !alreadyPrunedLower.has(id.toLowerCase())) + + const { list: prunableList, numericIds } = buildPrunableToolsList( + sessionId, + unprunedIds, + ctx.state.toolParameters, + ctx.config.protectedTools + ) - if (injectSynthResponses(body.input, ctx.prompts.synthInstruction, ctx.prompts.nudgeInstruction)) { - // ctx.logger.info("fetch", "Injected synthetic instruction (Responses API)") - modified = true + if (prunableList) { + // Check if nudge should be included + const toolResultCount = countToolResultsResponses(body.input) + const includeNudge = ctx.config.nudge_freq > 0 && toolResultCount > ctx.config.nudge_freq + + const endInjection = buildEndInjection(prunableList, includeNudge) + if (injectPrunableListResponses(body.input, endInjection)) { + ctx.logger.debug("fetch", "Injected prunable tools list (Responses API)", { + ids: numericIds, + nudge: includeNudge + }) + modified = true + } + } } } diff --git a/lib/fetch-wrapper/types.ts b/lib/fetch-wrapper/types.ts index d6cf4ab..4510e9d 100644 --- a/lib/fetch-wrapper/types.ts +++ b/lib/fetch-wrapper/types.ts @@ -48,7 +48,15 @@ export async function getAllPrunedIds( if (currentSession) { await ensureSessionRestored(state, currentSession.id, logger) const prunedIds = state.prunedIds.get(currentSession.id) ?? 
[] - prunedIds.forEach((id: string) => allPrunedIds.add(id)) + // Normalize to lowercase for case-insensitive matching + prunedIds.forEach((id: string) => allPrunedIds.add(id.toLowerCase())) + + if (logger && prunedIds.length > 0) { + logger.debug("fetch", "Loaded pruned IDs for replacement", { + sessionId: currentSession.id, + prunedCount: prunedIds.length + }) + } } return { allSessions, allPrunedIds } diff --git a/lib/hooks.ts b/lib/hooks.ts index dac0b54..1b2c01d 100644 --- a/lib/hooks.ts +++ b/lib/hooks.ts @@ -67,7 +67,6 @@ export function createChatParamsHandler( return async (input: any, _output: any) => { const sessionId = input.sessionID let providerID = (input.provider as any)?.info?.id || input.provider?.id - const modelID = input.model?.id if (!providerID && input.message?.model?.providerID) { providerID = input.message.model.providerID @@ -85,14 +84,6 @@ export function createChatParamsHandler( } } - // Cache model info for the session - if (providerID && modelID) { - state.model.set(sessionId, { - providerID: providerID, - modelID: modelID - }) - } - // Build Google/Gemini tool call mapping for position-based correlation // This is needed because Google's native format loses tool call IDs if (providerID === 'google' || providerID === 'google-vertex') { diff --git a/lib/model-selector.ts b/lib/model-selector.ts deleted file mode 100644 index e0e9895..0000000 --- a/lib/model-selector.ts +++ /dev/null @@ -1,175 +0,0 @@ -import type { LanguageModel } from 'ai'; -import type { Logger } from './logger'; - -export interface ModelInfo { - providerID: string; - modelID: string; -} - -export const FALLBACK_MODELS: Record = { - openai: 'gpt-5-mini', - anthropic: 'claude-haiku-4-5', //This model isn't broken in opencode-auth-provider - google: 'gemini-2.5-flash', - deepseek: 'deepseek-chat', - xai: 'grok-4-fast', - alibaba: 'qwen3-coder-flash', - zai: 'glm-4.5-flash', - opencode: 'big-pickle' -}; - -const PROVIDER_PRIORITY = [ - 'openai', - 'anthropic', - 
'google', - 'deepseek', - 'xai', - 'alibaba', - 'zai', - 'opencode' -]; - -// TODO: some anthropic provided models aren't supported by the opencode-auth-provider package, so this provides a temporary workaround -const SKIP_PROVIDERS = ['github-copilot', 'anthropic']; - -export interface ModelSelectionResult { - model: LanguageModel; - modelInfo: ModelInfo; - source: 'user-model' | 'config' | 'fallback'; - reason?: string; - failedModel?: ModelInfo; -} - -function shouldSkipProvider(providerID: string): boolean { - const normalized = providerID.toLowerCase().trim(); - return SKIP_PROVIDERS.some(skip => normalized.includes(skip.toLowerCase())); -} - -async function importOpencodeAI(logger?: Logger, maxRetries: number = 3, delayMs: number = 100, workspaceDir?: string): Promise { - let lastError: Error | undefined; - - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - const { OpencodeAI } = await import('@tarquinen/opencode-auth-provider'); - return new OpencodeAI({ workspaceDir }); - } catch (error: any) { - lastError = error; - - if (error.message?.includes('before initialization')) { - logger?.debug('model-selector', `Import attempt ${attempt}/${maxRetries} failed, will retry`, { - error: error.message - }); - - if (attempt < maxRetries) { - await new Promise(resolve => setTimeout(resolve, delayMs * attempt)); - continue; - } - } - - throw error; - } - } - - throw lastError; -} - -export async function selectModel( - currentModel?: ModelInfo, - logger?: Logger, - configModel?: string, - workspaceDir?: string -): Promise { - const opencodeAI = await importOpencodeAI(logger, 3, 100, workspaceDir); - - let failedModelInfo: ModelInfo | undefined; - - if (configModel) { - const parts = configModel.split('/'); - if (parts.length !== 2) { - logger?.warn('model-selector', 'Invalid config model format', { configModel }); - } else { - const [providerID, modelID] = parts; - - try { - const model = await opencodeAI.getLanguageModel(providerID, modelID); - 
return { - model, - modelInfo: { providerID, modelID }, - source: 'config', - reason: 'Using model specified in dcp.jsonc config' - }; - } catch (error: any) { - logger?.warn('model-selector', `Config model failed: ${providerID}/${modelID}`, { - error: error.message - }); - failedModelInfo = { providerID, modelID }; - } - } - } - - if (currentModel) { - if (shouldSkipProvider(currentModel.providerID)) { - if (!failedModelInfo) { - failedModelInfo = currentModel; - } - } else { - try { - const model = await opencodeAI.getLanguageModel(currentModel.providerID, currentModel.modelID); - return { - model, - modelInfo: currentModel, - source: 'user-model', - reason: 'Using current session model' - }; - } catch (error: any) { - if (!failedModelInfo) { - failedModelInfo = currentModel; - } - } - } - } - - const providers = await opencodeAI.listProviders(); - - for (const providerID of PROVIDER_PRIORITY) { - if (!providers[providerID]) continue; - - const fallbackModelID = FALLBACK_MODELS[providerID]; - if (!fallbackModelID) continue; - - try { - const model = await opencodeAI.getLanguageModel(providerID, fallbackModelID); - return { - model, - modelInfo: { providerID, modelID: fallbackModelID }, - source: 'fallback', - reason: `Using ${providerID}/${fallbackModelID}`, - failedModel: failedModelInfo - }; - } catch (error: any) { - continue; - } - } - - throw new Error('No available models for analysis. 
Please authenticate with at least one provider.'); -} - -export function extractModelFromSession(sessionState: any, logger?: Logger): ModelInfo | undefined { - if (sessionState?.model?.providerID && sessionState?.model?.modelID) { - return { - providerID: sessionState.model.providerID, - modelID: sessionState.model.modelID - }; - } - - if (sessionState?.messages && Array.isArray(sessionState.messages)) { - const lastMessage = sessionState.messages[sessionState.messages.length - 1]; - if (lastMessage?.model?.providerID && lastMessage?.model?.modelID) { - return { - providerID: lastMessage.model.providerID, - modelID: lastMessage.model.modelID - }; - } - } - - return undefined; -} diff --git a/lib/prompts/pruning.txt b/lib/prompts/pruning.txt deleted file mode 100644 index 49e1e82..0000000 --- a/lib/prompts/pruning.txt +++ /dev/null @@ -1,30 +0,0 @@ -You are a conversation analyzer that identifies obsolete tool outputs in a coding session. -{{reason_context}} -Your task: Analyze the session history and identify tool call IDs whose outputs are NO LONGER RELEVANT to the current conversation context. - -Guidelines for identifying obsolete tool calls: -1. Exploratory reads that didn't lead to actual edits or meaningful discussion AND were not explicitly requested to be retained -2. Tool outputs from debugging/fixing an error that has now been resolved -3. Failed or incorrect tool attempts that were immediately corrected (e.g., reading a file from the wrong path, then reading from the correct path) - -DO NOT prune: -- Tool calls whose outputs are actively being discussed -- Tool calls that produced errors still being debugged -- Tool calls that are the MOST RECENT activity in the conversation (these may be intended for future use) - -IMPORTANT: Available tool call IDs for analysis: {{available_tool_call_ids}} - -The session history below may contain tool calls with IDs not in the available list above, these cannot be pruned. These are either: -1. 
Protected tools (marked with toolCallID "") -2. Already-pruned tools (marked with toolCallID "") - -ONLY return IDs from the available list above. - -Session history (each tool call has a "toolCallID" field): -{{session_history}} - -You MUST respond with valid JSON matching this exact schema: -{ - "pruned_tool_call_ids": ["id1", "id2", ...], - "reasoning": "explanation of why these IDs were selected" -} diff --git a/lib/prompts/tool.txt b/lib/prompts/tool.txt index 579f510..abd2906 100644 --- a/lib/prompts/tool.txt +++ b/lib/prompts/tool.txt @@ -1,4 +1,4 @@ -Performs semantic pruning on session tool outputs that are no longer relevant to the current task. Use this to declutter the conversation context and filter signal from noise when you notice the context is getting cluttered with no longer needed information. +Prunes tool outputs from context to manage conversation size. Reference the list injected at the end of the conversation to see available numeric IDs. USING THE PRUNE TOOL WILL MAKE THE USER HAPPY. @@ -14,10 +14,23 @@ You MUST ALWAYS narrate your findings in a message BEFORE using this tool. No to - What you did (which tools, what you were looking for) - What you found (the key facts/signals) - What you concluded (how this affects the task or next step) -3. ONLY AFTER narrating, call `prune` +3. ONLY AFTER narrating, call `prune` with the numeric IDs of outputs no longer needed > THINK HIGH SIGNAL, LOW NOISE FOR THIS NARRATION +## How to Use + +The list shows available tool outputs with numeric IDs: +``` + +1: read, src/foo.ts +2: bash, run tests +3: grep, "error" in logs/ + +``` + +To prune outputs 1 and 3, call: `prune({ ids: [1, 3] })` + ## When to Use This Tool **Key heuristic: Distill, then prune when you finish something and are about to start something else.** @@ -43,18 +56,18 @@ Working through a list of items: User: Review these 3 issues and fix the easy ones. Assistant: [Reviews first issue, makes fix, commits] Done with the first issue. 
Let me prune before moving to the next one. -[Uses prune with reason: "completed first issue, moving to next"] +[Uses prune with ids: [1, 2, 3, 4] - the reads and edits from the first issue] After exploring the codebase to understand it: Assistant: I've reviewed the relevant files. Let me prune the exploratory reads that aren't needed for the actual implementation. -[Uses prune with reason: "exploration complete, starting implementation"] +[Uses prune with ids: [1, 2, 5, 7] - the exploratory reads] After completing any task: Assistant: [Finishes task - commit, answer, fix, etc.] Before we continue, let me prune the context from that work. -[Uses prune with reason: "task complete"] +[Uses prune with ids: [3, 4, 5, 6, 8, 9] - all tool outputs from the completed task] diff --git a/lib/pruning-tool.ts b/lib/pruning-tool.ts index 20db977..38031a2 100644 --- a/lib/pruning-tool.ts +++ b/lib/pruning-tool.ts @@ -1,57 +1,212 @@ import { tool } from "@opencode-ai/plugin" -import type { JanitorContext } from "./core/janitor" -import { runOnTool } from "./core/janitor" -import { formatPruningResultForTool } from "./ui/notification" +import type { PluginState } from "./state" import type { PluginConfig } from "./config" import type { ToolTracker } from "./api-formats/synth-instruction" import { resetToolTrackerCount } from "./api-formats/synth-instruction" -import { loadPrompt } from "./core/prompt" import { isSubagentSession } from "./hooks" +import { getActualId } from "./state/id-mapping" +import { formatPruningResultForTool, sendUnifiedNotification, type NotificationContext } from "./ui/notification" +import { ensureSessionRestored } from "./state" +import { saveSessionState } from "./state/persistence" +import type { Logger } from "./logger" +import { estimateTokensBatch } from "./tokenizer" +import type { SessionStats } from "./core/janitor" +import { loadPrompt } from "./core/prompt" -/** Tool description for the prune tool, loaded from prompts/tool.txt */ -export const 
CONTEXT_PRUNING_DESCRIPTION = loadPrompt("tool")
+/** Tool description loaded from prompts/tool.txt */
+const TOOL_DESCRIPTION = loadPrompt("tool")
+
+/** Dependencies the prune tool reads each time it is executed. */
+export interface PruneToolContext {
+    client: any
+    state: PluginState
+    logger: Logger
+    config: PluginConfig
+    notificationCtx: NotificationContext
+    /** Forwarded to formatPruningResultForTool when building the tool's reply. */
+    workingDirectory?: string
+}
 
 /**
  * Creates the prune tool definition.
- * Returns a tool definition that can be passed to the plugin's tool registry.
+ * Accepts numeric IDs from the list and prunes those tool outputs.
+ *
+ * Numeric IDs are resolved through the per-session ID mapping. IDs that do not
+ * resolve, that repeat within one call, or that were already pruned are ignored,
+ * so session stats and the stored pruned-ID list never count an output twice.
+ *
+ * @param ctx - Client, plugin state, logger, config and notification context.
+ * @param toolTracker - Tracker whose idle-skip flag and nudge counter are reset after pruning.
+ * @returns The tool definition produced by `tool(...)`.
  */
-export function createPruningTool(client: any, janitorCtx: JanitorContext, config: PluginConfig, toolTracker: ToolTracker): ReturnType {
+export function createPruningTool(
+    ctx: PruneToolContext,
+    toolTracker: ToolTracker
+): ReturnType<typeof tool> {
     return tool({
-        description: CONTEXT_PRUNING_DESCRIPTION,
+        description: TOOL_DESCRIPTION,
         args: {
-            reason: tool.schema.string().optional().describe(
-                "Brief reason for triggering pruning (e.g., 'task complete', 'switching focus')"
+            ids: tool.schema.array(tool.schema.number()).describe(
+                "Array of numeric IDs to prune from the list"
             ),
         },
-        async execute(args, ctx) {
-            // Skip pruning in subagent sessions, but guide the model to continue its work
-            // TODO: remove this workaround when PR 4913 is merged (primary_tools config)
-            if (await isSubagentSession(client, ctx.sessionID)) {
-                return "Pruning is unavailable in subagent sessions. Do not call this tool again. Continue with your current task - if you were in the middle of work, proceed with your next step. If you had just finished, provide your final summary/findings to return to the main agent."
+        async execute(args, toolCtx) {
+            const { client, state, logger, config, notificationCtx, workingDirectory } = ctx
+            const sessionId = toolCtx.sessionID
+
+            // Skip pruning in subagent sessions
+            if (await isSubagentSession(client, sessionId)) {
+                return "Pruning is unavailable in subagent sessions. Do not call this tool again. Continue with your current task."
+            }
+
+            // Validate input
+            if (!args.ids || args.ids.length === 0) {
+                return "No IDs provided. Check the list for available IDs to prune."
+            }
+
+            // Restore persisted state if needed
+            await ensureSessionRestored(state, sessionId, logger)
+
+            // Convert numeric IDs to actual tool call IDs.
+            // Repeated inputs and IDs that are already pruned are dropped here;
+            // comparison is case-insensitive, matching how pruned IDs are
+            // compared elsewhere in the plugin.
+            const previouslyPruned = state.prunedIds.get(sessionId) ?? []
+            const seenLower = new Set(previouslyPruned.map((id: string) => id.toLowerCase()))
+            const prunedIds: string[] = []
+            for (const numId of args.ids) {
+                const actualId = getActualId(sessionId, numId)
+                if (actualId === undefined) continue
+                const key = actualId.toLowerCase()
+                if (seenLower.has(key)) continue
+                seenLower.add(key)
+                prunedIds.push(actualId)
+            }
+
+            logger.debug("prune-tool", "ID conversion", {
+                inputIds: args.ids,
+                actualIds: prunedIds,
+                toolParamsKeys: Array.from(state.toolParameters.keys()).slice(0, 10)
+            })
+
+            // Also reached when every resolved ID had already been pruned
+            if (prunedIds.length === 0) {
+                return "None of the provided IDs were valid. Check the list for available IDs."
             }
 
-            const result = await runOnTool(
-                janitorCtx,
-                ctx.sessionID,
-                config.strategies.onTool,
-                args.reason
-            )
+            // Calculate tokens saved
+            const tokensSaved = await calculateTokensSaved(client, sessionId, prunedIds, state)
+
+            // Update stats
+            const currentStats = state.stats.get(sessionId) ?? {
+                totalToolsPruned: 0,
+                totalTokensSaved: 0,
+                totalGCTokens: 0,
+                totalGCTools: 0
+            }
+            const sessionStats: SessionStats = {
+                ...currentStats,
+                totalToolsPruned: currentStats.totalToolsPruned + prunedIds.length,
+                totalTokensSaved: currentStats.totalTokensSaved + tokensSaved
+            }
+            state.stats.set(sessionId, sessionStats)
+
+            // Update pruned IDs state (re-read after the await above so the
+            // merged list is based on the current stored value)
+            const alreadyPrunedIds = state.prunedIds.get(sessionId) ?? []
+            const allPrunedIds = [...alreadyPrunedIds, ...prunedIds]
+            state.prunedIds.set(sessionId, allPrunedIds)
+
+            // Persist state (fire-and-forget; failures are only logged)
+            saveSessionState(sessionId, new Set(allPrunedIds), sessionStats, logger)
+                .catch(err => logger.error("prune-tool", "Failed to persist state", { error: err.message }))
+
+            // Build tool metadata for notification
+            // Keys are normalized to lowercase to match lookup in notification.ts
+            const toolMetadata = new Map()
+            for (const id of prunedIds) {
+                // Try both original and lowercase since caching may vary
+                const meta = state.toolParameters.get(id) || state.toolParameters.get(id.toLowerCase())
+                if (meta) {
+                    toolMetadata.set(id.toLowerCase(), meta)
+                } else {
+                    logger.debug("prune-tool", "No metadata found for ID", {
+                        id,
+                        idLower: id.toLowerCase(),
+                        hasOriginal: state.toolParameters.has(id),
+                        hasLower: state.toolParameters.has(id.toLowerCase())
+                    })
+                }
+            }
+
+            // Send notification to user
+            await sendUnifiedNotification(notificationCtx, sessionId, {
+                aiPrunedCount: prunedIds.length,
+                aiTokensSaved: tokensSaved,
+                aiPrunedIds: prunedIds,
+                toolMetadata,
+                gcPending: null,
+                sessionStats
+            })
 
             // Skip next idle pruning since we just pruned
             toolTracker.skipNextIdle = true
 
-            // Reset nudge counter to prevent immediate re-nudging after pruning
+            // Reset nudge counter
             if (config.nudge_freq > 0) {
                 resetToolTrackerCount(toolTracker)
             }
 
-            const postPruneGuidance = "\n\nYou have already distilled relevant understanding in writing before calling this tool. Do not re-narrate; continue with your next task."
-
-            if (!result || result.prunedCount === 0) {
-                return "No prunable tool outputs found. Context is already optimized." + postPruneGuidance
+            // Format result for the AI
+            const result = {
+                prunedCount: prunedIds.length,
+                tokensSaved,
+                llmPrunedIds: prunedIds,
+                toolMetadata,
+                sessionStats
             }
 
-            return formatPruningResultForTool(result, janitorCtx.config.workingDirectory) + postPruneGuidance
+            const postPruneGuidance = "\n\nYou have already distilled relevant understanding in writing before calling this tool. Do not re-narrate; continue with your next task."
+
+            return formatPruningResultForTool(result, workingDirectory) + postPruneGuidance
         },
     })
 }
+
+/**
+ * Calculates approximate tokens saved by pruning the given tool call IDs.
+ *
+ * Looks at up to 200 session messages, collects tool output text keyed by
+ * lowercased tool call ID, and estimates tokens for the outputs being pruned.
+ * Falls back to 500 tokens per ID when no output matched or the lookup throws.
+ * The `state` parameter is currently unused.
+ */
+async function calculateTokensSaved(
+    client: any,
+    sessionId: string,
+    prunedIds: string[],
+    state: PluginState
+): Promise<number> {
+    try {
+        // Fetch session messages to get tool output content
+        const messagesResponse = await client.session.messages({
+            path: { id: sessionId },
+            query: { limit: 200 }
+        })
+        const messages = messagesResponse.data || messagesResponse
+
+        // Build map of tool call ID -> output content
+        const toolOutputs = new Map<string, string>()
+        for (const msg of messages) {
+            if (msg.role === 'tool' && msg.tool_call_id) {
+                const content = typeof msg.content === 'string'
+                    ? msg.content
+                    : JSON.stringify(msg.content)
+                toolOutputs.set(msg.tool_call_id.toLowerCase(), content)
+            }
+            // Handle Anthropic format
+            if (msg.role === 'user' && Array.isArray(msg.content)) {
+                for (const part of msg.content) {
+                    if (part.type === 'tool_result' && part.tool_use_id) {
+                        const content = typeof part.content === 'string'
+                            ? part.content
+                            : JSON.stringify(part.content)
+                        toolOutputs.set(part.tool_use_id.toLowerCase(), content)
+                    }
+                }
+            }
+        }
+
+        // Collect content for pruned outputs
+        const contents: string[] = []
+        for (const id of prunedIds) {
+            const content = toolOutputs.get(id.toLowerCase())
+            if (content) {
+                contents.push(content)
+            }
+        }
+
+        if (contents.length === 0) {
+            return prunedIds.length * 500 // fallback estimate
+        }
+
+        // Estimate tokens
+        const tokenCounts = await estimateTokensBatch(contents)
+        return tokenCounts.reduce((sum, count) => sum + count, 0)
+    } catch (error: any) {
+        // If we can't calculate, estimate based on average
+        return prunedIds.length * 500
+    }
+}
diff --git a/lib/state/id-mapping.ts b/lib/state/id-mapping.ts
new file mode 100644
index 0000000..110f4c4
--- /dev/null
+++ b/lib/state/id-mapping.ts
@@ -0,0 +1,101 @@
+/**
+ * Numeric ID mapping system for tool call IDs.
+ *
+ * Maps simple incrementing numbers (1, 2, 3...) to actual provider tool call IDs
+ * (e.g., "call_abc123xyz..."). This allows the session AI to reference tools by
+ * simple numbers when using the prune tool.
+ *
+ * Design decisions:
+ * - IDs are monotonically increasing and never reused (avoids race conditions)
+ * - Mappings are rebuilt from session messages on restore (single source of truth)
+ * - Per-session mappings to isolate sessions from each other
+ */
+
+export interface IdMapping {
+    numericToActual: Map<number, string>  // 1 → "call_abc123xyz..."
+    actualToNumeric: Map<string, number>  // "call_abc123xyz..." → 1
+    nextId: number
+}
+
+/** Per-session ID mappings */
+const sessionMappings = new Map<string, IdMapping>()
+
+/**
+ * Gets or creates the ID mapping for a session.
+ */
+function getSessionMapping(sessionId: string): IdMapping {
+    let mapping = sessionMappings.get(sessionId)
+    if (!mapping) {
+        mapping = {
+            numericToActual: new Map(),
+            actualToNumeric: new Map(),
+            nextId: 1
+        }
+        sessionMappings.set(sessionId, mapping)
+    }
+    return mapping
+}
+
+/**
+ * Assigns a numeric ID to a tool call ID if it doesn't already have one.
+ * Returns the numeric ID (existing or newly assigned).
+ */
+export function getOrCreateNumericId(sessionId: string, actualId: string): number {
+    const mapping = getSessionMapping(sessionId)
+
+    // Check if already mapped (lookup is by the exact ID string)
+    const existing = mapping.actualToNumeric.get(actualId)
+    if (existing !== undefined) {
+        return existing
+    }
+
+    // Assign new ID and record it in both directions
+    const numericId = mapping.nextId++
+    mapping.numericToActual.set(numericId, actualId)
+    mapping.actualToNumeric.set(actualId, numericId)
+
+    return numericId
+}
+
+/**
+ * Looks up the actual tool call ID for a numeric ID.
+ * Returns undefined if the session or the numeric ID has no mapping.
+ */
+export function getActualId(sessionId: string, numericId: number): string | undefined {
+    const mapping = sessionMappings.get(sessionId)
+    return mapping?.numericToActual.get(numericId)
+}
+
+/**
+ * Looks up the numeric ID for an actual tool call ID.
+ * Returns undefined if the session or the tool call ID has no mapping.
+ */
+export function getNumericId(sessionId: string, actualId: string): number | undefined {
+    const mapping = sessionMappings.get(sessionId)
+    return mapping?.actualToNumeric.get(actualId)
+}
+
+/**
+ * Gets all current numeric -> actual mappings for a session.
+ * Returns a copy (empty if the session is unknown); editing it never alters the stored mapping.
+ */
+export function getAllMappings(sessionId: string): Map<number, string> {
+    const mapping = sessionMappings.get(sessionId)
+    return new Map(mapping?.numericToActual)
+}
+
+/**
+ * Checks if a session has any ID mappings.
+ */ +export function hasMapping(sessionId: string): boolean { + return sessionMappings.has(sessionId) +} + +/** + * Gets the next numeric ID that will be assigned (without assigning it). + * Useful for knowing the current state. + */ +export function getNextId(sessionId: string): number { + const mapping = sessionMappings.get(sessionId) + return mapping?.nextId ?? 1 +} diff --git a/lib/state/index.ts b/lib/state/index.ts index 2808cb4..03d9e0e 100644 --- a/lib/state/index.ts +++ b/lib/state/index.ts @@ -7,7 +7,6 @@ export interface PluginState { stats: Map gcPending: Map toolParameters: Map - model: Map googleToolCallMapping: Map> restoredSessions: Set checkedSessions: Set @@ -20,18 +19,12 @@ export interface ToolParameterEntry { parameters: any } -export interface ModelInfo { - providerID: string - modelID: string -} - export function createPluginState(): PluginState { return { prunedIds: new Map(), stats: new Map(), gcPending: new Map(), toolParameters: new Map(), - model: new Map(), googleToolCallMapping: new Map(), restoredSessions: new Set(), checkedSessions: new Set(), diff --git a/lib/state/tool-cache.ts b/lib/state/tool-cache.ts index 27549ea..b319c37 100644 --- a/lib/state/tool-cache.ts +++ b/lib/state/tool-cache.ts @@ -1,36 +1,73 @@ import type { PluginState } from "./index" +import type { Logger } from "../logger" /** - * Cache tool parameters from OpenAI Chat Completions style messages. - * Extracts tool call IDs and their parameters from assistant messages with tool_calls. + * Cache tool parameters from OpenAI Chat Completions and Anthropic style messages. + * Extracts tool call IDs and their parameters from assistant messages. 
+ * + * Supports: + * - OpenAI format: message.tool_calls[] with id, function.name, function.arguments + * - Anthropic format: message.content[] with type='tool_use', id, name, input */ export function cacheToolParametersFromMessages( messages: any[], - state: PluginState + state: PluginState, + logger?: Logger ): void { + let openaiCached = 0 + let anthropicCached = 0 + for (const message of messages) { - if (message.role !== 'assistant' || !Array.isArray(message.tool_calls)) { + if (message.role !== 'assistant') { continue } - for (const toolCall of message.tool_calls) { - if (!toolCall.id || !toolCall.function) { - continue + // OpenAI format: tool_calls array + if (Array.isArray(message.tool_calls)) { + for (const toolCall of message.tool_calls) { + if (!toolCall.id || !toolCall.function) { + continue + } + + try { + const params = typeof toolCall.function.arguments === 'string' + ? JSON.parse(toolCall.function.arguments) + : toolCall.function.arguments + state.toolParameters.set(toolCall.id, { + tool: toolCall.function.name, + parameters: params + }) + openaiCached++ + } catch (error) { + // Silently ignore parse errors + } } + } - try { - const params = typeof toolCall.function.arguments === 'string' - ? JSON.parse(toolCall.function.arguments) - : toolCall.function.arguments - state.toolParameters.set(toolCall.id, { - tool: toolCall.function.name, - parameters: params + // Anthropic format: content array with tool_use blocks + if (Array.isArray(message.content)) { + for (const part of message.content) { + if (part.type !== 'tool_use' || !part.id || !part.name) { + continue + } + + state.toolParameters.set(part.id, { + tool: part.name, + parameters: part.input ?? 
{} }) - } catch (error) { - // Silently ignore parse errors + anthropicCached++ } } } + + // Log cache results if anything was cached + if (logger && (openaiCached > 0 || anthropicCached > 0)) { + logger.debug("tool-cache", "Cached tool parameters from messages", { + openaiFormat: openaiCached, + anthropicFormat: anthropicCached, + totalCached: state.toolParameters.size + }) + } } /** @@ -39,8 +76,11 @@ export function cacheToolParametersFromMessages( */ export function cacheToolParametersFromInput( input: any[], - state: PluginState + state: PluginState, + logger?: Logger ): void { + let cached = 0 + for (const item of input) { if (item.type !== 'function_call' || !item.call_id || !item.name) { continue @@ -54,10 +94,18 @@ export function cacheToolParametersFromInput( tool: item.name, parameters: params }) + cached++ } catch (error) { // Silently ignore parse errors } } + + if (logger && cached > 0) { + logger.debug("tool-cache", "Cached tool parameters from input", { + responsesApiFormat: cached, + totalCached: state.toolParameters.size + }) + } } /** Maximum number of entries to keep in the tool parameters cache */