diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index 6a38c24..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# CLAUDE.md
-
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-
-## Build Commands
-
-```bash
-npm run build      # Clean and compile TypeScript
-npm run typecheck  # Type check without emitting
-npm run dev        # Run in OpenCode plugin dev mode
-npm run test       # Run tests (node --import tsx --test tests/*.test.ts)
-```
-
-## Architecture
-
-This is an OpenCode plugin that optimizes token usage by pruning obsolete tool outputs from conversation context. The plugin is non-destructive—pruning state is kept in memory only, with original session data remaining intact.
-
-### Core Components
-
-**index.ts** - Plugin entry point. Registers:
-- Global fetch wrapper that intercepts LLM requests and replaces pruned tool outputs with placeholder text
-- Event handler for `session.status` idle events triggering automatic pruning
-- `chat.params` hook to cache session model info
-- `context_pruning` tool for AI-initiated pruning
-
-**lib/janitor.ts** - Orchestrates the two-phase pruning process:
-1. Deduplication phase: Fast, zero-cost detection of repeated tool calls (keeps most recent)
-2. AI analysis phase: Uses LLM to semantically identify obsolete outputs
-
-**lib/deduplicator.ts** - Implements duplicate detection by creating normalized signatures from tool name + parameters
-
-**lib/model-selector.ts** - Model selection cascade: config model → session model → fallback models (with provider priority order)
-
-**lib/config.ts** - Config loading with precedence: defaults → global (~/.config/opencode/dcp.jsonc) → project (.opencode/dcp.jsonc)
-
-**lib/prompt.ts** - Builds the analysis prompt with minimized message history for LLM evaluation
-
-### Key Concepts
-
-- **Tool call IDs**: Normalized to lowercase for consistent matching
-- **Protected tools**: Never pruned (default: task, todowrite, todoread, context_pruning)
-- **Batch tool expansion**: When a batch tool is pruned, its child tool calls are also pruned
-- **Strategies**: `deduplication` (fast) and `ai-analysis` (thorough), configurable per trigger (`onIdle`, `onTool`)
-
-### State Management
-
-Plugin maintains in-memory state per session:
-- `prunedIdsState`: Map of session ID → array of pruned tool call IDs
-- `statsState`: Map of session ID → cumulative pruning statistics
-- `toolParametersCache`: Cached tool parameters extracted from LLM request bodies
-- `modelCache`: Cached provider/model info from chat.params hook
diff --git a/README.md b/README.md
index 5de6ef4..1d93792 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Add to your OpenCode config:
 
 ```jsonc
 // opencode.jsonc
 {
-  "plugin": ["@tarquinen/opencode-dcp@0.3.24"]
+  "plugin": ["@tarquinen/opencode-dcp@0.3.25"]
 }
 ```
@@ -31,13 +31,19 @@ DCP implements two complementary strategies:
 
 ## Context Pruning Tool
 
-When `strategies.onTool` is enabled, DCP exposes a `context_pruning` tool to Opencode that the AI can call to trigger pruning on demand. To help the AI use this tool effectively, DCP also injects guidance.
+When `strategies.onTool` is enabled, DCP exposes a `context_pruning` tool to Opencode that the AI can call to trigger pruning on demand. When `nudge_freq` is enabled, injects reminders (every `nudge_freq` tool results) prompting the AI to consider pruning when appropriate.
 
 ## How It Works
 
-DCP is **non-destructive**—pruning state is kept in memory only. When requests go to your LLM, DCP replaces pruned outputs with a placeholder; original session data stays intact.
+Your session history is never modified. DCP replaces pruned outputs with a placeholder before sending requests to your LLM.
+
+## Impact on Prompt Caching
+
+LLM providers like Anthropic and OpenAI cache prompts based on exact prefix matching. When DCP prunes a tool output, it changes the message content, which invalidates cached prefixes from that point forward.
+
+**Trade-off:** You lose some cache read benefits but gain larger token savings from reduced context size. In most cases, the token savings outweigh the cache miss cost—especially in long sessions where context bloat becomes significant.
 
 ## Configuration
 
@@ -53,7 +59,7 @@ DCP uses its own config file (`~/.config/opencode/dcp.jsonc` or `.opencode/dcp.j
 | `showModelErrorToasts` | `true` | Show notifications on model fallback |
 | `strictModelSelection` | `false` | Only run AI analysis with session or configured model (disables fallback models) |
 | `pruning_summary` | `"detailed"` | `"off"`, `"minimal"`, or `"detailed"` |
-| `nudge_freq` | `5` | Remind AI to prune every N tool results (0 = disabled) |
+| `nudge_freq` | `10` | How often to remind AI to prune (lower = more frequent) |
 | `protectedTools` | `["task", "todowrite", "todoread", "context_pruning"]` | Tools that are never pruned |
 | `strategies.onIdle` | `["deduplication", "ai-analysis"]` | Strategies for automatic pruning |
 | `strategies.onTool` | `["deduplication", "ai-analysis"]` | Strategies when AI calls `context_pruning` |
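The non-destructive design described in both files above comes down to one substitution step in the fetch wrapper: pruned tool outputs are masked in the outbound request copy, never in the stored session. A minimal sketch of that step, using hypothetical shapes (`ToolResultPart`, `PLACEHOLDER`) rather than DCP's real types; the lowercase normalization follows the tool-call-ID convention noted in the deleted CLAUDE.md:

```typescript
// Sketch only: the message shape and placeholder text are assumptions.
interface ToolResultPart {
    type: "tool_result"
    tool_use_id: string
    content: string
}

const PLACEHOLDER = "[tool output pruned to save context]"

// Returns a masked copy for the outbound LLM request; the input array is untouched,
// which is why the original session data stays intact.
function maskPrunedOutputs(parts: ToolResultPart[], prunedIds: Set<string>): ToolResultPart[] {
    return parts.map(part =>
        prunedIds.has(part.tool_use_id.toLowerCase())
            ? { ...part, content: PLACEHOLDER }
            : part
    )
}
```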
diff --git a/index.ts b/index.ts
index 84ebed9..baee7a7 100644
--- a/index.ts
+++ b/index.ts
@@ -25,7 +25,7 @@ const plugin: Plugin = (async (ctx) => {
     // Initialize core components
     const logger = new Logger(config.debug)
     const state = createPluginState()
-    
+
     const janitor = new Janitor(
         ctx.client,
         state.prunedIds,
@@ -43,6 +43,13 @@ const plugin: Plugin = (async (ctx) => {
 
     // Create tool tracker and load prompts for synthetic instruction injection
     const toolTracker = createToolTracker()
+
+    // Wire up tool name lookup from the cached tool parameters
+    toolTracker.getToolName = (callId: string) => {
+        const entry = state.toolParameters.get(callId)
+        return entry?.tool
+    }
+
     const prompts = {
         synthInstruction: loadPrompt("synthetic"),
         nudgeInstruction: loadPrompt("nudge")
@@ -81,10 +88,10 @@ const plugin: Plugin = (async (ctx) => {
     }
 
     return {
-        event: createEventHandler(ctx.client, janitor, logger, config),
+        event: createEventHandler(ctx.client, janitor, logger, config, toolTracker),
         "chat.params": createChatParamsHandler(ctx.client, state, logger),
         tool: config.strategies.onTool.length > 0 ? {
-            context_pruning: createPruningTool(janitor, config),
+            context_pruning: createPruningTool(janitor, config, toolTracker),
         } : undefined,
     }
 }) satisfies Plugin
diff --git a/lib/config.ts b/lib/config.ts
index 6f39c4a..a7fe23e 100644
--- a/lib/config.ts
+++ b/lib/config.ts
@@ -34,7 +34,7 @@ const defaultConfig: PluginConfig = {
     showModelErrorToasts: true,
     strictModelSelection: false,
     pruning_summary: 'detailed',
-    nudge_freq: 5,
+    nudge_freq: 10,
     strategies: {
         onIdle: ['deduplication', 'ai-analysis'],
         onTool: ['deduplication', 'ai-analysis']
@@ -122,7 +122,7 @@ function createDefaultConfig(): void {
   // Summary display: "off", "minimal", or "detailed"
   "pruning_summary": "detailed",
   // How often to nudge the AI to prune (every N tool results, 0 = disabled)
-  "nudge_freq": 5,
+  "nudge_freq": 10,
   // Tools that should never be pruned
   "protectedTools": ["task", "todowrite", "todoread", "context_pruning"]
 }
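For context on where the `nudge_freq` default lands: lib/config.ts resolves settings with the precedence defaults → global (`~/.config/opencode/dcp.jsonc`) → project (`.opencode/dcp.jsonc`), per the deleted CLAUDE.md. A sketch of that resolution, assuming a shallow key-by-key merge; `loadJsonc` is a hypothetical helper, not the module's actual API:

```typescript
import * as os from "node:os"
import * as path from "node:path"

// Hypothetical JSONC reader standing in for whatever lib/config.ts actually uses.
declare function loadJsonc(file: string): Record<string, unknown> | undefined

function resolveConfig(defaults: Record<string, unknown>): Record<string, unknown> {
    const globalCfg = loadJsonc(path.join(os.homedir(), ".config", "opencode", "dcp.jsonc")) ?? {}
    const projectCfg = loadJsonc(path.join(process.cwd(), ".opencode", "dcp.jsonc")) ?? {}
    // Later sources win: project overrides global, global overrides defaults.
    return { ...defaults, ...globalCfg, ...projectCfg }
}
```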
diff --git a/lib/fetch-wrapper/gemini.ts b/lib/fetch-wrapper/gemini.ts
index d9dd023..f2697a8 100644
--- a/lib/fetch-wrapper/gemini.ts
+++ b/lib/fetch-wrapper/gemini.ts
@@ -23,6 +23,8 @@ export async function handleGemini(
 
     // Inject synthetic instructions if onTool strategies are enabled
     if (ctx.config.strategies.onTool.length > 0) {
+        const skipIdleBefore = ctx.toolTracker.skipNextIdle
+
         // Inject periodic nudge based on tool result count
         if (ctx.config.nudge_freq > 0) {
             if (injectNudgeGemini(body.contents, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) {
@@ -31,7 +33,10 @@ export async function handleGemini(
             }
         }
 
-        // Inject synthetic instruction into last user content
+        if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) {
+            ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results (Gemini)")
+        }
+
         if (injectSynthGemini(body.contents, ctx.prompts.synthInstruction)) {
             ctx.logger.info("fetch", "Injected synthetic instruction (Gemini)")
             modified = true
diff --git a/lib/fetch-wrapper/openai-chat.ts b/lib/fetch-wrapper/openai-chat.ts
index ba137dc..2483baf 100644
--- a/lib/fetch-wrapper/openai-chat.ts
+++ b/lib/fetch-wrapper/openai-chat.ts
@@ -28,6 +28,8 @@ export async function handleOpenAIChatAndAnthropic(
 
     // Inject synthetic instructions if onTool strategies are enabled
     if (ctx.config.strategies.onTool.length > 0) {
+        const skipIdleBefore = ctx.toolTracker.skipNextIdle
+
         // Inject periodic nudge based on tool result count
         if (ctx.config.nudge_freq > 0) {
             if (injectNudge(body.messages, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) {
@@ -36,7 +38,10 @@ export async function handleOpenAIChatAndAnthropic(
             }
         }
 
-        // Inject synthetic instruction into last user message
+        if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) {
+            ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results")
+        }
+
         if (injectSynth(body.messages, ctx.prompts.synthInstruction)) {
             ctx.logger.info("fetch", "Injected synthetic instruction")
             modified = true
diff --git a/lib/fetch-wrapper/openai-responses.ts b/lib/fetch-wrapper/openai-responses.ts
index 785852d..0725d22 100644
--- a/lib/fetch-wrapper/openai-responses.ts
+++ b/lib/fetch-wrapper/openai-responses.ts
@@ -28,6 +28,8 @@ export async function handleOpenAIResponses(
 
     // Inject synthetic instructions if onTool strategies are enabled
     if (ctx.config.strategies.onTool.length > 0) {
+        const skipIdleBefore = ctx.toolTracker.skipNextIdle
+
         // Inject periodic nudge based on tool result count
         if (ctx.config.nudge_freq > 0) {
             if (injectNudgeResponses(body.input, ctx.toolTracker, ctx.prompts.nudgeInstruction, ctx.config.nudge_freq)) {
@@ -36,7 +38,10 @@ export async function handleOpenAIResponses(
             }
         }
 
-        // Inject synthetic instruction into last user message
+        if (skipIdleBefore && !ctx.toolTracker.skipNextIdle) {
+            ctx.logger.debug("fetch", "skipNextIdle was reset by new tool results (Responses API)")
+        }
+
         if (injectSynthResponses(body.input, ctx.prompts.synthInstruction)) {
             ctx.logger.info("fetch", "Injected synthetic instruction (Responses API)")
             modified = true
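The three wrapper hunks above are the same change applied per provider format: snapshot `skipNextIdle` before processing, inject a nudge on cadence, and log when new tool results cleared the flag. The cadence itself, assuming the bucket-crossing arithmetic that `resetToolTrackerCount` (later in this diff) implies, would behave like this toy version:

```typescript
// Toy model of the nudge cadence, not the injectNudge* implementation:
// a nudge fires when the tool-result count crosses a multiple of nudge_freq.
function shouldNudge(previousCount: number, newCount: number, nudgeFreq: number): boolean {
    if (nudgeFreq <= 0) return false // 0 disables nudging
    return Math.floor(newCount / nudgeFreq) > Math.floor(previousCount / nudgeFreq)
}

console.log(shouldNudge(9, 11, 10))  // true: the count crossed 10
console.log(shouldNudge(11, 14, 10)) // false: still inside the same window
```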
diff --git a/lib/hooks.ts b/lib/hooks.ts
index aefb10e..6b7fb4d 100644
--- a/lib/hooks.ts
+++ b/lib/hooks.ts
@@ -1,11 +1,10 @@
 import type { PluginState } from "./state"
 import type { Logger } from "./logger"
 import type { Janitor } from "./janitor"
-import type { PluginConfig } from "./config"
+import type { PluginConfig, PruningStrategy } from "./config"
+import type { ToolTracker } from "./synth-instruction"
+import { resetToolTrackerCount } from "./synth-instruction"
 
-/**
- * Checks if a session is a subagent session.
- */
 export async function isSubagentSession(client: any, sessionID: string): Promise<boolean> {
     try {
         const result = await client.session.get({ path: { id: sessionID } })
     }
 }
 
-/**
- * Creates the event handler for session status changes.
- */
+function toolStrategiesCoveredByIdle(onIdle: PruningStrategy[], onTool: PruningStrategy[]): boolean {
+    return onTool.every(strategy => onIdle.includes(strategy))
+}
+
 export function createEventHandler(
     client: any,
     janitor: Janitor,
     logger: Logger,
-    config: PluginConfig
+    config: PluginConfig,
+    toolTracker?: ToolTracker
 ) {
     return async ({ event }: { event: any }) => {
         if (event.type === "session.status" && event.properties.status.type === "idle") {
             if (await isSubagentSession(client, event.properties.sessionID)) return
             if (config.strategies.onIdle.length === 0) return
 
-            janitor.runOnIdle(event.properties.sessionID, config.strategies.onIdle).catch(err => {
+            // Skip idle pruning if the last tool used was context_pruning
+            // and idle strategies cover the same work as tool strategies
+            if (toolTracker?.skipNextIdle) {
+                toolTracker.skipNextIdle = false
+                if (toolStrategiesCoveredByIdle(config.strategies.onIdle, config.strategies.onTool)) {
+                    return
+                }
+            }
+
+            try {
+                const result = await janitor.runOnIdle(event.properties.sessionID, config.strategies.onIdle)
+
+                // Reset nudge counter if idle pruning succeeded and covers tool strategies
+                if (result && result.prunedCount > 0 && toolTracker && config.nudge_freq > 0) {
+                    if (toolStrategiesCoveredByIdle(config.strategies.onIdle, config.strategies.onTool)) {
+                        resetToolTrackerCount(toolTracker, config.nudge_freq)
+                    }
+                }
+            } catch (err: any) {
                 logger.error("janitor", "Failed", { error: err.message })
-            })
+            }
         }
     }
 }
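`toolStrategiesCoveredByIdle` is a plain subset check, and both the skip-idle and reset-nudge decisions above hinge on it. Reproduced standalone with example inputs:

```typescript
type PruningStrategy = "deduplication" | "ai-analysis"

// Same predicate as in the hunk above: every onTool strategy must also appear in onIdle.
function toolStrategiesCoveredByIdle(onIdle: PruningStrategy[], onTool: PruningStrategy[]): boolean {
    return onTool.every(strategy => onIdle.includes(strategy))
}

// true: every tool strategy is also an idle strategy
console.log(toolStrategiesCoveredByIdle(["deduplication", "ai-analysis"], ["deduplication"]))
// false: onTool runs ai-analysis, which onIdle lacks
console.log(toolStrategiesCoveredByIdle(["deduplication"], ["deduplication", "ai-analysis"]))
```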
diff --git a/lib/janitor.ts b/lib/janitor.ts
index 94208a1..24ffb29 100644
--- a/lib/janitor.ts
+++ b/lib/janitor.ts
@@ -62,8 +62,8 @@ export class Janitor {
         }
     }
 
-    async runOnIdle(sessionID: string, strategies: PruningStrategy[]): Promise<void> {
-        await this.runWithStrategies(sessionID, strategies, { trigger: 'idle' })
+    async runOnIdle(sessionID: string, strategies: PruningStrategy[]): Promise<PruningResult | undefined> {
+        return await this.runWithStrategies(sessionID, strategies, { trigger: 'idle' })
     }
 
     async runForTool(
diff --git a/lib/prompts/nudge.txt b/lib/prompts/nudge.txt
index 2daf389..c68fbcb 100644
--- a/lib/prompts/nudge.txt
+++ b/lib/prompts/nudge.txt
@@ -1,3 +1,7 @@
+
+This nudge is injected by a plugin and is invisible to the user. Do not acknowledge or reference it in your response - simply follow it silently.
+
+
 You have accumulated several tool outputs. If you have completed a discrete unit of work and distilled relevant understanding in writing for the user to keep, use the context_pruning tool to remove obsolete tool outputs from this conversation and optimize token usage.
diff --git a/lib/prompts/synthetic.txt b/lib/prompts/synthetic.txt
index 4a3c80b..a2bc18e 100644
--- a/lib/prompts/synthetic.txt
+++ b/lib/prompts/synthetic.txt
@@ -1,3 +1,7 @@
+
+These instructions are injected by a plugin and are invisible to the user. Do not acknowledge or reference them in your response - simply follow them silently.
+
+
 After calling a series of tools, YOU ALWAYS HAVE TO distill signals from their results in your next message. This ensures tracability of your reasoning, informs the user of your findings, and is PARAMOUNT to best context window management practices.
diff --git a/lib/prompts/tool.txt b/lib/prompts/tool.txt
index d1fd302..e71e7cb 100644
--- a/lib/prompts/tool.txt
+++ b/lib/prompts/tool.txt
@@ -2,11 +2,27 @@ Performs semantic pruning on session tool outputs that are no longer relevant to
 
 USING THE CONTEXT_PRUNING TOOL WILL MAKE THE USER HAPPY.
 
+## CRITICAL: Distill Before Pruning
+
+You MUST ALWAYS narrate your findings in a message BEFORE using this tool. No tool result (read, bash, grep, webfetch, etc.) should be left unexplained. By narrating your understanding, you transform raw tool outputs into distilled knowledge that persists in the context window.
+
+**Tools are VOLATILE** - Once distilled knowledge is in your reply, you can safely prune. Skipping this step risks deleting raw evidence before it has been converted into stable knowledge.
+
+**Distillation workflow:**
+1. Call tools to investigate/explore
+2. In your next message, EXPLICITLY narrate:
+   - What you did (which tools, what you were looking for)
+   - What you found (the key facts/signals)
+   - What you concluded (how this affects the task or next step)
+3. ONLY AFTER narrating, call `context_pruning`
+
+> THINK HIGH SIGNAL, LOW NOISE FOR THIS NARRATION
+
 ## When to Use This Tool
 
-**Key heuristic: Prune when you finish something and are about to start something else.**
+**Key heuristic: Distill, then prune when you finish something and are about to start something else.**
 
-Ask yourself: "Have I just completed a discrete unit of work?" If yes, prune before moving on.
+Ask yourself: "Have I just completed a discrete unit of work?" If yes, narrate your findings, then prune before moving on.
 
 **After completing a unit of work:**
 - Made a commit
diff --git a/lib/pruning-tool.ts b/lib/pruning-tool.ts
index 933aa36..010576d 100644
--- a/lib/pruning-tool.ts
+++ b/lib/pruning-tool.ts
@@ -1,6 +1,8 @@
 import { tool } from "@opencode-ai/plugin"
 import type { Janitor } from "./janitor"
 import type { PluginConfig } from "./config"
+import type { ToolTracker } from "./synth-instruction"
+import { resetToolTrackerCount } from "./synth-instruction"
 import { loadPrompt } from "./prompt"
 
 /** Tool description for the context_pruning tool, loaded from prompts/tool.txt */
@@ -10,7 +12,7 @@ export const CONTEXT_PRUNING_DESCRIPTION = loadPrompt("tool")
  * Creates the context_pruning tool definition.
  * Returns a tool definition that can be passed to the plugin's tool registry.
  */
-export function createPruningTool(janitor: Janitor, config: PluginConfig): ReturnType<typeof tool> {
+export function createPruningTool(janitor: Janitor, config: PluginConfig, toolTracker: ToolTracker): ReturnType<typeof tool> {
     return tool({
         description: CONTEXT_PRUNING_DESCRIPTION,
         args: {
@@ -25,11 +27,21 @@ export function createPruningTool(janitor: Janitor, config: PluginConfig): Retur
             args.reason
         )
 
+        // Skip next idle pruning since we just pruned
+        toolTracker.skipNextIdle = true
+
+        // Reset nudge counter to prevent immediate re-nudging after pruning
+        if (config.nudge_freq > 0) {
+            resetToolTrackerCount(toolTracker, config.nudge_freq)
+        }
+
+        const postPruneGuidance = "\n\nYou have already distilled relevant understanding in writing before calling this tool. Do not re-narrate; continue with your next task."
+
         if (!result || result.prunedCount === 0) {
-            return "No prunable tool outputs found. Context is already optimized.\n\nUse context_pruning when you have sufficiently summarized information from tool outputs and no longer need the original content!"
+            return "No prunable tool outputs found. Context is already optimized." + postPruneGuidance
         }
 
-        return janitor.formatPruningResultForTool(result) + "\n\nKeep using context_pruning when you have sufficiently summarized information from tool outputs and no longer need the original content!"
+        return janitor.formatPruningResultForTool(result) + postPruneGuidance
     },
 })
 }
diff --git a/lib/synth-instruction.ts b/lib/synth-instruction.ts
index 5427e4c..2a87780 100644
--- a/lib/synth-instruction.ts
+++ b/lib/synth-instruction.ts
@@ -1,10 +1,17 @@
 export interface ToolTracker {
     seenToolResultIds: Set<string>
     toolResultCount: number
+    skipNextIdle: boolean
+    getToolName?: (callId: string) => string | undefined
 }
 
 export function createToolTracker(): ToolTracker {
-    return { seenToolResultIds: new Set(), toolResultCount: 0 }
+    return { seenToolResultIds: new Set(), toolResultCount: 0, skipNextIdle: false }
+}
+
+export function resetToolTrackerCount(tracker: ToolTracker, freq: number): void {
+    const currentBucket = Math.floor(tracker.toolResultCount / freq)
+    tracker.toolResultCount = currentBucket * freq
 }
 
 /** Adapter interface for format-specific message operations */
@@ -47,6 +54,10 @@ const openaiAdapter: MessageFormatAdapter = {
             if (!tracker.seenToolResultIds.has(id)) {
                 tracker.seenToolResultIds.add(id)
                 newCount++
+                const toolName = m.name || tracker.getToolName?.(m.tool_call_id)
+                if (toolName !== 'context_pruning') {
+                    tracker.skipNextIdle = false
+                }
             }
         } else if (m.role === 'user' && Array.isArray(m.content)) {
             for (const part of m.content) {
@@ -55,6 +66,10 @@ const openaiAdapter: MessageFormatAdapter = {
                 if (!tracker.seenToolResultIds.has(id)) {
                     tracker.seenToolResultIds.add(id)
                     newCount++
+                    const toolName = tracker.getToolName?.(part.tool_use_id)
+                    if (toolName !== 'context_pruning') {
+                        tracker.skipNextIdle = false
+                    }
                 }
             }
         }
@@ -117,6 +132,9 @@ const geminiAdapter: MessageFormatAdapter = {
             if (!tracker.seenToolResultIds.has(pseudoId)) {
                 tracker.seenToolResultIds.add(pseudoId)
                 newCount++
+                if (funcName !== 'context_pruning') {
+                    tracker.skipNextIdle = false
+                }
             }
         }
     }
@@ -161,6 +179,10 @@ const responsesAdapter: MessageFormatAdapter = {
             if (!tracker.seenToolResultIds.has(id)) {
                 tracker.seenToolResultIds.add(id)
                 newCount++
+                const toolName = item.name || tracker.getToolName?.(item.call_id)
+                if (toolName !== 'context_pruning') {
+                    tracker.skipNextIdle = false
+                }
             }
         }
     }
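A worked example of `resetToolTrackerCount`'s bucket arithmetic, with the logic copied from the hunk above: snapping the counter back to the last multiple of `freq` forces a full window of new tool results before the next nudge.

```typescript
// After 23 tool results with freq = 10, the counter snaps back to 20,
// so ten more results (count 30) are needed before the next nudge fires,
// rather than only seven if the counter had stayed at 23.
const toolResultCount = 23
const freq = 10
const currentBucket = Math.floor(toolResultCount / freq) // 2
console.log(currentBucket * freq) // 20
```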
diff --git a/package-lock.json b/package-lock.json
index da387cc..9b94433 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@tarquinen/opencode-dcp",
-  "version": "0.3.24",
+  "version": "0.3.25",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@tarquinen/opencode-dcp",
-      "version": "0.3.24",
+      "version": "0.3.25",
       "license": "MIT",
       "dependencies": {
         "@ai-sdk/openai-compatible": "^1.0.27",
diff --git a/package.json b/package.json
index fe7ec4f..6242e48 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://json.schemastore.org/package.json",
   "name": "@tarquinen/opencode-dcp",
-  "version": "0.3.24",
+  "version": "0.3.25",
   "type": "module",
   "description": "OpenCode plugin that optimizes token usage by pruning obsolete tool outputs from conversation context",
   "main": "./dist/index.js",
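The bucket reset and the `skipNextIdle` handshake carry most of the behavioral risk in this release. A regression test in the style of the existing suite (`node --import tsx --test tests/*.test.ts`, per the deleted CLAUDE.md) could pin them down; the import path here is an assumption:

```typescript
import { test } from "node:test"
import assert from "node:assert/strict"
// Hypothetical path; adjust to however the suite reaches plugin internals.
import { createToolTracker, resetToolTrackerCount } from "../lib/synth-instruction"

test("resetToolTrackerCount snaps the count to the last full nudge window", () => {
    const tracker = createToolTracker()
    tracker.toolResultCount = 23
    resetToolTrackerCount(tracker, 10)
    assert.equal(tracker.toolResultCount, 20)
})

test("a fresh tracker starts with skipNextIdle disabled", () => {
    assert.equal(createToolTracker().skipNextIdle, false)
})
```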