
Commit 260afd1

Merge pull request #34 from Tarquinen/strip-reasoning
fix fetch wrapping for reasoning anthropic models
2 parents 7034350 + 155901b commit 260afd1

5 files changed: +111 -4 lines changed


.claude/settings.local.json

Lines changed: 15 additions & 0 deletions
```diff
@@ -0,0 +1,15 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(cat:*)",
+      "Bash(for f in ~/.local/share/opencode/storage/part/*/*)",
+      "Bash(do grep -l \"\"type\"\":\"\"reasoning\"\" $f)",
+      "Bash(done)",
+      "WebSearch",
+      "WebFetch(domain:ai-sdk.dev)",
+      "Bash(npm run typecheck:*)"
+    ],
+    "deny": [],
+    "ask": []
+  }
+}
```

CLAUDE.md

Lines changed: 51 additions & 0 deletions
```diff
@@ -0,0 +1,51 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Build Commands
+
+```bash
+npm run build      # Clean and compile TypeScript
+npm run typecheck  # Type check without emitting
+npm run dev        # Run in OpenCode plugin dev mode
+npm run test       # Run tests (node --import tsx --test tests/*.test.ts)
+```
+
+## Architecture
+
+This is an OpenCode plugin that optimizes token usage by pruning obsolete tool outputs from conversation context. The plugin is non-destructive—pruning state is kept in memory only, with original session data remaining intact.
+
+### Core Components
+
+**index.ts** - Plugin entry point. Registers:
+- Global fetch wrapper that intercepts LLM requests and replaces pruned tool outputs with placeholder text
+- Event handler for `session.status` idle events triggering automatic pruning
+- `chat.params` hook to cache session model info
+- `context_pruning` tool for AI-initiated pruning
+
+**lib/janitor.ts** - Orchestrates the two-phase pruning process:
+1. Deduplication phase: Fast, zero-cost detection of repeated tool calls (keeps most recent)
+2. AI analysis phase: Uses LLM to semantically identify obsolete outputs
+
+**lib/deduplicator.ts** - Implements duplicate detection by creating normalized signatures from tool name + parameters
+
+**lib/model-selector.ts** - Model selection cascade: config model → session model → fallback models (with provider priority order)
+
+**lib/config.ts** - Config loading with precedence: defaults → global (~/.config/opencode/dcp.jsonc) → project (.opencode/dcp.jsonc)
+
+**lib/prompt.ts** - Builds the analysis prompt with minimized message history for LLM evaluation
+
+### Key Concepts
+
+- **Tool call IDs**: Normalized to lowercase for consistent matching
+- **Protected tools**: Never pruned (default: task, todowrite, todoread, context_pruning)
+- **Batch tool expansion**: When a batch tool is pruned, its child tool calls are also pruned
+- **Strategies**: `deduplication` (fast) and `ai-analysis` (thorough), configurable per trigger (`onIdle`, `onTool`)
+
+### State Management
+
+Plugin maintains in-memory state per session:
+- `prunedIdsState`: Map of session ID → array of pruned tool call IDs
+- `statsState`: Map of session ID → cumulative pruning statistics
+- `toolParametersCache`: Cached tool parameters extracted from LLM request bodies
+- `modelCache`: Cached provider/model info from chat.params hook
```
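The deduplication signature idea described above is compact enough to sketch. This is a minimal illustration under stated assumptions, not the code in lib/deduplicator.ts (which this commit does not touch): it assumes a signature is the lowercased tool name plus the parameters serialized with sorted top-level keys.

```typescript
// Minimal sketch of signature-based duplicate detection. The actual key
// format in lib/deduplicator.ts is not shown in this commit; lowercased
// tool name + sorted-key JSON of the parameters is an assumption.
type ToolCall = { id: string; tool: string; params: Record<string, unknown> }

function signature(call: ToolCall): string {
  // Sort top-level keys so parameter order cannot defeat matching.
  const sorted = Object.fromEntries(
    Object.entries(call.params).sort(([a], [b]) => a.localeCompare(b))
  )
  return `${call.tool.toLowerCase()}:${JSON.stringify(sorted)}`
}

function findDuplicateIds(calls: ToolCall[]): string[] {
  // Keep the most recent call per signature; earlier ones are prunable.
  const latestIdBySignature = new Map<string, string>()
  for (const call of calls) {
    latestIdBySignature.set(signature(call), call.id) // later calls overwrite
  }
  const keep = new Set(latestIdBySignature.values())
  return calls.map((c) => c.id).filter((id) => !keep.has(id))
}
```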

index.ts

Lines changed: 34 additions & 1 deletion
```diff
@@ -63,10 +63,22 @@ const plugin: Plugin = (async (ctx) => {
     if (init?.body && typeof init.body === 'string') {
       try {
         const body = JSON.parse(init.body)
+
         if (body.messages && Array.isArray(body.messages)) {
           cacheToolParameters(body.messages)

-          const toolMessages = body.messages.filter((m: any) => m.role === 'tool')
+          // Check for tool messages in both formats:
+          // 1. OpenAI style: role === 'tool'
+          // 2. Anthropic style: role === 'user' with content containing tool_result
+          const toolMessages = body.messages.filter((m: any) => {
+            if (m.role === 'tool') return true
+            if (m.role === 'user' && Array.isArray(m.content)) {
+              for (const part of m.content) {
+                if (part.type === 'tool_result') return true
+              }
+            }
+            return false
+          })

           const allSessions = await ctx.client.session.list()
           const allPrunedIds = new Set<string>()
@@ -83,13 +95,34 @@
           let replacedCount = 0

           body.messages = body.messages.map((m: any) => {
+            // OpenAI style: role === 'tool' with tool_call_id
             if (m.role === 'tool' && allPrunedIds.has(m.tool_call_id?.toLowerCase())) {
               replacedCount++
               return {
                 ...m,
                 content: '[Output removed to save context - information superseded or no longer needed]'
               }
             }
+
+            // Anthropic style: role === 'user' with content array containing tool_result
+            if (m.role === 'user' && Array.isArray(m.content)) {
+              let messageModified = false
+              const newContent = m.content.map((part: any) => {
+                if (part.type === 'tool_result' && allPrunedIds.has(part.tool_use_id?.toLowerCase())) {
+                  messageModified = true
+                  replacedCount++
+                  return {
+                    ...part,
+                    content: '[Output removed to save context - information superseded or no longer needed]'
+                  }
+                }
+                return part
+              })
+              if (messageModified) {
+                return { ...m, content: newContent }
+              }
+            }
+
             return m
           })
```
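To see why the original `role === 'tool'` filter missed Anthropic traffic, compare the two message shapes side by side. A minimal sketch; the payloads are illustrative, not captured requests:

```typescript
// OpenAI style: each tool result is its own message with a tool_call_id.
const openaiToolResult = {
  role: 'tool',
  tool_call_id: 'call_abc123', // illustrative ID
  content: '...large tool output...'
}

// Anthropic style: the tool result rides inside a user message's content
// array, which the old filter never inspected.
const anthropicToolResult = {
  role: 'user',
  content: [
    { type: 'tool_result', tool_use_id: 'toolu_xyz789', content: '...large tool output...' }
  ]
}

// Same predicate as the patched filter above.
function isToolMessage(m: any): boolean {
  if (m.role === 'tool') return true
  if (m.role === 'user' && Array.isArray(m.content)) {
    return m.content.some((part: any) => part.type === 'tool_result')
  }
  return false
}

console.log([openaiToolResult, anthropicToolResult].filter(isToolMessage).length) // 2
```

Before this change, only `openaiToolResult` matched, so pruned Anthropic tool outputs were never replaced with the placeholder text.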

lib/config.ts

Lines changed: 3 additions & 3 deletions
```diff
@@ -5,7 +5,7 @@ import { parse } from 'jsonc-parser'
 import { Logger } from './logger'
 import type { PluginInput } from '@opencode-ai/plugin'

-export type PruningStrategy = "deduplication" | "ai-analysis"
+export type PruningStrategy = "deduplication" | "ai-analysis" | "strip-reasoning"

 export interface PluginConfig {
   enabled: boolean
@@ -34,8 +34,8 @@ const defaultConfig: PluginConfig = {
   strictModelSelection: false,
   pruning_summary: 'detailed',
   strategies: {
-    onIdle: ['deduplication', 'ai-analysis'],
-    onTool: ['deduplication', 'ai-analysis']
+    onIdle: ['deduplication', 'ai-analysis', "strip-reasoning"],
+    onTool: ['deduplication', 'ai-analysis', "strip-reasoning"]
   }
 }
```
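Since `strip-reasoning` now appears in both default strategy lists, a project can still opt out per trigger. A sketch of a project-level `.opencode/dcp.jsonc`, using only keys visible in this diff and assuming a partial config merges over the defaults:

```jsonc
// Hypothetical .opencode/dcp.jsonc — assumes project config merges over defaults.
{
  "enabled": true,
  "strategies": {
    // Keep strip-reasoning for idle-time pruning only...
    "onIdle": ["deduplication", "ai-analysis", "strip-reasoning"],
    // ...and skip it when pruning is triggered by a tool call.
    "onTool": ["deduplication", "ai-analysis"]
  }
}
```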

lib/janitor.ts

Lines changed: 8 additions & 0 deletions
```diff
@@ -15,6 +15,7 @@ export interface SessionStats {
 export interface PruningResult {
   prunedCount: number
   tokensSaved: number
+  thinkingIds: string[]
   deduplicatedIds: string[]
   llmPrunedIds: string[]
   deduplicationDetails: Map<string, any>
@@ -155,6 +156,12 @@ export class Janitor {
       return !metadata || !this.protectedTools.includes(metadata.tool)
     }).length

+    // PHASE 1.5: STRIP-REASONING
+    let reasoningPrunedIds: string[] = []
+
+    if (strategies.includes('strip-reasoning')) {
+    }
+
     // PHASE 2: LLM ANALYSIS
     let llmPrunedIds: string[] = []

@@ -329,6 +336,7 @@ export class Janitor {
     return {
       prunedCount: finalNewlyPrunedIds.length,
       tokensSaved,
+      thinkingIds: [],
       deduplicatedIds,
       llmPrunedIds: expandedLlmPrunedIds,
       deduplicationDetails,
```
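Note that the strip-reasoning phase lands here as a stub: the strategy is wired into the config and the result shape (`thinkingIds`), but the phase body is empty and the result always returns `thinkingIds: []`. For orientation only, a hypothetical sketch of what such a phase might collect; the message-part shape is assumed, mirroring the `"type":"reasoning"` grep permitted in `.claude/settings.local.json` above:

```typescript
// Hypothetical sketch — the committed phase body is empty. Assumes message
// parts are tagged { type: 'reasoning' }, matching the "type":"reasoning"
// grep allowed in .claude/settings.local.json.
interface MessagePart {
  type: string
  id: string
}

function collectReasoningIds(parts: MessagePart[]): string[] {
  return parts
    .filter((part) => part.type === 'reasoning')
    .map((part) => part.id.toLowerCase()) // IDs normalized to lowercase, per CLAUDE.md
}
```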
