fix: prevent session state leakage and exclude protected tools from counts

Tarquinen · Tarquinen · commit 56a13a92c6e3 · 2025-12-03T13:22:31.000-05:00
- Add clearSessionMapping() and clearAllMappings() to id-mapping.ts for resetting module-level state
- Detect session changes in hooks.ts and clear ID mappings + tool cache
- Make API format handlers mutually exclusive (else if), checking the Responses API (body.input) first, to prevent double-processing
- Exclude protected tools from 'total' count in replacement logs
- Pass the protectedTools set to the trackNewToolResults* functions so protected tools do not count toward the nudge frequency
diff --git a/lib/api-formats/synth-instruction.ts b/lib/api-formats/synth-instruction.ts
@@ -16,25 +16,33 @@ export function resetToolTrackerCount(tracker: ToolTracker): void {
 
 /**
  * Track new tool results in OpenAI/Anthropic messages.
- * Increments toolResultCount only for tools not already seen.
+ * Increments toolResultCount only for tools not already seen and not protected.
  * Returns the number of NEW tools found (since last call).
  */
-export function trackNewToolResults(messages: any[], tracker: ToolTracker): number {
+export function trackNewToolResults(messages: any[], tracker: ToolTracker, protectedTools: Set<string>): number {
     let newCount = 0
     for (const m of messages) {
         if (m.role === 'tool' && m.tool_call_id) {
             if (!tracker.seenToolResultIds.has(m.tool_call_id)) {
                 tracker.seenToolResultIds.add(m.tool_call_id)
-                tracker.toolResultCount++
-                newCount++
+                // Skip protected tools for nudge frequency counting
+                const toolName = tracker.getToolName?.(m.tool_call_id)
+                if (!toolName || !protectedTools.has(toolName)) {
+                    tracker.toolResultCount++
+                    newCount++
+                }
             }
         } else if (m.role === 'user' && Array.isArray(m.content)) {
             for (const part of m.content) {
                 if (part.type === 'tool_result' && part.tool_use_id) {
                     if (!tracker.seenToolResultIds.has(part.tool_use_id)) {
                         tracker.seenToolResultIds.add(part.tool_use_id)
-                        tracker.toolResultCount++
-                        newCount++
+                        // Skip protected tools for nudge frequency counting
+                        const toolName = tracker.getToolName?.(part.tool_use_id)
+                        if (!toolName || !protectedTools.has(toolName)) {
+                            tracker.toolResultCount++
+                            newCount++
+                        }
                     }
                 }
             }
@@ -48,7 +56,7 @@ export function trackNewToolResults(messages: any[], tracker: ToolTracker): numb
  * Uses position-based tracking since Gemini doesn't have tool call IDs.
  * Returns the number of NEW tools found (since last call).
  */
-export function trackNewToolResultsGemini(contents: any[], tracker: ToolTracker): number {
+export function trackNewToolResultsGemini(contents: any[], tracker: ToolTracker, protectedTools: Set<string>): number {
     let newCount = 0
     let positionCounter = 0
     for (const content of contents) {
@@ -60,8 +68,12 @@ export function trackNewToolResultsGemini(contents: any[], tracker: ToolTracker)
                 positionCounter++
                 if (!tracker.seenToolResultIds.has(positionId)) {
                     tracker.seenToolResultIds.add(positionId)
-                    tracker.toolResultCount++
-                    newCount++
+                    // Skip protected tools for nudge frequency counting
+                    const toolName = part.functionResponse.name
+                    if (!toolName || !protectedTools.has(toolName)) {
+                        tracker.toolResultCount++
+                        newCount++
+                    }
                 }
             }
         }
@@ -73,14 +85,18 @@ export function trackNewToolResultsGemini(contents: any[], tracker: ToolTracker)
  * Track new tool results in OpenAI Responses API input.
  * Returns the number of NEW tools found (since last call).
  */
-export function trackNewToolResultsResponses(input: any[], tracker: ToolTracker): number {
+export function trackNewToolResultsResponses(input: any[], tracker: ToolTracker, protectedTools: Set<string>): number {
     let newCount = 0
     for (const item of input) {
         if (item.type === 'function_call_output' && item.call_id) {
             if (!tracker.seenToolResultIds.has(item.call_id)) {
                 tracker.seenToolResultIds.add(item.call_id)
-                tracker.toolResultCount++
-                newCount++
+                // Skip protected tools for nudge frequency counting
+                const toolName = tracker.getToolName?.(item.call_id)
+                if (!toolName || !protectedTools.has(toolName)) {
+                    tracker.toolResultCount++
+                    newCount++
+                }
             }
         }
     }
diff --git a/lib/fetch-wrapper/gemini.ts b/lib/fetch-wrapper/gemini.ts
@@ -46,7 +46,8 @@ export async function handleGemini(
 
             if (prunableList) {
                 // Track new tool results and check if nudge threshold is met
-                trackNewToolResultsGemini(body.contents, ctx.toolTracker)
+                const protectedSet = new Set(ctx.config.protectedTools)
+                trackNewToolResultsGemini(body.contents, ctx.toolTracker, protectedSet)
                 const includeNudge = ctx.config.nudge_freq > 0 && ctx.toolTracker.toolResultCount > ctx.config.nudge_freq
 
                 const endInjection = buildEndInjection(prunableList, includeNudge)
@@ -99,6 +100,8 @@ export async function handleGemini(
     const toolPositionCounters = new Map<string, number>()
     let replacedCount = 0
     let totalFunctionResponses = 0
+    let prunableFunctionResponses = 0
+    const protectedToolsLower = new Set(ctx.config.protectedTools.map(t => t.toLowerCase()))
 
     body.contents = body.contents.map((content: any) => {
         if (!Array.isArray(content.parts)) return content
@@ -109,6 +112,11 @@ export async function handleGemini(
                 totalFunctionResponses++
                 const funcName = part.functionResponse.name?.toLowerCase()
 
+                // Count as prunable if not a protected tool
+                if (!funcName || !protectedToolsLower.has(funcName)) {
+                    prunableFunctionResponses++
+                }
+
                 if (funcName) {
                     // Get current position for this tool name and increment counter
                     const currentIndex = toolPositionCounters.get(funcName) || 0
@@ -148,7 +156,7 @@ export async function handleGemini(
     if (replacedCount > 0) {
         ctx.logger.info("fetch", "Replaced pruned tool outputs (Google/Gemini)", {
             replaced: replacedCount,
-            total: totalFunctionResponses
+            total: prunableFunctionResponses
         })
 
         if (ctx.logger.enabled) {
diff --git a/lib/fetch-wrapper/index.ts b/lib/fetch-wrapper/index.ts
@@ -58,26 +58,24 @@ export function installFetchWrapper(
                 // Capture tool IDs before handlers run to track what gets cached this request
                 const toolIdsBefore = new Set(state.toolParameters.keys())
 
-                // Try each format handler in order
-                // OpenAI Chat Completions & Anthropic style (body.messages)
-                if (body.messages && Array.isArray(body.messages)) {
-                    const result = await handleOpenAIChatAndAnthropic(body, ctx, inputUrl)
+                // Try each format handler - mutually exclusive to avoid double-processing
+                // OpenAI Responses API style (body.input) - check first as it may also have messages
+                if (body.input && Array.isArray(body.input)) {
+                    const result = await handleOpenAIResponses(body, ctx, inputUrl)
                     if (result.modified) {
                         modified = true
                     }
                 }
-
-                // Google/Gemini style (body.contents)
-                if (body.contents && Array.isArray(body.contents)) {
-                    const result = await handleGemini(body, ctx, inputUrl)
+                // OpenAI Chat Completions & Anthropic style (body.messages)
+                else if (body.messages && Array.isArray(body.messages)) {
+                    const result = await handleOpenAIChatAndAnthropic(body, ctx, inputUrl)
                     if (result.modified) {
                         modified = true
                     }
                 }
-
-                // OpenAI Responses API style (body.input)
-                if (body.input && Array.isArray(body.input)) {
-                    const result = await handleOpenAIResponses(body, ctx, inputUrl)
+                // Google/Gemini style (body.contents)
+                else if (body.contents && Array.isArray(body.contents)) {
+                    const result = await handleGemini(body, ctx, inputUrl)
                     if (result.modified) {
                         modified = true
                     }
diff --git a/lib/fetch-wrapper/openai-chat.ts b/lib/fetch-wrapper/openai-chat.ts
@@ -51,7 +51,8 @@ export async function handleOpenAIChatAndAnthropic(
 
             if (prunableList) {
                 // Track new tool results and check if nudge threshold is met
-                trackNewToolResults(body.messages, ctx.toolTracker)
+                const protectedSet = new Set(ctx.config.protectedTools)
+                trackNewToolResults(body.messages, ctx.toolTracker, protectedSet)
                 const includeNudge = ctx.config.nudge_freq > 0 && ctx.toolTracker.toolResultCount > ctx.config.nudge_freq
 
                 const endInjection = buildEndInjection(prunableList, includeNudge)
@@ -70,6 +71,9 @@ export async function handleOpenAIChatAndAnthropic(
     // Check for tool messages in both formats:
     // 1. OpenAI style: role === 'tool'
     // 2. Anthropic style: role === 'user' with content containing tool_result
+    const protectedToolsLower = new Set(ctx.config.protectedTools.map(t => t.toLowerCase()))
+    
+    // Count all tool messages
     const toolMessages = body.messages.filter((m: any) => {
         if (m.role === 'tool') return true
         if (m.role === 'user' && Array.isArray(m.content)) {
@@ -79,6 +83,29 @@ export async function handleOpenAIChatAndAnthropic(
         }
         return false
     })
+    
+    // Count only prunable (non-protected) tool messages for the total
+    let prunableToolCount = 0
+    for (const m of body.messages) {
+        if (m.role === 'tool') {
+            // Get tool name from cached metadata
+            const toolId = m.tool_call_id?.toLowerCase()
+            const metadata = toolId ? ctx.state.toolParameters.get(toolId) : undefined
+            if (!metadata || !protectedToolsLower.has(metadata.tool.toLowerCase())) {
+                prunableToolCount++
+            }
+        } else if (m.role === 'user' && Array.isArray(m.content)) {
+            for (const part of m.content) {
+                if (part.type === 'tool_result') {
+                    const toolId = part.tool_use_id?.toLowerCase()
+                    const metadata = toolId ? ctx.state.toolParameters.get(toolId) : undefined
+                    if (!metadata || !protectedToolsLower.has(metadata.tool.toLowerCase())) {
+                        prunableToolCount++
+                    }
+                }
+            }
+        }
+    }
 
     const { allSessions, allPrunedIds } = await getAllPrunedIds(ctx.client, ctx.state, ctx.logger)
 
@@ -123,7 +150,7 @@ export async function handleOpenAIChatAndAnthropic(
     if (replacedCount > 0) {
         ctx.logger.info("fetch", "Replaced pruned tool outputs", {
             replaced: replacedCount,
-            total: toolMessages.length
+            total: prunableToolCount
         })
 
         if (ctx.logger.enabled) {
diff --git a/lib/fetch-wrapper/openai-responses.ts b/lib/fetch-wrapper/openai-responses.ts
@@ -51,7 +51,8 @@ export async function handleOpenAIResponses(
 
             if (prunableList) {
                 // Track new tool results and check if nudge threshold is met
-                trackNewToolResultsResponses(body.input, ctx.toolTracker)
+                const protectedSet = new Set(ctx.config.protectedTools)
+                trackNewToolResultsResponses(body.input, ctx.toolTracker, protectedSet)
                 const includeNudge = ctx.config.nudge_freq > 0 && ctx.toolTracker.toolResultCount > ctx.config.nudge_freq
 
                 const endInjection = buildEndInjection(prunableList, includeNudge)
@@ -80,6 +81,16 @@ export async function handleOpenAIResponses(
         return { modified, body }
     }
 
+    // Count only prunable (non-protected) function outputs for the total
+    const protectedToolsLower = new Set(ctx.config.protectedTools.map(t => t.toLowerCase()))
+    let prunableFunctionOutputCount = 0
+    for (const item of functionOutputs) {
+        const toolName = item.name?.toLowerCase()
+        if (!toolName || !protectedToolsLower.has(toolName)) {
+            prunableFunctionOutputCount++
+        }
+    }
+
     let replacedCount = 0
 
     body.input = body.input.map((item: any) => {
@@ -96,7 +107,7 @@ export async function handleOpenAIResponses(
     if (replacedCount > 0) {
         ctx.logger.info("fetch", "Replaced pruned tool outputs (Responses API)", {
             replaced: replacedCount,
-            total: functionOutputs.length
+            total: prunableFunctionOutputCount
         })
 
         if (ctx.logger.enabled) {
diff --git a/lib/hooks.ts b/lib/hooks.ts
@@ -5,6 +5,7 @@ import { runOnIdle } from "./core/janitor"
 import type { PluginConfig, PruningStrategy } from "./config"
 import type { ToolTracker } from "./api-formats/synth-instruction"
 import { resetToolTrackerCount } from "./api-formats/synth-instruction"
+import { clearAllMappings } from "./state/id-mapping"
 
 export async function isSubagentSession(client: any, sessionID: string): Promise<boolean> {
     try {
@@ -72,6 +73,18 @@ export function createChatParamsHandler(
             providerID = input.message.model.providerID
         }
 
+        // Detect session change and reset per-session state
+        if (state.lastSeenSessionId && state.lastSeenSessionId !== sessionId) {
+            logger.info("chat.params", "Session changed, resetting state", {
+                from: state.lastSeenSessionId.substring(0, 8),
+                to: sessionId.substring(0, 8)
+            })
+            // Clear ID mappings from previous session
+            clearAllMappings()
+            // Clear tool parameters cache (not session-scoped, so must be cleared)
+            state.toolParameters.clear()
+        }
+
         // Track the last seen session ID for fetch wrapper correlation
         state.lastSeenSessionId = sessionId
 
diff --git a/lib/state/id-mapping.ts b/lib/state/id-mapping.ts
@@ -91,6 +91,22 @@ export function hasMapping(sessionId: string): boolean {
     return sessionMappings.has(sessionId)
 }
 
+/**
+ * Clears all ID mappings for a specific session.
+ * Call this when a session ends or when switching to a new session.
+ */
+export function clearSessionMapping(sessionId: string): void {
+    sessionMappings.delete(sessionId)
+}
+
+/**
+ * Clears all session mappings.
+ * Call this when switching sessions to ensure clean state.
+ */
+export function clearAllMappings(): void {
+    sessionMappings.clear()
+}
+
 /**
  * Gets the next numeric ID that will be assigned (without assigning it).
  * Useful for knowing the current state.