🤖 Fix: Preserve thinking blocks during stream interrupts (#377)

kylecarbs · web-flow · commit cc43552a292c · 2025-10-21T20:21:12.000Z
## Problem

When Extended Thinking is enabled and a stream is interrupted during the
thinking phase (before any text is generated), the retry would enter an
infinite loop with the error:

```
messages.87.content.0.type: Expected 'thinking' or 'redacted_thinking', but found 'text'
```

This manifested as a retry loop (observed at ×19 retries or more) in which
every retry failed with the same error.

## Root Cause

1. Stream interrupted during thinking phase → partial message has only
`reasoning` parts, no `text` parts
2. `filterEmptyAssistantMessages` filtered out reasoning-only messages
3. On retry, API received history without thinking blocks
4. API rejected request because Extended Thinking requires thinking
blocks in history
5. Error triggered another retry → infinite loop

## Solution

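Modified `filterEmptyAssistantMessages` to accept a
`preserveReasoningOnly` parameter (default `false`). When Extended Thinking
is enabled (`thinkingLevel` is set), `AIService` passes `true` so that
reasoning-only messages are preserved in the history to comply with API
requirements. Assistant messages with an empty `parts` array or only empty
text are still filtered out in both modes.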

## Testing

- Added 6 test cases for `filterEmptyAssistantMessages` covering both modes
- All 152 existing tests pass
- Verified reasoning-only messages are:
  - Filtered out when thinking is disabled (existing behavior)
  - Preserved when thinking is enabled (new behavior)

_Generated with `cmux`_
diff --git a/src/services/aiService.ts b/src/services/aiService.ts
@@ -428,7 +428,10 @@ export class AIService extends EventEmitter {
       const toolNamesForSentinel = Object.keys(earlyTools);
 
       // Filter out assistant messages with only reasoning (no text/tools)
-      const filteredMessages = filterEmptyAssistantMessages(messages);
+      // EXCEPTION: When extended thinking is enabled, preserve reasoning-only messages
+      // to comply with Extended Thinking API requirements
+      const preserveReasoningOnly = Boolean(thinkingLevel);
+      const filteredMessages = filterEmptyAssistantMessages(messages, preserveReasoningOnly);
       log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
       log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
 
diff --git a/src/utils/messages/modelMessageTransform.test.ts b/src/utils/messages/modelMessageTransform.test.ts
@@ -5,6 +5,7 @@ import {
   validateAnthropicCompliance,
   addInterruptedSentinel,
   injectModeTransition,
+  filterEmptyAssistantMessages,
 } from "./modelMessageTransform";
 import type { CmuxMessage } from "@/types/message";
 
@@ -951,3 +952,156 @@ describe("injectModeTransition", () => {
     });
   });
 });
+
+describe("filterEmptyAssistantMessages", () => {
+  it("should filter out assistant messages with only reasoning when preserveReasoningOnly=false", () => {
+    const messages: CmuxMessage[] = [
+      {
+        id: "user-1",
+        role: "user",
+        parts: [{ type: "text", text: "Hello" }],
+        metadata: { timestamp: 1000 },
+      },
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [{ type: "reasoning", text: "Let me think about this..." }],
+        metadata: { timestamp: 2000 },
+      },
+    ];
+
+    const result = filterEmptyAssistantMessages(messages, false);
+
+    // Reasoning-only message should be filtered out
+    expect(result.length).toBe(1);
+    expect(result[0].id).toBe("user-1");
+  });
+
+  it("should filter out assistant messages with empty parts array (placeholder messages)", () => {
+    const messages: CmuxMessage[] = [
+      {
+        id: "user-1",
+        role: "user",
+        parts: [{ type: "text", text: "Hello" }],
+        metadata: { timestamp: 1000 },
+      },
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [], // Empty placeholder message
+        metadata: { timestamp: 2000 },
+      },
+      {
+        id: "assistant-2",
+        role: "assistant",
+        parts: [], // Another empty placeholder
+        metadata: { timestamp: 3000 },
+      },
+    ];
+
+    // Empty messages should be filtered out regardless of preserveReasoningOnly
+    const result1 = filterEmptyAssistantMessages(messages, false);
+    expect(result1.length).toBe(1);
+    expect(result1[0].id).toBe("user-1");
+
+    const result2 = filterEmptyAssistantMessages(messages, true);
+    expect(result2.length).toBe(1);
+    expect(result2[0].id).toBe("user-1");
+  });
+
+  it("should preserve assistant messages with only reasoning when preserveReasoningOnly=true", () => {
+    const messages: CmuxMessage[] = [
+      {
+        id: "user-1",
+        role: "user",
+        parts: [{ type: "text", text: "Hello" }],
+        metadata: { timestamp: 1000 },
+      },
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [{ type: "reasoning", text: "Let me think about this..." }],
+        metadata: { timestamp: 2000 },
+      },
+    ];
+
+    const result = filterEmptyAssistantMessages(messages, true);
+
+    // Reasoning-only message should be preserved when preserveReasoningOnly=true
+    expect(result.length).toBe(2);
+    expect(result[1].id).toBe("assistant-1");
+    expect(result[1].parts).toEqual([{ type: "reasoning", text: "Let me think about this..." }]);
+  });
+
+  it("should preserve assistant messages with text content regardless of preserveReasoningOnly", () => {
+    const messages: CmuxMessage[] = [
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [
+          { type: "reasoning", text: "Thinking..." },
+          { type: "text", text: "Here's my answer" },
+        ],
+        metadata: { timestamp: 2000 },
+      },
+    ];
+
+    // With preserveReasoningOnly=false
+    const result1 = filterEmptyAssistantMessages(messages, false);
+    expect(result1.length).toBe(1);
+    expect(result1[0].id).toBe("assistant-1");
+
+    // With preserveReasoningOnly=true
+    const result2 = filterEmptyAssistantMessages(messages, true);
+    expect(result2.length).toBe(1);
+    expect(result2[0].id).toBe("assistant-1");
+  });
+
+  it("should filter out assistant messages with only empty text regardless of preserveReasoningOnly", () => {
+    const messages: CmuxMessage[] = [
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [{ type: "text", text: "" }],
+        metadata: { timestamp: 2000 },
+      },
+    ];
+
+    // With preserveReasoningOnly=false
+    const result1 = filterEmptyAssistantMessages(messages, false);
+    expect(result1.length).toBe(0);
+
+    // With preserveReasoningOnly=true
+    const result2 = filterEmptyAssistantMessages(messages, true);
+    expect(result2.length).toBe(0);
+  });
+
+  it("should preserve messages interrupted during thinking phase when preserveReasoningOnly=true", () => {
+    // Simulates an interrupted stream during Extended Thinking
+    const messages: CmuxMessage[] = [
+      {
+        id: "user-1",
+        role: "user",
+        parts: [{ type: "text", text: "Solve this problem" }],
+        metadata: { timestamp: 1000 },
+      },
+      {
+        id: "assistant-1",
+        role: "assistant",
+        parts: [{ type: "reasoning", text: "Let me analyze this step by step..." }],
+        metadata: { timestamp: 2000, partial: true },
+      },
+    ];
+
+    // When thinking is disabled, filter out reasoning-only message
+    const result1 = filterEmptyAssistantMessages(messages, false);
+    expect(result1.length).toBe(1);
+    expect(result1[0].id).toBe("user-1");
+
+    // When thinking is enabled, preserve it for API compliance
+    const result2 = filterEmptyAssistantMessages(messages, true);
+    expect(result2.length).toBe(2);
+    expect(result2[1].id).toBe("assistant-1");
+    expect(result2[1].metadata?.partial).toBe(true);
+  });
+});
diff --git a/src/utils/messages/modelMessageTransform.ts b/src/utils/messages/modelMessageTransform.ts
@@ -7,27 +7,57 @@ import type { ModelMessage, AssistantModelMessage, ToolModelMessage } from "ai";
 import type { CmuxMessage } from "@/types/message";
 
 /**
- * Filter out assistant messages that only contain reasoning parts (no text or tool parts).
- * These messages are invalid for the API and provide no value to the model.
- * This happens when a message is interrupted during thinking before producing any text.
+ * Filter out assistant messages that are empty or only contain reasoning parts.
+ * Empty messages (no parts or only empty text) and reasoning-only messages are
+ * invalid for the API and provide no value to the model.
+ *
+ * Common scenarios:
+ * 1. Placeholder messages with empty parts arrays (stream interrupted before any content)
+ * 2. Messages interrupted during thinking phase before producing text
+ *
+ * EXCEPTION: When extended thinking is enabled, preserve reasoning-only messages.
+ * The Extended Thinking API requires thinking blocks to be present in message history,
+ * even if they were interrupted before producing text content.
  *
  * Note: This function filters out reasoning-only messages but does NOT strip reasoning
  * parts from messages that have other content. Reasoning parts are handled differently
  * per provider (see stripReasoningForOpenAI).
+ *
+ * @param messages - The messages to filter
+ * @param preserveReasoningOnly - If true, keep reasoning-only messages (for Extended Thinking)
  */
-export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
+export function filterEmptyAssistantMessages(
+  messages: CmuxMessage[],
+  preserveReasoningOnly = false
+): CmuxMessage[] {
   return messages.filter((msg) => {
     // Keep all non-assistant messages
     if (msg.role !== "assistant") {
       return true;
     }
 
+    // Drop messages with no usable parts: an empty array (placeholder) or a missing one
+    if (!msg.parts?.length) {
+      return false;
+    }
+
     // Keep assistant messages that have at least one text or tool part
     const hasContent = msg.parts.some(
       (part) => (part.type === "text" && part.text) || part.type === "dynamic-tool"
     );
 
-    return hasContent;
+    if (hasContent) {
+      return true;
+    }
+
+    // If preserveReasoningOnly is enabled, keep messages with reasoning parts
+    // (needed for Extended Thinking API compliance)
+    if (preserveReasoningOnly) {
+      const hasReasoning = msg.parts.some((part) => part.type === "reasoning");
+      return hasReasoning;
+    }
+
+    return false;
   });
 }