Skip to content

Commit cc43552

Browse files
authored
🤖 Fix: Preserve thinking blocks during stream interrupts (#377)
## Problem

When Extended Thinking is enabled and a stream is interrupted during the thinking phase (before any text is generated), the retry would enter an infinite loop with the error:

```
messages.87.content.0.type: Expected 'thinking' or 'redacted_thinking', but found 'text'
```

This manifested as a ×19 retry loop (or more) where each retry would fail with the same error.

## Root Cause

1. Stream interrupted during thinking phase → partial message has only `reasoning` parts, no `text` parts
2. `filterEmptyAssistantMessages` filtered out reasoning-only messages
3. On retry, API received history without thinking blocks
4. API rejected request because Extended Thinking requires thinking blocks in history
5. Error triggered another retry → infinite loop

## Solution

Modified `filterEmptyAssistantMessages` to accept a `preserveReasoningOnly` parameter. When Extended Thinking is enabled (`thinkingLevel` is set), reasoning-only messages are preserved in the history to comply with API requirements.

## Testing

- Added 5 comprehensive test cases covering various scenarios
- All 152 existing tests pass
- Verified reasoning-only messages are:
  - Filtered out when thinking is disabled (existing behavior)
  - Preserved when thinking is enabled (new behavior)

_Generated with `cmux`_
1 parent dee39f5 commit cc43552

File tree

3 files changed

+193
-6
lines changed

3 files changed

+193
-6
lines changed

src/services/aiService.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,10 @@ export class AIService extends EventEmitter {
428428
const toolNamesForSentinel = Object.keys(earlyTools);
429429

430430
// Filter out assistant messages with only reasoning (no text/tools)
431-
const filteredMessages = filterEmptyAssistantMessages(messages);
431+
// EXCEPTION: When extended thinking is enabled, preserve reasoning-only messages
432+
// to comply with Extended Thinking API requirements
433+
const preserveReasoningOnly = Boolean(thinkingLevel);
434+
const filteredMessages = filterEmptyAssistantMessages(messages, preserveReasoningOnly);
432435
log.debug(`Filtered ${messages.length - filteredMessages.length} empty assistant messages`);
433436
log.debug_obj(`${workspaceId}/1a_filtered_messages.json`, filteredMessages);
434437

src/utils/messages/modelMessageTransform.test.ts

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
validateAnthropicCompliance,
66
addInterruptedSentinel,
77
injectModeTransition,
8+
filterEmptyAssistantMessages,
89
} from "./modelMessageTransform";
910
import type { CmuxMessage } from "@/types/message";
1011

@@ -951,3 +952,156 @@ describe("injectModeTransition", () => {
951952
});
952953
});
953954
});
955+
956+
describe("filterEmptyAssistantMessages", () => {
957+
it("should filter out assistant messages with only reasoning when preserveReasoningOnly=false", () => {
958+
const messages: CmuxMessage[] = [
959+
{
960+
id: "user-1",
961+
role: "user",
962+
parts: [{ type: "text", text: "Hello" }],
963+
metadata: { timestamp: 1000 },
964+
},
965+
{
966+
id: "assistant-1",
967+
role: "assistant",
968+
parts: [{ type: "reasoning", text: "Let me think about this..." }],
969+
metadata: { timestamp: 2000 },
970+
},
971+
];
972+
973+
const result = filterEmptyAssistantMessages(messages, false);
974+
975+
// Reasoning-only message should be filtered out
976+
expect(result.length).toBe(1);
977+
expect(result[0].id).toBe("user-1");
978+
});
979+
980+
it("should filter out assistant messages with empty parts array (placeholder messages)", () => {
981+
const messages: CmuxMessage[] = [
982+
{
983+
id: "user-1",
984+
role: "user",
985+
parts: [{ type: "text", text: "Hello" }],
986+
metadata: { timestamp: 1000 },
987+
},
988+
{
989+
id: "assistant-1",
990+
role: "assistant",
991+
parts: [], // Empty placeholder message
992+
metadata: { timestamp: 2000 },
993+
},
994+
{
995+
id: "assistant-2",
996+
role: "assistant",
997+
parts: [], // Another empty placeholder
998+
metadata: { timestamp: 3000 },
999+
},
1000+
];
1001+
1002+
// Empty messages should be filtered out regardless of preserveReasoningOnly
1003+
const result1 = filterEmptyAssistantMessages(messages, false);
1004+
expect(result1.length).toBe(1);
1005+
expect(result1[0].id).toBe("user-1");
1006+
1007+
const result2 = filterEmptyAssistantMessages(messages, true);
1008+
expect(result2.length).toBe(1);
1009+
expect(result2[0].id).toBe("user-1");
1010+
});
1011+
1012+
it("should preserve assistant messages with only reasoning when preserveReasoningOnly=true", () => {
1013+
const messages: CmuxMessage[] = [
1014+
{
1015+
id: "user-1",
1016+
role: "user",
1017+
parts: [{ type: "text", text: "Hello" }],
1018+
metadata: { timestamp: 1000 },
1019+
},
1020+
{
1021+
id: "assistant-1",
1022+
role: "assistant",
1023+
parts: [{ type: "reasoning", text: "Let me think about this..." }],
1024+
metadata: { timestamp: 2000 },
1025+
},
1026+
];
1027+
1028+
const result = filterEmptyAssistantMessages(messages, true);
1029+
1030+
// Reasoning-only message should be preserved when preserveReasoningOnly=true
1031+
expect(result.length).toBe(2);
1032+
expect(result[1].id).toBe("assistant-1");
1033+
expect(result[1].parts).toEqual([{ type: "reasoning", text: "Let me think about this..." }]);
1034+
});
1035+
1036+
it("should preserve assistant messages with text content regardless of preserveReasoningOnly", () => {
1037+
const messages: CmuxMessage[] = [
1038+
{
1039+
id: "assistant-1",
1040+
role: "assistant",
1041+
parts: [
1042+
{ type: "reasoning", text: "Thinking..." },
1043+
{ type: "text", text: "Here's my answer" },
1044+
],
1045+
metadata: { timestamp: 2000 },
1046+
},
1047+
];
1048+
1049+
// With preserveReasoningOnly=false
1050+
const result1 = filterEmptyAssistantMessages(messages, false);
1051+
expect(result1.length).toBe(1);
1052+
expect(result1[0].id).toBe("assistant-1");
1053+
1054+
// With preserveReasoningOnly=true
1055+
const result2 = filterEmptyAssistantMessages(messages, true);
1056+
expect(result2.length).toBe(1);
1057+
expect(result2[0].id).toBe("assistant-1");
1058+
});
1059+
1060+
it("should filter out assistant messages with only empty text regardless of preserveReasoningOnly", () => {
1061+
const messages: CmuxMessage[] = [
1062+
{
1063+
id: "assistant-1",
1064+
role: "assistant",
1065+
parts: [{ type: "text", text: "" }],
1066+
metadata: { timestamp: 2000 },
1067+
},
1068+
];
1069+
1070+
// With preserveReasoningOnly=false
1071+
const result1 = filterEmptyAssistantMessages(messages, false);
1072+
expect(result1.length).toBe(0);
1073+
1074+
// With preserveReasoningOnly=true
1075+
const result2 = filterEmptyAssistantMessages(messages, true);
1076+
expect(result2.length).toBe(0);
1077+
});
1078+
1079+
it("should preserve messages interrupted during thinking phase when preserveReasoningOnly=true", () => {
1080+
// Simulates an interrupted stream during Extended Thinking
1081+
const messages: CmuxMessage[] = [
1082+
{
1083+
id: "user-1",
1084+
role: "user",
1085+
parts: [{ type: "text", text: "Solve this problem" }],
1086+
metadata: { timestamp: 1000 },
1087+
},
1088+
{
1089+
id: "assistant-1",
1090+
role: "assistant",
1091+
parts: [{ type: "reasoning", text: "Let me analyze this step by step..." }],
1092+
metadata: { timestamp: 2000, partial: true },
1093+
},
1094+
];
1095+
1096+
// When thinking is disabled, filter out reasoning-only message
1097+
const result1 = filterEmptyAssistantMessages(messages, false);
1098+
expect(result1.length).toBe(1);
1099+
expect(result1[0].id).toBe("user-1");
1100+
1101+
// When thinking is enabled, preserve it for API compliance
1102+
const result2 = filterEmptyAssistantMessages(messages, true);
1103+
expect(result2.length).toBe(2);
1104+
expect(result2[1].id).toBe("assistant-1");
1105+
expect(result2[1].metadata?.partial).toBe(true);
1106+
});
1107+
});

src/utils/messages/modelMessageTransform.ts

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,57 @@ import type { ModelMessage, AssistantModelMessage, ToolModelMessage } from "ai";
77
import type { CmuxMessage } from "@/types/message";
88

99
/**
10-
* Filter out assistant messages that only contain reasoning parts (no text or tool parts).
11-
* These messages are invalid for the API and provide no value to the model.
12-
* This happens when a message is interrupted during thinking before producing any text.
10+
* Filter out assistant messages that are empty or only contain reasoning parts.
11+
* Empty messages (no parts or only empty text) and reasoning-only messages are
12+
* invalid for the API and provide no value to the model.
13+
*
14+
* Common scenarios:
15+
* 1. Placeholder messages with empty parts arrays (stream interrupted before any content)
16+
* 2. Messages interrupted during thinking phase before producing text
17+
*
18+
* EXCEPTION: When extended thinking is enabled, preserve reasoning-only messages.
19+
* The Extended Thinking API requires thinking blocks to be present in message history,
20+
* even if they were interrupted before producing text content.
1321
*
1422
* Note: This function filters out reasoning-only messages but does NOT strip reasoning
1523
* parts from messages that have other content. Reasoning parts are handled differently
1624
* per provider (see stripReasoningForOpenAI).
25+
*
26+
* @param messages - The messages to filter
27+
* @param preserveReasoningOnly - If true, keep reasoning-only messages (for Extended Thinking)
1728
*/
18-
export function filterEmptyAssistantMessages(messages: CmuxMessage[]): CmuxMessage[] {
29+
export function filterEmptyAssistantMessages(
30+
messages: CmuxMessage[],
31+
preserveReasoningOnly = false
32+
): CmuxMessage[] {
1933
return messages.filter((msg) => {
2034
// Keep all non-assistant messages
2135
if (msg.role !== "assistant") {
2236
return true;
2337
}
2438

39+
// Filter out messages with no parts at all (placeholder messages)
40+
if (msg.parts?.length === 0) {
41+
return false;
42+
}
43+
2544
// Keep assistant messages that have at least one text or tool part
2645
const hasContent = msg.parts.some(
2746
(part) => (part.type === "text" && part.text) || part.type === "dynamic-tool"
2847
);
2948

30-
return hasContent;
49+
if (hasContent) {
50+
return true;
51+
}
52+
53+
// If preserveReasoningOnly is enabled, keep messages with reasoning parts
54+
// (needed for Extended Thinking API compliance)
55+
if (preserveReasoningOnly) {
56+
const hasReasoning = msg.parts.some((part) => part.type === "reasoning");
57+
return hasReasoning;
58+
}
59+
60+
return false;
3161
});
3262
}
3363

0 commit comments

Comments
 (0)