
Commit 8663724

fix: handle empty stream responses from GLM models
- Add graceful handling for empty API responses in Task.ts
- Implement GLM-specific fallback responses in OpenAI and base providers
- Add retry logic for GLM models when empty streams occur
- Add comprehensive tests for empty stream scenarios

Fixes #8482
1 parent 7ba8e33 commit 8663724
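
The core of the change is a model-id check shared by both providers: when a stream finishes without ever yielding text, and the model id looks like a GLM/ChatGLM model, a fixed fallback text chunk is emitted instead of surfacing an empty result. Below is a condensed TypeScript sketch of that pattern; `isGlmModel` and `withEmptyStreamFallback` are illustrative names only, since the commit inlines the check directly inside each provider's `createMessage` generator (see the diffs below).

type StreamChunk =
	| { type: "text"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number }

function isGlmModel(modelId: string | undefined): boolean {
	// GLM / ChatGLM model ids are matched case-insensitively by substring,
	// mirroring the check used in both providers in this commit.
	if (!modelId) return false
	const id = modelId.toLowerCase()
	return id.includes("glm") || id.includes("chatglm")
}

async function* withEmptyStreamFallback(
	modelId: string,
	stream: AsyncIterable<StreamChunk>,
): AsyncGenerator<StreamChunk> {
	let hasContent = false
	for await (const chunk of stream) {
		if (chunk.type === "text" && chunk.text) hasContent = true
		yield chunk
	}
	// Only GLM/ChatGLM models get the synthetic fallback text; other models
	// keep their original empty-stream behavior.
	if (!hasContent && isGlmModel(modelId)) {
		yield {
			type: "text",
			text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
		}
	}
}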

File tree

4 files changed (+272, -10 lines)
Lines changed: 193 additions & 0 deletions
@@ -0,0 +1,193 @@
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { OpenAiHandler } from "../openai"
+import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"
+
+describe("GLM Empty Stream Handling", () => {
+	describe("OpenAiHandler", () => {
+		it("should provide fallback response for GLM models with empty streams", async () => {
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream - only usage, no content
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+
+			const handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "glm-4.6",
+				openAiStreamingEnabled: true,
+			})
+
+			// Replace the client with our mock
+			;(handler as any).client = mockClient
+
+			const chunks = []
+			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toContain("trouble generating a response")
+
+			// Should still have usage metrics
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+		})
+
+		it("should not provide fallback for non-GLM models with empty streams", async () => {
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream - only usage, no content
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+
+			const handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+				openAiStreamingEnabled: true,
+			})
+
+			// Replace the client with our mock
+			;(handler as any).client = mockClient
+
+			const chunks = []
+			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should NOT have a fallback text response for non-GLM models
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(0)
+
+			// Should still have usage metrics
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+		})
+	})
+
+	describe("BaseOpenAiCompatibleProvider", () => {
+		class TestProvider extends BaseOpenAiCompatibleProvider<"glm-4.6" | "other-model"> {
+			constructor(modelId: "glm-4.6" | "other-model") {
+				super({
+					providerName: "Test",
+					baseURL: "https://test.com",
+					apiKey: "test-key",
+					defaultProviderModelId: modelId,
+					providerModels: {
+						"glm-4.6": {
+							maxTokens: 4096,
+							contextWindow: 8192,
+							supportsPromptCache: false,
+							inputPrice: 0,
+							outputPrice: 0,
+						},
+						"other-model": {
+							maxTokens: 4096,
+							contextWindow: 8192,
+							supportsPromptCache: false,
+							inputPrice: 0,
+							outputPrice: 0,
+						},
+					},
+					apiModelId: modelId,
+				})
+			}
+		}
+
+		it("should provide fallback response for GLM models with empty streams", async () => {
+			const provider = new TestProvider("glm-4.6")
+
+			// Mock the client
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+			;(provider as any).client = mockClient
+
+			const chunks = []
+			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toContain("trouble generating a response")
+		})
+
+		it("should not provide fallback for non-GLM models", async () => {
+			const provider = new TestProvider("other-model")
+
+			// Mock the client
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+			;(provider as any).client = mockClient
+
+			const chunks = []
+			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should NOT have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(0)
+		})
+	})
+})

src/api/providers/base-openai-compatible-provider.ts

Lines changed: 15 additions & 0 deletions
@@ -98,11 +98,14 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
		metadata?: ApiHandlerCreateMessageMetadata,
	): ApiStream {
		const stream = await this.createStream(systemPrompt, messages, metadata)
+		let hasContent = false
+		const modelId = this.getModel().id

		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta

			if (delta?.content) {
+				hasContent = true
				yield {
					type: "text",
					text: delta.content,
@@ -117,6 +120,18 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
				}
			}
		}
+
+		// For GLM models that may return empty streams, provide a fallback response
+		if (
+			!hasContent &&
+			modelId &&
+			(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+		) {
+			yield {
+				type: "text",
+				text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
+			}
+		}
	}

	async completePrompt(prompt: string): Promise<string> {

src/api/providers/openai.ts

Lines changed: 16 additions & 0 deletions
@@ -189,17 +189,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
		)

		let lastUsage
+		let hasContent = false

		for await (const chunk of stream) {
			const delta = chunk.choices[0]?.delta ?? {}

			if (delta.content) {
+				hasContent = true
				for (const chunk of matcher.update(delta.content)) {
					yield chunk
				}
			}

			if ("reasoning_content" in delta && delta.reasoning_content) {
+				hasContent = true
				yield {
					type: "reasoning",
					text: (delta.reasoning_content as string | undefined) || "",
@@ -211,9 +214,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
		}

		for (const chunk of matcher.final()) {
+			hasContent = true
			yield chunk
		}

+		// For GLM models that may return empty streams, provide a fallback response
+		if (
+			!hasContent &&
+			modelId &&
+			(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+		) {
+			yield {
+				type: "text",
+				text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
+			}
+		}
+
		if (lastUsage) {
			yield this.processUsageMetrics(lastUsage, modelInfo)
		}

src/core/task/Task.ts

Lines changed: 48 additions & 10 deletions
@@ -2339,17 +2339,55 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
				continue
			} else {
				// If there's no assistant_responses, that means we got no text
-				// or tool_use content blocks from API which we should assume is
-				// an error.
-				await this.say(
-					"error",
-					"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
-				)
+				// or tool_use content blocks from API. This can happen with some models
+				// like GLM-4.6 that may return empty streams occasionally.
+				// Instead of treating this as a fatal error, we'll handle it gracefully.
+				const modelId = this.api.getModel().id
+				const isGLMModel =
+					modelId && (modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+
+				if (isGLMModel) {
+					// For GLM models, treat empty response as a temporary issue and retry
+					await this.say(
+						"error",
+						"The GLM model returned an empty response. This can happen occasionally with GLM models. Retrying with a clarification request...",
+					)

-				await this.addToApiConversationHistory({
-					role: "assistant",
-					content: [{ type: "text", text: "Failure: I did not provide a response." }],
-				})
+					// Add a minimal assistant response to maintain conversation flow
+					await this.addToApiConversationHistory({
+						role: "assistant",
+						content: [
+							{
+								type: "text",
+								text: "I encountered an issue generating a response. Let me try again.",
+							},
+						],
+					})
+
+					// Add a user message prompting the model to respond
+					this.userMessageContent.push({
+						type: "text",
+						text: "Please provide a response to the previous request. If you're having trouble, break down the task into smaller steps.",
+					})
+
+					// Continue the loop to retry with the clarification
+					stack.push({
+						userContent: [...this.userMessageContent],
+						includeFileDetails: false,
+					})
+					continue
+				} else {
+					// For other models, log a more informative error
+					await this.say(
+						"error",
+						"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output. Consider checking your API configuration or trying a different model.",
+					)
+
+					await this.addToApiConversationHistory({
+						role: "assistant",
+						content: [{ type: "text", text: "Failure: I did not provide a response." }],
+					})
+				}
			}

			// If we reach here without continuing, return false (will always be false for now)
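
Beyond the provider-level fallback text, the Task.ts hunk above turns an empty GLM response into a clarification retry: it records a minimal assistant turn, pushes a follow-up user message onto the request stack, and continues the loop rather than failing. The sketch below reduces that control flow to plain parameters; `StackEntry` and `handleEmptyAssistantResponse` are illustrative stand-ins, not actual Task internals.

interface StackEntry {
	userContent: Array<{ type: "text"; text: string }>
	includeFileDetails: boolean
}

function handleEmptyAssistantResponse(modelId: string, stack: StackEntry[]): "retry" | "error" {
	const id = modelId.toLowerCase()
	const isGLMModel = id.includes("glm") || id.includes("chatglm")

	if (!isGLMModel) {
		// Non-GLM models keep the original behavior: report the error and record a
		// "Failure: I did not provide a response." assistant turn.
		return "error"
	}

	// GLM models get a clarification turn pushed back onto the request loop so the
	// model can try again on the next iteration.
	stack.push({
		userContent: [
			{
				type: "text",
				text: "Please provide a response to the previous request. If you're having trouble, break down the task into smaller steps.",
			},
		],
		includeFileDetails: false,
	})
	return "retry"
}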
