diff --git a/src/api/providers/__tests__/glm-empty-stream.spec.ts b/src/api/providers/__tests__/glm-empty-stream.spec.ts
new file mode 100644
index 0000000000..ba17c6df07
--- /dev/null
+++ b/src/api/providers/__tests__/glm-empty-stream.spec.ts
@@ -0,0 +1,193 @@
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { OpenAiHandler } from "../openai"
+import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"
+
+describe("GLM Empty Stream Handling", () => {
+	describe("OpenAiHandler", () => {
+		it("should provide fallback response for GLM models with empty streams", async () => {
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream - only usage, no content
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+
+			const handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "glm-4.6",
+				openAiStreamingEnabled: true,
+			})
+
+			// Replace the client with our mock
+			;(handler as any).client = mockClient
+
+			const chunks = []
+			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toContain("trouble generating a response")
+
+			// Should still have usage metrics
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+		})
+
+		it("should not provide fallback for non-GLM models with empty streams", async () => {
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream - only usage, no content
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+
+			const handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+				openAiStreamingEnabled: true,
+			})
+
+			// Replace the client with our mock
+			;(handler as any).client = mockClient
+
+			const chunks = []
+			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should NOT have a fallback text response for non-GLM models
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(0)
+
+			// Should still have usage metrics
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks).toHaveLength(1)
+		})
+	})
+
+	describe("BaseOpenAiCompatibleProvider", () => {
+		class TestProvider extends BaseOpenAiCompatibleProvider<"glm-4.6" | "other-model"> {
+			constructor(modelId: "glm-4.6" | "other-model") {
+				super({
+					providerName: "Test",
+					baseURL: "https://test.com",
+					apiKey: "test-key",
+					defaultProviderModelId: modelId,
+					providerModels: {
+						"glm-4.6": {
+							maxTokens: 4096,
+							contextWindow: 8192,
+							supportsPromptCache: false,
+							inputPrice: 0,
+							outputPrice: 0,
+						},
+						"other-model": {
+							maxTokens: 4096,
+							contextWindow: 8192,
+							supportsPromptCache: false,
+							inputPrice: 0,
+							outputPrice: 0,
+						},
+					},
+					apiModelId: modelId,
+				})
+			}
+		}
+
+		it("should provide fallback response for GLM models with empty streams", async () => {
+			const provider = new TestProvider("glm-4.6")
+
+			// Mock the client
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+			;(provider as any).client = mockClient
+
+			const chunks = []
+			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(1)
+			expect(textChunks[0].text).toContain("trouble generating a response")
+		})
+
+		it("should not provide fallback for non-GLM models", async () => {
+			const provider = new TestProvider("other-model")
+
+			// Mock the client
+			const mockClient = {
+				chat: {
+					completions: {
+						create: vi.fn().mockImplementation(async function* () {
+							// Simulate empty stream
+							yield {
+								choices: [{ delta: {} }],
+								usage: {
+									prompt_tokens: 100,
+									completion_tokens: 0,
+								},
+							}
+						}),
+					},
+				},
+			}
+			;(provider as any).client = mockClient
+
+			const chunks = []
+			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
+
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Should NOT have a fallback text response
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks).toHaveLength(0)
+		})
+	})
+})
diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index fb6c5d0377..0f881d03b1
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -98,11 +98,14 @@ export abstract class BaseOpenAiCompatibleProvider
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
 		const stream = await this.createStream(systemPrompt, messages, metadata)
+		let hasContent = false
+		const modelId = this.getModel().id
 
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
+				hasContent = true
 				yield {
 					type: "text",
 					text: delta.content,
@@ -117,6 +120,18 @@ export abstract class BaseOpenAiCompatibleProvider
 				}
 			}
 		}
+
+		// For GLM models that may return empty streams, provide a fallback response
+		if (
+			!hasContent &&
+			modelId &&
+			(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+		) {
+			yield {
+				type: "text",
+				text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
+			}
+		}
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index aebe671712..8b35f293b5
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -189,17 +189,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			)
 
 			let lastUsage
+			let hasContent = false
 
 			for await (const chunk of stream) {
 				const delta = chunk.choices[0]?.delta ?? {}
 
 				if (delta.content) {
+					hasContent = true
 					for (const chunk of matcher.update(delta.content)) {
 						yield chunk
 					}
 				}
 
 				if ("reasoning_content" in delta && delta.reasoning_content) {
+					hasContent = true
 					yield {
 						type: "reasoning",
 						text: (delta.reasoning_content as string | undefined) || "",
@@ -211,9 +214,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 
 			for (const chunk of matcher.final()) {
+				hasContent = true
 				yield chunk
 			}
 
+			// For GLM models that may return empty streams, provide a fallback response
+			if (
+				!hasContent &&
+				modelId &&
+				(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+			) {
+				yield {
+					type: "text",
+					text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
+				}
+			}
+
 			if (lastUsage) {
 				yield this.processUsageMetrics(lastUsage, modelInfo)
 			}
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 851df91e6c..a1fc8519c1
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2339,17 +2339,55 @@ export class Task extends EventEmitter implements TaskLike {
 					continue
 				} else {
 					// If there's no assistant_responses, that means we got no text
-					// or tool_use content blocks from API which we should assume is
-					// an error.
-					await this.say(
-						"error",
-						"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
-					)
+					// or tool_use content blocks from API. This can happen with some models
+					// like GLM-4.6 that may return empty streams occasionally.
+					// Instead of treating this as a fatal error, we'll handle it gracefully.
+					const modelId = this.api.getModel().id
+					const isGLMModel =
+						modelId && (modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
+
+					if (isGLMModel) {
+						// For GLM models, treat empty response as a temporary issue and retry
+						await this.say(
+							"error",
+							"The GLM model returned an empty response. This can happen occasionally with GLM models. Retrying with a clarification request...",
+						)
 
-					await this.addToApiConversationHistory({
-						role: "assistant",
-						content: [{ type: "text", text: "Failure: I did not provide a response." }],
-					})
+						// Add a minimal assistant response to maintain conversation flow
+						await this.addToApiConversationHistory({
+							role: "assistant",
+							content: [
+								{
+									type: "text",
+									text: "I encountered an issue generating a response. Let me try again.",
+								},
+							],
+						})
+
+						// Add a user message prompting the model to respond
+						this.userMessageContent.push({
+							type: "text",
+							text: "Please provide a response to the previous request. If you're having trouble, break down the task into smaller steps.",
+						})
+
+						// Continue the loop to retry with the clarification
+						stack.push({
+							userContent: [...this.userMessageContent],
+							includeFileDetails: false,
+						})
+						continue
+					} else {
+						// For other models, log a more informative error
+						await this.say(
+							"error",
+							"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output. Consider checking your API configuration or trying a different model.",
+						)
+
+						await this.addToApiConversationHistory({
+							role: "assistant",
+							content: [{ type: "text", text: "Failure: I did not provide a response." }],
+						})
+					}
 				}
 
 				// If we reach here without continuing, return false (will always be false for now)