diff --git a/src/api/providers/__tests__/gemini-thinking-only.spec.ts b/src/api/providers/__tests__/gemini-thinking-only.spec.ts
new file mode 100644
index 00000000000..6d25a1a2a41
--- /dev/null
+++ b/src/api/providers/__tests__/gemini-thinking-only.spec.ts
@@ -0,0 +1,244 @@
+// npx vitest run src/api/providers/__tests__/gemini-thinking-only.spec.ts
+
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import { GeminiHandler } from "../gemini"
+import type { ApiHandlerOptions } from "../../../shared/api"
+
+describe("GeminiHandler - Thinking-only responses", () => {
+	let handler: GeminiHandler
+	let mockClient: any
+
+	beforeEach(() => {
+		// Create a mock client
+		mockClient = {
+			models: {
+				generateContentStream: vi.fn(),
+			},
+		}
+
+		// Create handler with mocked client
+		handler = new GeminiHandler({
+			apiProvider: "gemini",
+			geminiApiKey: "test-key",
+			apiModelId: "gemini-2.5-pro",
+		} as ApiHandlerOptions)
+
+		// Replace the client with our mock
+		;(handler as any).client = mockClient
+	})
+
+	it("should yield empty text when only reasoning content is provided", async () => {
+		// Mock a stream that only contains reasoning/thinking content
+		const mockStream = {
+			async *[Symbol.asyncIterator]() {
+				// First chunk with only thinking content
+				yield {
+					candidates: [
+						{
+							content: {
+								parts: [
+									{
+										thought: true,
+										text: "Let me think about this problem...",
+									},
+								],
+							},
+						},
+					],
+				}
+
+				// Second chunk with more thinking
+				yield {
+					candidates: [
+						{
+							content: {
+								parts: [
+									{
+										thought: true,
+										text: "I need to consider the tool usage...",
+									},
+								],
+							},
+						},
+					],
+				}
+
+				// Final chunk with usage metadata but no actual content
+				yield {
+					usageMetadata: {
+						promptTokenCount: 100,
+						candidatesTokenCount: 50,
+						thoughtsTokenCount: 30,
+					},
+				}
+			},
+		}
+
+		mockClient.models.generateContentStream.mockResolvedValue(mockStream)
+
+		// Collect all chunks from the stream
+		const chunks: any[] = []
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])
+
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Verify we got reasoning chunks
+		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(2)
+		expect(reasoningChunks[0].text).toBe("Let me think about this problem...")
+		expect(reasoningChunks[1].text).toBe("I need to consider the tool usage...")
+
+		// Verify we got at least one text chunk (even if empty) to prevent the error
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("")
+
+		// Verify we got usage metadata
+		const usageChunks = chunks.filter((c) => c.type === "usage")
+		expect(usageChunks).toHaveLength(1)
+		expect(usageChunks[0].inputTokens).toBe(100)
+		expect(usageChunks[0].outputTokens).toBe(50)
+	})
+
+	it("should not add empty text when actual content is provided", async () => {
+		// Mock a stream that contains both reasoning and actual content
+		const mockStream = {
+			async *[Symbol.asyncIterator]() {
+				// First chunk with thinking
+				yield {
+					candidates: [
+						{
+							content: {
+								parts: [
+									{
+										thought: true,
+										text: "Thinking about the response...",
+									},
+								],
+							},
+						},
+					],
+				}
+
+				// Second chunk with actual content
+				yield {
+					candidates: [
+						{
+							content: {
+								parts: [
+									{
+										text: "Here is my actual response.",
+									},
+								],
+							},
+						},
+					],
+				}
+
+				// Usage metadata
+				yield {
+					usageMetadata: {
+						promptTokenCount: 100,
+						candidatesTokenCount: 50,
+					},
+				}
+			},
+		}
+
+		mockClient.models.generateContentStream.mockResolvedValue(mockStream)
+
+		// Collect all chunks from the stream
+		const chunks: any[] = []
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])
+
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Verify we got the reasoning chunk
+		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(1)
+
+		// Verify we got actual text content (not empty)
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("Here is my actual response.")
+
+		// Should NOT have an additional empty text chunk
+		const emptyTextChunks = textChunks.filter((c) => c.text === "")
+		expect(emptyTextChunks).toHaveLength(0)
+	})
+
+	it("should handle mixed thinking and content parts in the same chunk", async () => {
+		// Mock a stream with mixed content
+		const mockStream = {
+			async *[Symbol.asyncIterator]() {
+				yield {
+					candidates: [
+						{
+							content: {
+								parts: [
+									{
+										thought: true,
+										text: "Analyzing the request...",
+									},
+									{
+										text: "I'll help you with that.",
+									},
+									{
+										thought: true,
+										text: "Considering tool usage...",
+									},
+								],
+							},
+						},
+					],
+				}
+			},
+		}
+
+		mockClient.models.generateContentStream.mockResolvedValue(mockStream)
+
+		// Collect all chunks from the stream
+		const chunks: any[] = []
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])
+
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Verify we got both reasoning and text chunks
+		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
+		expect(reasoningChunks).toHaveLength(2)
+
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("I'll help you with that.")
+	})
+
+	it("should handle empty stream gracefully", async () => {
+		// Mock an empty stream
+		const mockStream = {
+			async *[Symbol.asyncIterator]() {
+				// Yield nothing
+			},
+		}
+
+		mockClient.models.generateContentStream.mockResolvedValue(mockStream)
+
+		// Collect all chunks from the stream
+		const chunks: any[] = []
+		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])
+
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should still yield an empty text chunk to prevent errors
+		const textChunks = chunks.filter((c) => c.type === "text")
+		expect(textChunks).toHaveLength(1)
+		expect(textChunks[0].text).toBe("")
+	})
+})
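
For reviewers: the assertions above filter on a small discriminated union of stream chunks. A minimal sketch of the shapes being asserted on, derived from the expectations themselves (the repo's actual chunk union may define additional variants and fields):

```ts
// Sketch of the chunk shapes the tests filter on. Field names come
// straight from the assertions above; the real union in the codebase
// may include further variants (e.g. grounding/citation chunks).
type StreamChunk =
	| { type: "text"; text: string }
	| { type: "reasoning"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number }
```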
diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts
index 5e547edbdc6..c2698b5e1b0 100644
--- a/src/api/providers/gemini.ts
+++ b/src/api/providers/gemini.ts
@@ -94,6 +94,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 		let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
 		let pendingGroundingMetadata: GroundingMetadata | undefined
+		let hasYieldedContent = false // Track if we've yielded any actual content
 
 		for await (const chunk of result) {
 			// Process candidates and their parts to separate thoughts from content
@@ -115,6 +116,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 					// This is regular content
 					if (part.text) {
 						yield { type: "text", text: part.text }
+						hasYieldedContent = true
 					}
 				}
 			}
@@ -124,6 +126,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			// Fallback to the original text property if no candidates structure
 			else if (chunk.text) {
 				yield { type: "text", text: chunk.text }
+				hasYieldedContent = true
 			}
 
 			if (chunk.usageMetadata) {
@@ -131,6 +134,12 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 		}
 
+		// If we only got reasoning content and no actual text, yield an empty text chunk
+		// This ensures the assistant message won't be empty
+		if (!hasYieldedContent) {
+			yield { type: "text", text: "" }
+		}
+
 		if (pendingGroundingMetadata) {
 			const citations = this.extractCitationsOnly(pendingGroundingMetadata)
 			if (citations) {
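
The gemini.ts change boils down to a has-content flag checked once the stream ends. A standalone sketch of the pattern (withTextFallback is a hypothetical name; the real handler inlines this logic in createMessage):

```ts
// Distilled version of the fallback above: pass chunks through, remember
// whether any non-empty text was seen, and emit one empty text chunk at
// end-of-stream so a reasoning-only turn still yields an assistant message.
type Chunk = { type: "text" | "reasoning"; text: string }

async function* withTextFallback(source: AsyncIterable<Chunk>): AsyncGenerator<Chunk> {
	let hasYieldedContent = false
	for await (const chunk of source) {
		if (chunk.type === "text" && chunk.text) {
			hasYieldedContent = true
		}
		yield chunk
	}
	if (!hasYieldedContent) {
		yield { type: "text", text: "" } // sentinel for thinking-only streams
	}
}
```

Fed a reasoning-only stream, this yields the reasoning chunks followed by a single empty text chunk, which is exactly what the first test asserts.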
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index cb6694b7f04..43eab340f57 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2019,10 +2019,19 @@ export class Task extends EventEmitter implements TaskLike {
 			// If there's no assistant_responses, that means we got no text
 			// or tool_use content blocks from API which we should assume is
 			// an error.
-			await this.say(
-				"error",
-				"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
-			)
+			const modelId = getModelId(this.apiConfiguration)
+			const isGeminiModel = modelId?.includes("gemini") ?? false
+
+			let errorMessage = "Unexpected API Response: The language model did not provide any assistant messages."
+
+			if (isGeminiModel) {
+				errorMessage +=
+					" This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content. The model may need to be prompted again or the request may need to be retried."
+			} else {
+				errorMessage += " This may indicate an issue with the API or the model's output."
+			}
+
+			await this.say("error", errorMessage)
 			await this.addToApiConversationHistory({
 				role: "assistant",
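
For clarity, the Task.ts branch reduces to a pure function over the model id. A sketch under that assumption (emptyResponseError is a hypothetical name; the actual change inlines the logic and obtains the id via getModelId(this.apiConfiguration)):

```ts
// Sketch of the provider-aware error text selection added above.
function emptyResponseError(modelId: string | undefined): string {
	const base = "Unexpected API Response: The language model did not provide any assistant messages."
	if (modelId?.includes("gemini")) {
		// Gemini can emit thinking-only turns with no user-visible text.
		return `${base} This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content. The model may need to be prompted again or the request may need to be retried.`
	}
	return `${base} This may indicate an issue with the API or the model's output.`
}
```

With the gemini.ts fallback in place this path should rarely fire for Gemini; the message change is a defensive improvement for the cases that still slip through.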