
Commit f32a78d

fix: handle Gemini thinking-only responses to prevent empty assistant message error
- Add tracking for whether any actual content (not just reasoning) was yielded in the Gemini handler
- Yield an empty text chunk if only reasoning content was provided, to ensure assistantMessage is not empty
- Improve the error message in Task.ts to be more informative for Gemini-specific issues
- Add comprehensive tests for thinking-only response scenarios

Fixes #6986
1 parent 12d1959 commit f32a78d

File tree

3 files changed: +266 −4 lines

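The handler-side change boils down to one guard: track whether any non-thought text was yielded while streaming, and emit a single empty text chunk if the stream ends without one. A minimal standalone sketch of that pattern (the part and chunk shapes here are simplified stand-ins, not the project's real ApiStream types):

// Minimal sketch of the thinking-only guard, with an illustrative chunk shape.
type OutChunk = { type: "text" | "reasoning"; text: string }

async function* streamWithGuard(
	parts: AsyncIterable<{ thought?: boolean; text?: string }>,
): AsyncGenerator<OutChunk> {
	let hasYieldedContent = false // true once any non-thought text is seen

	for await (const part of parts) {
		if (!part.text) continue
		if (part.thought) {
			yield { type: "reasoning", text: part.text } // thinking, not content
		} else {
			yield { type: "text", text: part.text }
			hasYieldedContent = true
		}
	}

	// Thinking-only stream: emit one empty text chunk so the assistant
	// message downstream is never entirely absent.
	if (!hasYieldedContent) {
		yield { type: "text", text: "" }
	}
}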
src/api/providers/__tests__/gemini-thinking-only.spec.ts

Lines changed: 244 additions & 0 deletions
@@ -0,0 +1,244 @@
// npx vitest run src/api/providers/__tests__/gemini-thinking-only.spec.ts

import { describe, it, expect, vi, beforeEach } from "vitest"
import { GeminiHandler } from "../gemini"
import type { ApiHandlerOptions } from "../../../shared/api"

describe("GeminiHandler - Thinking-only responses", () => {
	let handler: GeminiHandler
	let mockClient: any

	beforeEach(() => {
		// Create a mock client
		mockClient = {
			models: {
				generateContentStream: vi.fn(),
			},
		}

		// Create handler with mocked client
		handler = new GeminiHandler({
			apiProvider: "gemini",
			geminiApiKey: "test-key",
			apiModelId: "gemini-2.5-pro",
		} as ApiHandlerOptions)

		// Replace the client with our mock
		;(handler as any).client = mockClient
	})

	it("should yield empty text when only reasoning content is provided", async () => {
		// Mock a stream that only contains reasoning/thinking content
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				// First chunk with only thinking content
				yield {
					candidates: [
						{
							content: {
								parts: [
									{
										thought: true,
										text: "Let me think about this problem...",
									},
								],
							},
						},
					],
				}

				// Second chunk with more thinking
				yield {
					candidates: [
						{
							content: {
								parts: [
									{
										thought: true,
										text: "I need to consider the tool usage...",
									},
								],
							},
						},
					],
				}

				// Final chunk with usage metadata but no actual content
				yield {
					usageMetadata: {
						promptTokenCount: 100,
						candidatesTokenCount: 50,
						thoughtsTokenCount: 30,
					},
				}
			},
		}

		mockClient.models.generateContentStream.mockResolvedValue(mockStream)

		// Collect all chunks from the stream
		const chunks: any[] = []
		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Verify we got reasoning chunks
		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
		expect(reasoningChunks).toHaveLength(2)
		expect(reasoningChunks[0].text).toBe("Let me think about this problem...")
		expect(reasoningChunks[1].text).toBe("I need to consider the tool usage...")

		// Verify we got at least one text chunk (even if empty) to prevent the error
		const textChunks = chunks.filter((c) => c.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("")

		// Verify we got usage metadata
		const usageChunks = chunks.filter((c) => c.type === "usage")
		expect(usageChunks).toHaveLength(1)
		expect(usageChunks[0].inputTokens).toBe(100)
		expect(usageChunks[0].outputTokens).toBe(50)
	})

	it("should not add empty text when actual content is provided", async () => {
		// Mock a stream that contains both reasoning and actual content
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				// First chunk with thinking
				yield {
					candidates: [
						{
							content: {
								parts: [
									{
										thought: true,
										text: "Thinking about the response...",
									},
								],
							},
						},
					],
				}

				// Second chunk with actual content
				yield {
					candidates: [
						{
							content: {
								parts: [
									{
										text: "Here is my actual response.",
									},
								],
							},
						},
					],
				}

				// Usage metadata
				yield {
					usageMetadata: {
						promptTokenCount: 100,
						candidatesTokenCount: 50,
					},
				}
			},
		}

		mockClient.models.generateContentStream.mockResolvedValue(mockStream)

		// Collect all chunks from the stream
		const chunks: any[] = []
		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Verify we got reasoning chunk
		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
		expect(reasoningChunks).toHaveLength(1)

		// Verify we got actual text content (not empty)
		const textChunks = chunks.filter((c) => c.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("Here is my actual response.")

		// Should NOT have an additional empty text chunk
		const emptyTextChunks = textChunks.filter((c) => c.text === "")
		expect(emptyTextChunks).toHaveLength(0)
	})

	it("should handle mixed thinking and content in same part", async () => {
		// Mock a stream with mixed content
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield {
					candidates: [
						{
							content: {
								parts: [
									{
										thought: true,
										text: "Analyzing the request...",
									},
									{
										text: "I'll help you with that.",
									},
									{
										thought: true,
										text: "Considering tool usage...",
									},
								],
							},
						},
					],
				}
			},
		}

		mockClient.models.generateContentStream.mockResolvedValue(mockStream)

		// Collect all chunks from the stream
		const chunks: any[] = []
		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Verify we got both reasoning and text chunks
		const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
		expect(reasoningChunks).toHaveLength(2)

		const textChunks = chunks.filter((c) => c.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("I'll help you with that.")
	})

	it("should handle empty stream gracefully", async () => {
		// Mock an empty stream
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				// Yield nothing
			},
		}

		mockClient.models.generateContentStream.mockResolvedValue(mockStream)

		// Collect all chunks from the stream
		const chunks: any[] = []
		const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

		for await (const chunk of stream) {
			chunks.push(chunk)
		}

		// Should yield at least an empty text chunk to prevent errors
		const textChunks = chunks.filter((c) => c.type === "text")
		expect(textChunks).toHaveLength(1)
		expect(textChunks[0].text).toBe("")
	})
})

src/api/providers/gemini.ts

Lines changed: 9 additions & 0 deletions
@@ -94,6 +94,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 
 		let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
 		let pendingGroundingMetadata: GroundingMetadata | undefined
+		let hasYieldedContent = false // Track if we've yielded any actual content
 
 		for await (const chunk of result) {
 			// Process candidates and their parts to separate thoughts from content
@@ -115,6 +116,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 					// This is regular content
 					if (part.text) {
 						yield { type: "text", text: part.text }
+						hasYieldedContent = true
 					}
 				}
 			}
@@ -124,13 +126,20 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
 			// Fallback to the original text property if no candidates structure
 			else if (chunk.text) {
 				yield { type: "text", text: chunk.text }
+				hasYieldedContent = true
 			}
 
 			if (chunk.usageMetadata) {
 				lastUsageMetadata = chunk.usageMetadata
 			}
 		}
 
+		// If we only got reasoning content and no actual text, yield an empty text chunk
+		// This ensures the assistant message won't be empty
+		if (!hasYieldedContent) {
+			yield { type: "text", text: "" }
+		}
+
 		if (pendingGroundingMetadata) {
 			const citations = this.extractCitationsOnly(pendingGroundingMetadata)
 			if (citations) {

src/core/task/Task.ts

Lines changed: 13 additions & 4 deletions
@@ -2019,10 +2019,19 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			// If there's no assistant_responses, that means we got no text
 			// or tool_use content blocks from API which we should assume is
 			// an error.
-			await this.say(
-				"error",
-				"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
-			)
+			const modelId = getModelId(this.apiConfiguration)
+			const isGeminiModel = modelId?.includes("gemini") ?? false
+
+			let errorMessage = "Unexpected API Response: The language model did not provide any assistant messages."
+
+			if (isGeminiModel) {
+				errorMessage +=
+					" This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content. The model may need to be prompted again or the request may need to be retried."
+			} else {
+				errorMessage += " This may indicate an issue with the API or the model's output."
+			}
+
+			await this.say("error", errorMessage)
 
 			await this.addToApiConversationHistory({
 				role: "assistant",

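For context on why the empty chunk matters, here is a minimal sketch of the consumer-side check that produces the error above (heavily simplified from Task.ts; the function name and chunk shapes are illustrative assumptions, not the real API):

// Simplified stand-in for the Task.ts accumulation loop (illustrative only).
type StreamChunk =
	| { type: "text"; text: string }
	| { type: "reasoning"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number }

async function collectAssistantBlocks(stream: AsyncIterable<StreamChunk>) {
	const assistantBlocks: Array<{ type: "text"; text: string }> = []

	for await (const chunk of stream) {
		// Reasoning chunks are surfaced to the UI but never become assistant
		// content, so a thinking-only stream used to leave this array empty.
		if (chunk.type === "text") {
			assistantBlocks.push({ type: "text", text: chunk.text })
		}
	}

	// Before the fix, a thinking-only Gemini response landed here and
	// triggered the "no assistant messages" error path shown in the diff.
	if (assistantBlocks.length === 0) {
		throw new Error("Unexpected API Response: The language model did not provide any assistant messages.")
	}

	return assistantBlocks
}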