244 changes: 244 additions & 0 deletions src/api/providers/__tests__/gemini-thinking-only.spec.ts
@@ -0,0 +1,244 @@
// npx vitest run src/api/providers/__tests__/gemini-thinking-only.spec.ts
Contributor Author commented:
Nice comprehensive test coverage for the thinking-only scenarios! Consider whether these tests should be integrated into the main gemini.spec.ts file for better organization, or whether keeping them separate is intentional for clarity. Both approaches have merit: a separate file makes the tests for this specific issue easier to find, while integration keeps all Gemini tests together.


import { describe, it, expect, vi, beforeEach } from "vitest"
import { GeminiHandler } from "../gemini"
import type { ApiHandlerOptions } from "../../../shared/api"

describe("GeminiHandler - Thinking-only responses", () => {
let handler: GeminiHandler
let mockClient: any

beforeEach(() => {
// Create a mock client
mockClient = {
models: {
generateContentStream: vi.fn(),
},
}

// Create handler with mocked client
handler = new GeminiHandler({
apiProvider: "gemini",
geminiApiKey: "test-key",
apiModelId: "gemini-2.5-pro",
} as ApiHandlerOptions)

// Replace the client with our mock
;(handler as any).client = mockClient
})

it("should yield empty text when only reasoning content is provided", async () => {
// Mock a stream that only contains reasoning/thinking content
const mockStream = {
async *[Symbol.asyncIterator]() {
// First chunk with only thinking content
yield {
candidates: [
{
content: {
parts: [
{
thought: true,
text: "Let me think about this problem...",
},
],
},
},
],
}

// Second chunk with more thinking
yield {
candidates: [
{
content: {
parts: [
{
thought: true,
text: "I need to consider the tool usage...",
},
],
},
},
],
}

// Final chunk with usage metadata but no actual content
yield {
usageMetadata: {
promptTokenCount: 100,
candidatesTokenCount: 50,
thoughtsTokenCount: 30,
},
}
},
}

mockClient.models.generateContentStream.mockResolvedValue(mockStream)

// Collect all chunks from the stream
const chunks: any[] = []
const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify we got reasoning chunks
const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
expect(reasoningChunks).toHaveLength(2)
expect(reasoningChunks[0].text).toBe("Let me think about this problem...")
expect(reasoningChunks[1].text).toBe("I need to consider the tool usage...")

// Verify we got at least one text chunk (even if empty) to prevent the error
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("")

// Verify we got usage metadata
const usageChunks = chunks.filter((c) => c.type === "usage")
expect(usageChunks).toHaveLength(1)
expect(usageChunks[0].inputTokens).toBe(100)
expect(usageChunks[0].outputTokens).toBe(50)
})

it("should not add empty text when actual content is provided", async () => {
// Mock a stream that contains both reasoning and actual content
const mockStream = {
async *[Symbol.asyncIterator]() {
// First chunk with thinking
yield {
candidates: [
{
content: {
parts: [
{
thought: true,
text: "Thinking about the response...",
},
],
},
},
],
}

// Second chunk with actual content
yield {
candidates: [
{
content: {
parts: [
{
text: "Here is my actual response.",
},
],
},
},
],
}

// Usage metadata
yield {
usageMetadata: {
promptTokenCount: 100,
candidatesTokenCount: 50,
},
}
},
}

mockClient.models.generateContentStream.mockResolvedValue(mockStream)

// Collect all chunks from the stream
const chunks: any[] = []
const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify we got reasoning chunk
const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
expect(reasoningChunks).toHaveLength(1)

// Verify we got actual text content (not empty)
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("Here is my actual response.")

// Should NOT have an additional empty text chunk
const emptyTextChunks = textChunks.filter((c) => c.text === "")
expect(emptyTextChunks).toHaveLength(0)
})

it("should handle mixed thinking and content in same part", async () => {
// Mock a stream with mixed content
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
candidates: [
{
content: {
parts: [
{
thought: true,
text: "Analyzing the request...",
},
{
text: "I'll help you with that.",
},
{
thought: true,
text: "Considering tool usage...",
},
],
},
},
],
}
},
}

mockClient.models.generateContentStream.mockResolvedValue(mockStream)

// Collect all chunks from the stream
const chunks: any[] = []
const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify we got both reasoning and text chunks
const reasoningChunks = chunks.filter((c) => c.type === "reasoning")
expect(reasoningChunks).toHaveLength(2)

const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("I'll help you with that.")
})

it("should handle empty stream gracefully", async () => {
// Mock an empty stream
const mockStream = {
async *[Symbol.asyncIterator]() {
// Yield nothing
},
}

mockClient.models.generateContentStream.mockResolvedValue(mockStream)

// Collect all chunks from the stream
const chunks: any[] = []
const stream = handler.createMessage("System prompt", [{ role: "user", content: "Test message" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Should yield at least an empty text chunk to prevent errors
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toBe("")
})
})
9 changes: 9 additions & 0 deletions src/api/providers/gemini.ts
@@ -94,6 +94,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl

let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
let pendingGroundingMetadata: GroundingMetadata | undefined
let hasYieldedContent = false // Track if we've yielded any actual content

for await (const chunk of result) {
// Process candidates and their parts to separate thoughts from content
@@ -115,6 +116,7 @@
// This is regular content
if (part.text) {
yield { type: "text", text: part.text }
hasYieldedContent = true
}
}
}
@@ -124,13 +126,20 @@
// Fallback to the original text property if no candidates structure
else if (chunk.text) {
yield { type: "text", text: chunk.text }
hasYieldedContent = true
}

if (chunk.usageMetadata) {
lastUsageMetadata = chunk.usageMetadata
}
}

// If we only got reasoning content and no actual text, yield an empty text chunk
// This ensures the assistant message won't be empty
if (!hasYieldedContent) {
Contributor Author commented:
Could we add a comment here explaining why we yield an empty text chunk? Something like:

Suggested change
if (!hasYieldedContent) {
// If we only got reasoning content and no actual text, yield an empty text chunk
// This ensures the assistant message won't be empty and prevents the
// "language model did not provide any assistant messages" error
if (!hasYieldedContent) {
yield { type: "text", text: "" }
}

This would help future maintainers understand the reasoning behind this fix.

yield { type: "text", text: "" }
}

if (pendingGroundingMetadata) {
const citations = this.extractCitationsOnly(pendingGroundingMetadata)
if (citations) {
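
For readers reconstructing the change from the hunks above, here is a minimal sketch of how the streaming loop in GeminiHandler.createMessage behaves after this patch. It is assembled from the added and context lines in this diff plus the expectations in the new tests; the loop structure hidden between the hunks (the candidate/part iteration) is paraphrased, so treat it as an illustration of the method body rather than the verbatim source.

let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined
let hasYieldedContent = false // Track if we've yielded any actual (non-thought) content

for await (const chunk of result) {
    // Separate "thought" parts (reasoning) from regular content parts.
    for (const candidate of chunk.candidates ?? []) {
        for (const part of candidate.content?.parts ?? []) {
            if (part.thought) {
                // Reasoning/thinking output
                if (part.text) {
                    yield { type: "reasoning", text: part.text }
                }
            } else if (part.text) {
                // Regular content
                yield { type: "text", text: part.text }
                hasYieldedContent = true
            }
        }
    }

    // Fallback to the aggregated text property when no candidates structure is present.
    if (!chunk.candidates?.length && chunk.text) {
        yield { type: "text", text: chunk.text }
        hasYieldedContent = true
    }

    if (chunk.usageMetadata) {
        lastUsageMetadata = chunk.usageMetadata
    }
}

// If the model produced only reasoning (a thinking-only response), yield an empty
// text chunk so the assistant message is never completely empty and Task.ts does
// not raise "did not provide any assistant messages".
if (!hasYieldedContent) {
    yield { type: "text", text: "" }
}

// Grounding-metadata/citation handling and the final usage chunk (built from
// lastUsageMetadata, as the tests expect) follow in the real file and are not
// changed by this PR, so they are omitted here.
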
17 changes: 13 additions & 4 deletions src/core/task/Task.ts
@@ -2019,10 +2019,19 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
// If there's no assistant_responses, that means we got no text
// or tool_use content blocks from API which we should assume is
// an error.
await this.say(
"error",
"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
)
const modelId = getModelId(this.apiConfiguration)
const isGeminiModel = modelId?.includes("gemini") ?? false

let errorMessage = "Unexpected API Response: The language model did not provide any assistant messages."

if (isGeminiModel) {
errorMessage +=
Contributor Author commented:
The enhanced error message is helpful! Could we make it even more specific by mentioning this often happens with tool function calls? Something like:

Suggested change
errorMessage +=
if (isGeminiModel) {
errorMessage +=
" This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content, often during tool function calls. The model may need to be prompted again or the request may need to be retried."
} else {

" This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content. The model may need to be prompted again or the request may need to be retried."
} else {
errorMessage += " This may indicate an issue with the API or the model's output."
}

await this.say("error", errorMessage)

await this.addToApiConversationHistory({
role: "assistant",
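
Because the inline review comment above splits the Task.ts hunk in two, here is the new error-handling branch assembled in one piece. It is taken directly from the added lines in this diff; only the surrounding method context, which the hunk does not show, is omitted.

// Inside Task.ts, where an empty set of assistant_responses is treated as an error:
const modelId = getModelId(this.apiConfiguration)
const isGeminiModel = modelId?.includes("gemini") ?? false

let errorMessage = "Unexpected API Response: The language model did not provide any assistant messages."

if (isGeminiModel) {
    // Gemini can emit only "thinking" output and no response content, which leaves
    // the assistant message empty; give the user a more actionable explanation.
    errorMessage +=
        " This can occur with Gemini models when they are in 'thinking' mode but don't produce any actual response content. The model may need to be prompted again or the request may need to be retried."
} else {
    errorMessage += " This may indicate an issue with the API or the model's output."
}

await this.say("error", errorMessage)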