
Commit dab8d4c

feat: add thinking/reasoning data support to LiteLLM provider
- Handle reasoning, thinking, and reasoning_content fields in streaming responses
- Pass through thinking data from underlying models as reasoning chunks
- Add comprehensive test coverage for all thinking data scenarios
- Fix handling of chunks without choices array

Fixes #8497
1 parent 97f9686 commit dab8d4c
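
For context, the chunk shapes this change has to cope with look roughly like the sketch below. The three reasoning field names come straight from the diff in this commit; the surrounding interface is an illustrative assumption, not LiteLLM's published type.

```typescript
// Illustrative sketch only — an assumed chunk shape, not LiteLLM's
// published types. A streaming chunk may carry text, reasoning under
// one of several field names, or only usage data (no choices at all).
interface SketchStreamChunk {
	choices?: Array<{
		delta?: {
			content?: string
			// Reasoning text may arrive under any of these names,
			// depending on the model behind the LiteLLM proxy:
			reasoning?: string
			thinking?: string
			reasoning_content?: string
		}
	}>
	usage?: {
		prompt_tokens: number
		completion_tokens: number
	}
}
```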

2 files changed: +267 -3 lines changed


src/api/providers/__tests__/lite-llm.spec.ts

Lines changed: 241 additions & 0 deletions
```diff
@@ -387,4 +387,245 @@ describe("LiteLLMHandler", () => {
 			expect(createCall.max_completion_tokens).toBeUndefined()
 		})
 	})
+
+	describe("thinking/reasoning data handling", () => {
+		beforeEach(() => {
+			// Ensure handler is properly initialized for each test
+			vi.clearAllMocks()
+			handler = new LiteLLMHandler(mockOptions)
+		})
+
+		it("should handle reasoning field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with reasoning content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: "Let me think about this..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "The answer is 42" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify reasoning chunk was yielded
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Let me think about this...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "The answer is 42",
+			})
+		})
+
+		it("should handle thinking field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with thinking content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { thinking: "Processing the request..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Here's the solution" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify thinking chunk was yielded as reasoning
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Processing the request...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Here's the solution",
+			})
+		})
+
+		it("should handle reasoning_content field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with reasoning_content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning_content: "Analyzing the problem..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Solution found" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify reasoning_content chunk was yielded as reasoning
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Analyzing the problem...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Solution found",
+			})
+		})
+
+		it("should handle mixed reasoning and text content", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Complex question" }]
+
+			// Mock the stream response with mixed content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: "First, let me understand..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Based on my analysis" } }],
+					}
+					yield {
+						choices: [{ delta: { thinking: "Considering alternatives..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: ", the answer is clear." } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 15,
+							completion_tokens: 10,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify all chunks were yielded in correct order
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "First, let me understand...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Based on my analysis",
+			})
+			expect(results[2]).toEqual({
+				type: "reasoning",
+				text: "Considering alternatives...",
+			})
+			expect(results[3]).toEqual({
+				type: "text",
+				text: ", the answer is clear.",
+			})
+		})
+
+		it("should ignore non-string reasoning fields", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+			// Mock the stream response with invalid reasoning types
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: null } }],
+					}
+					yield {
+						choices: [{ delta: { thinking: 123 } }],
+					}
+					yield {
+						choices: [{ delta: { reasoning_content: { nested: "object" } } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Valid response" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 5,
+							completion_tokens: 3,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Should only have the valid text content
+			const contentChunks = results.filter((r) => r.type === "text" || r.type === "reasoning")
+			expect(contentChunks).toHaveLength(1)
+			expect(contentChunks[0]).toEqual({
+				type: "text",
+				text: "Valid response",
+			})
+		})
+	})
 })
```
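
The tests above all lean on one pattern worth noting: the handler consumes the completion through `withResponse()`, so the tests stub that method to hand back any object exposing an async iterator as the stream. A stripped-down standalone illustration (the names `fakeStream` and `demo` are invented here):

```typescript
// Minimal illustration of the mocking pattern used above: any object
// with [Symbol.asyncIterator] can impersonate a streaming API response
// in a for await...of loop.
async function demo() {
	const fakeStream = {
		async *[Symbol.asyncIterator]() {
			yield { delta: { reasoning: "thinking it through..." } }
			yield { delta: { content: "final answer" } }
		},
	}

	for await (const chunk of fakeStream) {
		console.log(chunk.delta)
	}
}

void demo()
```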

src/api/providers/lite-llm.ts

Lines changed: 26 additions & 3 deletions
```diff
@@ -142,11 +142,34 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler {
 		let lastUsage
 
 		for await (const chunk of completion) {
-			const delta = chunk.choices[0]?.delta
+			// Handle chunks that might not have choices array (e.g., usage-only chunks)
+			const delta = chunk.choices?.[0]?.delta
 			const usage = chunk.usage as LiteLLMUsage
 
-			if (delta?.content) {
-				yield { type: "text", text: delta.content }
+			// Check for reasoning/thinking content in the delta
+			// LiteLLM may pass through reasoning content from underlying models
+			if (delta) {
+				if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+					yield { type: "reasoning", text: delta.reasoning }
+				}
+
+				// Also check for thinking content (alternative field name)
+				if ("thinking" in delta && delta.thinking && typeof delta.thinking === "string") {
+					yield { type: "reasoning", text: delta.thinking }
+				}
+
+				// Check for reasoning_content (another possible field name)
+				if (
+					"reasoning_content" in delta &&
+					delta.reasoning_content &&
+					typeof delta.reasoning_content === "string"
+				) {
+					yield { type: "reasoning", text: delta.reasoning_content }
+				}
+
+				if (delta.content) {
+					yield { type: "text", text: delta.content }
+				}
 			}
 
 			if (usage) {
```