
Commit dab8d4c

feat: add thinking/reasoning data support to LiteLLM provider
- Handle reasoning, thinking, and reasoning_content fields in streaming responses
- Pass through thinking data from underlying models as reasoning chunks
- Add comprehensive test coverage for all thinking data scenarios
- Fix handling of chunks without choices array

Fixes #8497
1 parent 97f9686 commit dab8d4c
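
For context, the chunk shapes this change has to cope with look roughly like the sketch below. The three reasoning field names come straight from the diff in this commit; the surrounding interface is an illustrative assumption, not LiteLLM's published type.

```typescript
// Illustrative sketch only — an assumed chunk shape, not LiteLLM's
// published types. A streaming chunk may carry text, reasoning under
// one of several field names, or only usage data (no choices at all).
interface SketchStreamChunk {
	choices?: Array<{
		delta?: {
			content?: string
			// Reasoning text may arrive under any of these names,
			// depending on the model behind the LiteLLM proxy:
			reasoning?: string
			thinking?: string
			reasoning_content?: string
		}
	}>
	usage?: {
		prompt_tokens: number
		completion_tokens: number
	}
}
```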

2 files changed: +267 -3 lines changed


src/api/providers/__tests__/lite-llm.spec.ts

Lines changed: 241 additions & 0 deletions
```diff
@@ -387,4 +387,245 @@ describe("LiteLLMHandler", () => {
 			expect(createCall.max_completion_tokens).toBeUndefined()
 		})
 	})
+
+	describe("thinking/reasoning data handling", () => {
+		beforeEach(() => {
+			// Ensure handler is properly initialized for each test
+			vi.clearAllMocks()
+			handler = new LiteLLMHandler(mockOptions)
+		})
+
+		it("should handle reasoning field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with reasoning content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: "Let me think about this..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "The answer is 42" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify reasoning chunk was yielded
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Let me think about this...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "The answer is 42",
+			})
+		})
+
+		it("should handle thinking field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with thinking content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { thinking: "Processing the request..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Here's the solution" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify thinking chunk was yielded as reasoning
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Processing the request...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Here's the solution",
+			})
+		})
+
+		it("should handle reasoning_content field in delta", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			// Mock the stream response with reasoning_content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning_content: "Analyzing the problem..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Solution found" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify reasoning_content chunk was yielded as reasoning
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "Analyzing the problem...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Solution found",
+			})
+		})
+
+		it("should handle mixed reasoning and text content", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Complex question" }]
+
+			// Mock the stream response with mixed content
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: "First, let me understand..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Based on my analysis" } }],
+					}
+					yield {
+						choices: [{ delta: { thinking: "Considering alternatives..." } }],
+					}
+					yield {
+						choices: [{ delta: { content: ", the answer is clear." } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 15,
+							completion_tokens: 10,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Verify all chunks were yielded in correct order
+			expect(results[0]).toEqual({
+				type: "reasoning",
+				text: "First, let me understand...",
+			})
+			expect(results[1]).toEqual({
+				type: "text",
+				text: "Based on my analysis",
+			})
+			expect(results[2]).toEqual({
+				type: "reasoning",
+				text: "Considering alternatives...",
+			})
+			expect(results[3]).toEqual({
+				type: "text",
+				text: ", the answer is clear.",
+			})
+		})
+
+		it("should ignore non-string reasoning fields", async () => {
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+			// Mock the stream response with invalid reasoning types
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { reasoning: null } }],
+					}
+					yield {
+						choices: [{ delta: { thinking: 123 } }],
+					}
+					yield {
+						choices: [{ delta: { reasoning_content: { nested: "object" } } }],
+					}
+					yield {
+						choices: [{ delta: { content: "Valid response" } }],
+					}
+					yield {
+						usage: {
+							prompt_tokens: 5,
+							completion_tokens: 3,
+						},
+					}
+				},
+			}
+
+			mockCreate.mockReturnValue({
+				withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+			})
+
+			const generator = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of generator) {
+				results.push(chunk)
+			}
+
+			// Should only have the valid text content
+			const contentChunks = results.filter((r) => r.type === "text" || r.type === "reasoning")
+			expect(contentChunks).toHaveLength(1)
+			expect(contentChunks[0]).toEqual({
+				type: "text",
+				text: "Valid response",
+			})
+		})
+	})
 })
```
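
The tests above all lean on one pattern worth noting: the handler consumes the completion through `withResponse()`, so the tests stub that method to hand back any object exposing an async iterator as the stream. A stripped-down standalone illustration (the names `fakeStream` and `demo` are invented here):

```typescript
// Minimal illustration of the mocking pattern used above: any object
// with [Symbol.asyncIterator] can impersonate a streaming API response
// in a for await...of loop.
async function demo() {
	const fakeStream = {
		async *[Symbol.asyncIterator]() {
			yield { delta: { reasoning: "thinking it through..." } }
			yield { delta: { content: "final answer" } }
		},
	}

	for await (const chunk of fakeStream) {
		console.log(chunk.delta)
	}
}

void demo()
```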

src/api/providers/lite-llm.ts

Lines changed: 26 additions & 3 deletions
```diff
@@ -142,11 +142,34 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHandler {
 		let lastUsage
 
 		for await (const chunk of completion) {
-			const delta = chunk.choices[0]?.delta
+			// Handle chunks that might not have choices array (e.g., usage-only chunks)
+			const delta = chunk.choices?.[0]?.delta
 			const usage = chunk.usage as LiteLLMUsage
 
-			if (delta?.content) {
-				yield { type: "text", text: delta.content }
+			// Check for reasoning/thinking content in the delta
+			// LiteLLM may pass through reasoning content from underlying models
+			if (delta) {
+				if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+					yield { type: "reasoning", text: delta.reasoning }
+				}
+
+				// Also check for thinking content (alternative field name)
+				if ("thinking" in delta && delta.thinking && typeof delta.thinking === "string") {
+					yield { type: "reasoning", text: delta.thinking }
+				}
+
+				// Check for reasoning_content (another possible field name)
+				if (
+					"reasoning_content" in delta &&
+					delta.reasoning_content &&
+					typeof delta.reasoning_content === "string"
+				) {
+					yield { type: "reasoning", text: delta.reasoning_content }
+				}
+
+				if (delta.content) {
+					yield { type: "text", text: delta.content }
+				}
 			}
 
 			if (usage) {
```