diff --git a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts
new file mode 100644
index 000000000000..007d5c58f4e9
--- /dev/null
+++ b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts
@@ -0,0 +1,411 @@
+import { describe, it, expect, vi, beforeEach, type Mock } from "vitest"
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+import type { ModelInfo } from "@roo-code/types"
+import type { ApiHandlerOptions } from "../../../shared/api"
+
+import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"
+
+// Mock OpenAI module
+vi.mock("openai", () => {
+	const mockCreate = vi.fn()
+	const MockOpenAI = vi.fn().mockImplementation(() => ({
+		chat: {
+			completions: {
+				create: mockCreate,
+			},
+		},
+	}))
+	return { default: MockOpenAI }
+})
+
+// Create a concrete implementation for testing
+class TestOpenAiCompatibleProvider extends BaseOpenAiCompatibleProvider<"test-model" | "glm-4.6"> {
+	constructor(options: ApiHandlerOptions) {
+		super({
+			...options,
+			providerName: "TestProvider",
+			baseURL: options.openAiBaseUrl || "https://api.test.com/v1",
+			defaultProviderModelId: "test-model",
+			providerModels: {
+				"test-model": {
+					maxTokens: 4096,
+					contextWindow: 8192,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: 0.01,
+					outputPrice: 0.02,
+				},
+				"glm-4.6": {
+					maxTokens: 8192,
+					contextWindow: 128000,
+					supportsImages: true,
+					supportsPromptCache: false,
+					inputPrice: 0.015,
+					outputPrice: 0.03,
+				},
+			},
+		})
+	}
+}
+
+describe("BaseOpenAiCompatibleProvider", () => {
+	let provider: TestOpenAiCompatibleProvider
+	let mockOpenAIInstance: any
+	let mockCreate: Mock
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		mockOpenAIInstance = new (OpenAI as any)()
+		mockCreate = mockOpenAIInstance.chat.completions.create
+	})
+
+	describe("GLM-4.6 thinking token support", () => {
+		it("should detect GLM-4.6 model correctly", () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			// Test the isGLM46Model method
+			expect((provider as any).isGLM46Model("glm-4.6")).toBe(true)
+			expect((provider as any).isGLM46Model("GLM-4.6")).toBe(true)
+			expect((provider as any).isGLM46Model("glm-4-6")).toBe(true)
+			expect((provider as any).isGLM46Model("GLM-4-6")).toBe(true)
+			expect((provider as any).isGLM46Model("test-model")).toBe(false)
+			expect((provider as any).isGLM46Model("gpt-4")).toBe(false)
+		})
+
+		it("should NOT add thinking parameter by default for GLM-4.6 model (for ik_llama.cpp compatibility)", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			// Mock the stream response
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { content: "Test response" } }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Verify that the create method was called WITHOUT thinking parameter by default
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					thinking: expect.anything(),
+				}),
+				undefined,
+			)
+		})
+
+		it("should add thinking parameter only when explicitly enabled", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+				// @ts-ignore
+				openAiEnableThinkingParameter: true, // Explicitly enable thinking parameter
+			})
+
+			// Mock the stream response
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { content: "Test response" } }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Now it should include the thinking parameter
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					model: "glm-4.6",
+					thinking: { type: "enabled" },
+					stream: true,
+				}),
+				undefined,
+			)
+		})
+
+		it("should not add thinking parameter for non-GLM-4.6 models even if enabled", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "test-model",
+				// @ts-ignore
+				openAiEnableThinkingParameter: true,
+			})
+
+			// Mock the stream response
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield {
+						choices: [{ delta: { content: "Test response" } }],
+						usage: { prompt_tokens: 10, completion_tokens: 5 },
+					}
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Verify that the create method was called without thinking parameter
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.not.objectContaining({
+					thinking: expect.anything(),
+				}),
+				undefined,
+			)
+		})
+
+		it("should parse thinking tokens from GLM-4.6 response using XML tags", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			// Mock the stream response with thinking tokens
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield { choices: [{ delta: { content: "<think>" } }], usage: null }
+					yield { choices: [{ delta: { content: "Let me analyze this problem..." } }], usage: null }
+					yield { choices: [{ delta: { content: "</think>" } }], usage: null }
+					yield { choices: [{ delta: { content: "The answer is 42." } }], usage: null }
+					yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 20 } }
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "What is the meaning of life?" },
+			]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Verify that thinking tokens were parsed correctly
+			const reasoningChunks = results.filter((r) => r.type === "reasoning")
+			const textChunks = results.filter((r) => r.type === "text")
+
+			expect(reasoningChunks.length).toBeGreaterThan(0)
+			expect(reasoningChunks.some((c) => c.text?.includes("Let me analyze this problem"))).toBe(true)
+			expect(textChunks.some((c) => c.text === "The answer is 42.")).toBe(true)
+		})
+
+		it("should handle reasoning_content in delta for models that support it (ik_llama.cpp compatibility)", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			// Mock the stream response with reasoning_content (as ik_llama.cpp might provide)
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield { choices: [{ delta: { reasoning_content: "Thinking about the problem..." } }], usage: null }
+					yield { choices: [{ delta: { content: "The solution is simple." } }], usage: null }
+					yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 15 } }
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Verify that reasoning_content was handled correctly
+			const reasoningChunks = results.filter((r) => r.type === "reasoning")
+			const textChunks = results.filter((r) => r.type === "text")
+
+			expect(reasoningChunks.some((c) => c.text === "Thinking about the problem...")).toBe(true)
+			expect(textChunks.some((c) => c.text === "The solution is simple.")).toBe(true)
+		})
+
+		it("should handle mixed reasoning formats (both XML and reasoning_content)", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			// Mock the stream response with both formats
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					// First some reasoning_content
+					yield { choices: [{ delta: { reasoning_content: "Initial thoughts..." } }], usage: null }
+					// Then XML-wrapped thinking
+					yield { choices: [{ delta: { content: "<think>Deep analysis</think>" } }], usage: null }
+					// Finally the actual response
+					yield { choices: [{ delta: { content: "Here's the answer." } }], usage: null }
+					yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 20 } }
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Complex question" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Verify that both types of reasoning were captured
+			const reasoningChunks = results.filter((r) => r.type === "reasoning")
+			const textChunks = results.filter((r) => r.type === "text")
+
+			expect(reasoningChunks.some((c) => c.text === "Initial thoughts...")).toBe(true)
+			expect(reasoningChunks.some((c) => c.text === "Deep analysis")).toBe(true)
+			expect(textChunks.some((c) => c.text === "Here's the answer.")).toBe(true)
+		})
+
+		it("should handle non-GLM-4.6 models without XML parsing", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "test-model",
+			})
+
+			// Mock the stream response with XML-like content that shouldn't be parsed
+			const mockStream = {
+				async *[Symbol.asyncIterator]() {
+					yield { choices: [{ delta: { content: "<think>This is not parsed</think>" } }], usage: null }
+					yield { choices: [{ delta: { content: "Regular response" } }], usage: null }
+					yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 15 } }
+				},
+			}
+			mockCreate.mockResolvedValue(mockStream)
+
+			// Create a message
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+			const stream = provider.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// For non-GLM-4.6, XML should not be parsed as reasoning
+			const reasoningChunks = results.filter((r) => r.type === "reasoning")
+			const textChunks = results.filter((r) => r.type === "text")
+
+			expect(reasoningChunks.length).toBe(0)
+			expect(textChunks.some((c) => c.text === "<think>This is not parsed</think>")).toBe(true)
+			expect(textChunks.some((c) => c.text === "Regular response")).toBe(true)
+		})
+	})
+
+	describe("completePrompt", () => {
+		it("should complete prompt successfully", async () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "test-model",
+			})
+
+			const mockResponse = {
+				choices: [{ message: { content: "Completed response" } }],
+			}
+			mockCreate.mockResolvedValue(mockResponse)
+
+			const result = await provider.completePrompt("Test prompt")
+
+			expect(result).toBe("Completed response")
+			expect(mockCreate).toHaveBeenCalledWith({
+				model: "test-model",
+				messages: [{ role: "user", content: "Test prompt" }],
+			})
+		})
+	})
+
+	describe("getModel", () => {
+		it("should return correct model info", () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+				apiModelId: "glm-4.6",
+			})
+
+			const model = provider.getModel()
+
+			expect(model.id).toBe("glm-4.6")
+			expect(model.info.maxTokens).toBe(8192)
+			expect(model.info.contextWindow).toBe(128000)
+		})
+
+		it("should use default model when apiModelId is not provided", () => {
+			provider = new TestOpenAiCompatibleProvider({
+				apiKey: "test-key",
+			})
+
+			const model = provider.getModel()
+
+			expect(model.id).toBe("test-model")
+			expect(model.info.maxTokens).toBe(4096)
+		})
+	})
+
+	describe("shouldAddThinkingParameter", () => {
+		it("should return false by default for compatibility", () => {
compatibility", () => { + provider = new TestOpenAiCompatibleProvider({ + apiKey: "test-key", + apiModelId: "glm-4.6", + }) + + expect((provider as any).shouldAddThinkingParameter()).toBe(false) + }) + + it("should return true when explicitly enabled", () => { + provider = new TestOpenAiCompatibleProvider({ + apiKey: "test-key", + apiModelId: "glm-4.6", + // @ts-ignore + openAiEnableThinkingParameter: true, + }) + + expect((provider as any).shouldAddThinkingParameter()).toBe(true) + }) + }) +}) diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fb6c5d03770e..14be9513d24c 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -6,6 +6,7 @@ import type { ModelInfo } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" +import { XmlMatcher } from "../../utils/xml-matcher" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { DEFAULT_HEADERS } from "./constants" @@ -85,6 +86,13 @@ export abstract class BaseOpenAiCompatibleProvider stream_options: { include_usage: true }, } + // Add thinking parameter for GLM-4.6 model only if explicitly enabled + // This allows for compatibility with endpoints that don't support this parameter + if (this.isGLM46Model(model) && this.shouldAddThinkingParameter()) { + // @ts-ignore - GLM-4.6 specific parameter + params.thinking = { type: "enabled" } + } + try { return this.client.chat.completions.create(params, requestOptions) } catch (error) { @@ -98,14 +106,45 @@ export abstract class BaseOpenAiCompatibleProvider metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { const stream = await this.createStream(systemPrompt, messages, metadata) + const { id: model } = this.getModel() + const isGLM46 = this.isGLM46Model(model) + + // Use XmlMatcher for GLM-4.6 to parse thinking tokens from content + // This works regardless of whether the endpoint supports the thinking parameter + const matcher = isGLM46 + ? new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? 
"reasoning" : "text", + text: chunk.data, + }) as const, + ) + : null for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta + const delta = chunk.choices?.[0]?.delta if (delta?.content) { + if (isGLM46 && matcher) { + // Parse thinking tokens for GLM-4.6 from content + for (const parsedChunk of matcher.update(delta.content)) { + yield parsedChunk + } + } else { + yield { + type: "text", + text: delta.content, + } + } + } + + // Handle reasoning_content if present (for models/endpoints that support it directly) + // This ensures compatibility with various implementations including ik_llama.cpp + if (delta && "reasoning_content" in delta && delta.reasoning_content) { yield { - type: "text", - text: delta.content, + type: "reasoning", + text: (delta.reasoning_content as string | undefined) || "", } } @@ -117,6 +156,13 @@ export abstract class BaseOpenAiCompatibleProvider } } } + + // Finalize any remaining content from the matcher + if (isGLM46 && matcher) { + for (const parsedChunk of matcher.final()) { + yield parsedChunk + } + } } async completePrompt(prompt: string): Promise { @@ -142,4 +188,25 @@ export abstract class BaseOpenAiCompatibleProvider return { id, info: this.providerModels[id] } } + + /** + * Check if the model is GLM-4.6 which may require special handling for thinking tokens + */ + protected isGLM46Model(modelId: string): boolean { + // Check for various GLM-4.6 model naming patterns + const lowerModel = modelId.toLowerCase() + return lowerModel.includes("glm-4.6") || lowerModel.includes("glm-4-6") || lowerModel === "glm-4.6" + } + + /** + * Determine whether to add the thinking parameter to the request + * This can be overridden by subclasses or configured via options + * Default is conservative (false) to ensure compatibility with endpoints like ik_llama.cpp + */ + protected shouldAddThinkingParameter(): boolean { + // Only add thinking parameter if explicitly enabled via configuration + // This ensures compatibility with endpoints that don't support this parameter + // @ts-ignore - Check for future configuration option + return this.options.openAiEnableThinkingParameter === true + } }