diff --git a/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts
new file mode 100644
index 000000000000..007d5c58f4e9
--- /dev/null
+++ b/src/api/providers/__tests__/base-openai-compatible-provider.spec.ts
@@ -0,0 +1,411 @@
+import { describe, it, expect, vi, beforeEach, type Mock } from "vitest"
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+import type { ModelInfo } from "@roo-code/types"
+import type { ApiHandlerOptions } from "../../../shared/api"
+
+import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"
+
+// Mock OpenAI module
+vi.mock("openai", () => {
+ const mockCreate = vi.fn()
+ const MockOpenAI = vi.fn().mockImplementation(() => ({
+ chat: {
+ completions: {
+ create: mockCreate,
+ },
+ },
+ }))
+ return { default: MockOpenAI }
+})
+
+// Create a concrete implementation for testing
+class TestOpenAiCompatibleProvider extends BaseOpenAiCompatibleProvider<"test-model" | "glm-4.6"> {
+ constructor(options: ApiHandlerOptions) {
+ super({
+ ...options,
+ providerName: "TestProvider",
+ baseURL: options.openAiBaseUrl || "https://api.test.com/v1",
+ defaultProviderModelId: "test-model",
+ providerModels: {
+ "test-model": {
+ maxTokens: 4096,
+ contextWindow: 8192,
+ supportsImages: false,
+ supportsPromptCache: false,
+ inputPrice: 0.01,
+ outputPrice: 0.02,
+ },
+ "glm-4.6": {
+ maxTokens: 8192,
+ contextWindow: 128000,
+ supportsImages: true,
+ supportsPromptCache: false,
+ inputPrice: 0.015,
+ outputPrice: 0.03,
+ },
+ },
+ })
+ }
+}
+
+describe("BaseOpenAiCompatibleProvider", () => {
+ let provider: TestOpenAiCompatibleProvider
+ let mockOpenAIInstance: any
+ let mockCreate: Mock
+
+ beforeEach(() => {
+ vi.clearAllMocks()
+ mockOpenAIInstance = new (OpenAI as any)()
+ mockCreate = mockOpenAIInstance.chat.completions.create
+ })
+
+ describe("GLM-4.6 thinking token support", () => {
+ it("should detect GLM-4.6 model correctly", () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ // Test the isGLM46Model method
+ expect((provider as any).isGLM46Model("glm-4.6")).toBe(true)
+ expect((provider as any).isGLM46Model("GLM-4.6")).toBe(true)
+ expect((provider as any).isGLM46Model("glm-4-6")).toBe(true)
+ expect((provider as any).isGLM46Model("GLM-4-6")).toBe(true)
+ expect((provider as any).isGLM46Model("test-model")).toBe(false)
+ expect((provider as any).isGLM46Model("gpt-4")).toBe(false)
+ })
+
+ it("should NOT add thinking parameter by default for GLM-4.6 model (for ik_llama.cpp compatibility)", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ // Mock the stream response
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield {
+ choices: [{ delta: { content: "Test response" } }],
+ usage: { prompt_tokens: 10, completion_tokens: 5 },
+ }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Verify that the create method was called WITHOUT thinking parameter by default
+ expect(mockCreate).toHaveBeenCalledWith(
+ expect.not.objectContaining({
+ thinking: expect.anything(),
+ }),
+ undefined,
+ )
+ })
+
+ it("should add thinking parameter only when explicitly enabled", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ // @ts-ignore
+ openAiEnableThinkingParameter: true, // Explicitly enable thinking parameter
+ })
+
+ // Mock the stream response
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield {
+ choices: [{ delta: { content: "Test response" } }],
+ usage: { prompt_tokens: 10, completion_tokens: 5 },
+ }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Now it should include the thinking parameter
+ expect(mockCreate).toHaveBeenCalledWith(
+ expect.objectContaining({
+ model: "glm-4.6",
+ thinking: { type: "enabled" },
+ stream: true,
+ }),
+ undefined,
+ )
+ })
+
+ it("should not add thinking parameter for non-GLM-4.6 models even if enabled", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "test-model",
+ // @ts-ignore
+ openAiEnableThinkingParameter: true,
+ })
+
+ // Mock the stream response
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield {
+ choices: [{ delta: { content: "Test response" } }],
+ usage: { prompt_tokens: 10, completion_tokens: 5 },
+ }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Verify that the create method was called without thinking parameter
+ expect(mockCreate).toHaveBeenCalledWith(
+ expect.not.objectContaining({
+ thinking: expect.anything(),
+ }),
+ undefined,
+ )
+ })
+
+ it("should parse thinking tokens from GLM-4.6 response using XML tags", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ // Mock the stream response with thinking tokens
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield { choices: [{ delta: { content: "" } }], usage: null }
+ yield { choices: [{ delta: { content: "Let me analyze this problem..." } }], usage: null }
+ yield { choices: [{ delta: { content: "" } }], usage: null }
+ yield { choices: [{ delta: { content: "The answer is 42." } }], usage: null }
+ yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 20 } }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [
+ { role: "user", content: "What is the meaning of life?" },
+ ]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Verify that thinking tokens were parsed correctly
+ const reasoningChunks = results.filter((r) => r.type === "reasoning")
+ const textChunks = results.filter((r) => r.type === "text")
+
+ expect(reasoningChunks.length).toBeGreaterThan(0)
+ expect(reasoningChunks.some((c) => c.text?.includes("Let me analyze this problem"))).toBe(true)
+ expect(textChunks.some((c) => c.text === "The answer is 42.")).toBe(true)
+ })
+
+ it("should handle reasoning_content in delta for models that support it (ik_llama.cpp compatibility)", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ // Mock the stream response with reasoning_content (as ik_llama.cpp might provide)
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield { choices: [{ delta: { reasoning_content: "Thinking about the problem..." } }], usage: null }
+ yield { choices: [{ delta: { content: "The solution is simple." } }], usage: null }
+ yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 15 } }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this problem" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Verify that reasoning_content was handled correctly
+ const reasoningChunks = results.filter((r) => r.type === "reasoning")
+ const textChunks = results.filter((r) => r.type === "text")
+
+ expect(reasoningChunks.some((c) => c.text === "Thinking about the problem...")).toBe(true)
+ expect(textChunks.some((c) => c.text === "The solution is simple.")).toBe(true)
+ })
+
+ it("should handle mixed reasoning formats (both XML and reasoning_content)", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ // Mock the stream response with both formats
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ // First some reasoning_content
+ yield { choices: [{ delta: { reasoning_content: "Initial thoughts..." } }], usage: null }
+ // Then XML-wrapped thinking
+ yield { choices: [{ delta: { content: "Deep analysis" } }], usage: null }
+ // Finally the actual response
+ yield { choices: [{ delta: { content: "Here's the answer." } }], usage: null }
+ yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 20 } }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Complex question" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // Verify that both types of reasoning were captured
+ const reasoningChunks = results.filter((r) => r.type === "reasoning")
+ const textChunks = results.filter((r) => r.type === "text")
+
+ expect(reasoningChunks.some((c) => c.text === "Initial thoughts...")).toBe(true)
+ expect(reasoningChunks.some((c) => c.text === "Deep analysis")).toBe(true)
+ expect(textChunks.some((c) => c.text === "Here's the answer.")).toBe(true)
+ })
+
+ it("should handle non-GLM-4.6 models without XML parsing", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "test-model",
+ })
+
+ // Mock the stream response with XML-like content that shouldn't be parsed
+ const mockStream = {
+ async *[Symbol.asyncIterator]() {
+ yield { choices: [{ delta: { content: "This is not parsed" } }], usage: null }
+ yield { choices: [{ delta: { content: "Regular response" } }], usage: null }
+ yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 15 } }
+ },
+ }
+ mockCreate.mockResolvedValue(mockStream)
+
+ // Create a message
+ const systemPrompt = "You are a helpful assistant"
+ const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test" }]
+
+ const stream = provider.createMessage(systemPrompt, messages)
+ const results = []
+ for await (const chunk of stream) {
+ results.push(chunk)
+ }
+
+ // For non-GLM-4.6, XML should not be parsed as reasoning
+ const reasoningChunks = results.filter((r) => r.type === "reasoning")
+ const textChunks = results.filter((r) => r.type === "text")
+
+ expect(reasoningChunks.length).toBe(0)
+ expect(textChunks.some((c) => c.text === "This is not parsed")).toBe(true)
+ expect(textChunks.some((c) => c.text === "Regular response")).toBe(true)
+ })
+ })
+
+ describe("completePrompt", () => {
+ it("should complete prompt successfully", async () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "test-model",
+ })
+
+ const mockResponse = {
+ choices: [{ message: { content: "Completed response" } }],
+ }
+ mockCreate.mockResolvedValue(mockResponse)
+
+ const result = await provider.completePrompt("Test prompt")
+
+ expect(result).toBe("Completed response")
+ expect(mockCreate).toHaveBeenCalledWith({
+ model: "test-model",
+ messages: [{ role: "user", content: "Test prompt" }],
+ })
+ })
+ })
+
+ describe("getModel", () => {
+ it("should return correct model info", () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ const model = provider.getModel()
+
+ expect(model.id).toBe("glm-4.6")
+ expect(model.info.maxTokens).toBe(8192)
+ expect(model.info.contextWindow).toBe(128000)
+ })
+
+ it("should use default model when apiModelId is not provided", () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ })
+
+ const model = provider.getModel()
+
+ expect(model.id).toBe("test-model")
+ expect(model.info.maxTokens).toBe(4096)
+ })
+ })
+
+ describe("shouldAddThinkingParameter", () => {
+ it("should return false by default for compatibility", () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ })
+
+ expect((provider as any).shouldAddThinkingParameter()).toBe(false)
+ })
+
+ it("should return true when explicitly enabled", () => {
+ provider = new TestOpenAiCompatibleProvider({
+ apiKey: "test-key",
+ apiModelId: "glm-4.6",
+ // @ts-ignore
+ openAiEnableThinkingParameter: true,
+ })
+
+ expect((provider as any).shouldAddThinkingParameter()).toBe(true)
+ })
+ })
+})
diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index fb6c5d03770e..14be9513d24c 100644
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -6,6 +6,7 @@ import type { ModelInfo } from "@roo-code/types"
import type { ApiHandlerOptions } from "../../shared/api"
import { ApiStream } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
+import { XmlMatcher } from "../../utils/xml-matcher"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { DEFAULT_HEADERS } from "./constants"
@@ -85,6 +86,13 @@ export abstract class BaseOpenAiCompatibleProvider
stream_options: { include_usage: true },
}
+ // Add the thinking parameter for GLM-4.6 only when explicitly enabled,
+ // so endpoints that don't support it (such as ik_llama.cpp) keep working
+ if (this.isGLM46Model(model) && this.shouldAddThinkingParameter()) {
+ // @ts-ignore - GLM-4.6 specific parameter
+ params.thinking = { type: "enabled" }
+ }
+
try {
return this.client.chat.completions.create(params, requestOptions)
} catch (error) {
@@ -98,14 +106,45 @@ export abstract class BaseOpenAiCompatibleProvider
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const stream = await this.createStream(systemPrompt, messages, metadata)
+ const { id: model } = this.getModel()
+ const isGLM46 = this.isGLM46Model(model)
+
+ // Use XmlMatcher for GLM-4.6 to parse thinking tokens from content
+ // This works regardless of whether the endpoint supports the thinking parameter
+ const matcher = isGLM46
+ ? new XmlMatcher(
+ "think",
+ (chunk) =>
+ ({
+ type: chunk.matched ? "reasoning" : "text",
+ text: chunk.data,
+ }) as const,
+ )
+ : null
for await (const chunk of stream) {
- const delta = chunk.choices[0]?.delta
+ const delta = chunk.choices?.[0]?.delta
if (delta?.content) {
+ if (isGLM46 && matcher) {
+ // Parse thinking tokens for GLM-4.6 from content
+ for (const parsedChunk of matcher.update(delta.content)) {
+ yield parsedChunk
+ }
+ } else {
+ yield {
+ type: "text",
+ text: delta.content,
+ }
+ }
+ }
+
+ // Handle reasoning_content if present (for models/endpoints that support it directly)
+ // This ensures compatibility with various implementations including ik_llama.cpp
+ if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield {
- type: "text",
- text: delta.content,
+ type: "reasoning",
+ text: (delta.reasoning_content as string | undefined) || "",
}
}
@@ -117,6 +156,13 @@ export abstract class BaseOpenAiCompatibleProvider
}
}
}
+
+ // Finalize any remaining content from the matcher
+ if (isGLM46 && matcher) {
+ for (const parsedChunk of matcher.final()) {
+ yield parsedChunk
+ }
+ }
}
async completePrompt(prompt: string): Promise<string> {
@@ -142,4 +188,25 @@ export abstract class BaseOpenAiCompatibleProvider
return { id, info: this.providerModels[id] }
}
+
+ /**
+ * Check if the model is GLM-4.6 which may require special handling for thinking tokens
+ */
+ protected isGLM46Model(modelId: string): boolean {
+ // Check for various GLM-4.6 model naming patterns
+ const lowerModel = modelId.toLowerCase()
+ return lowerModel.includes("glm-4.6") || lowerModel.includes("glm-4-6") || lowerModel === "glm-4.6"
+ }
+
+ /**
+ * Determine whether to add the thinking parameter to the request
+ * This can be overridden by subclasses or configured via options
+ * Default is conservative (false) to ensure compatibility with endpoints like ik_llama.cpp
+ */
+ protected shouldAddThinkingParameter(): boolean {
+ // Only add thinking parameter if explicitly enabled via configuration
+ // This ensures compatibility with endpoints that don't support this parameter
+ // @ts-ignore - openAiEnableThinkingParameter is not yet declared on ApiHandlerOptions
+ return this.options.openAiEnableThinkingParameter === true
+ }
}