Commit 69a7825

fix: add GLM-4.6 reasoning support for OpenAI Compatible provider
- Added logic to detect GLM-4.6 models and enable the thinking parameter when reasoning is enabled
- The thinking parameter is now properly added for both streaming and non-streaming modes
- Added comprehensive tests to verify GLM-4.6 reasoning functionality
- This fixes the issue where GLM-4.6 reasoning was not working with the OpenAI Compatible provider

Fixes #9012
1 parent 8e4b145 commit 69a7825
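For context, a minimal sketch of the request shape this change produces (illustrative only, not code from the commit): when the model id contains "glm-4.6", reasoning is enabled, and the model info reports supportsReasoningBinary, the provider attaches a thinking field to the chat-completions payload it already builds. The field values below are placeholders.

// Illustrative only: the shape of the outgoing request once the GLM-4.6 check passes.
// Every field except "thinking" is the usual chat-completions payload; values are placeholders.
const exampleRequest: Record<string, unknown> = {
	model: "glm-4.6",
	stream: true,
	messages: [
		{ role: "system", content: "You are a helpful assistant." },
		{ role: "user", content: "Hello" },
	],
	thinking: { type: "enabled" }, // added by this commit when reasoning is enabled
}
console.log(JSON.stringify(exampleRequest, null, 2))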

File tree

2 files changed: +109 -0 lines changed

src/api/providers/__tests__/openai.spec.ts

Lines changed: 93 additions & 0 deletions
@@ -315,6 +315,99 @@ describe("OpenAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
+
+		it("should include thinking parameter for GLM-4.6 when reasoning is enabled", async () => {
+			const glm46Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46Handler = new OpenAiHandler(glm46Options)
+			const stream = glm46Handler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter for GLM-4.6 when reasoning is disabled", async () => {
+			const glm46NoReasoningOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				enableReasoningEffort: false,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46NoReasoningHandler = new OpenAiHandler(glm46NoReasoningOptions)
+			const stream = glm46NoReasoningHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toBeUndefined()
+		})
+
+		it("should include thinking parameter for GLM-4.6 in non-streaming mode when reasoning is enabled", async () => {
+			const glm46NonStreamingOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				openAiStreamingEnabled: false,
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46NonStreamingHandler = new OpenAiHandler(glm46NonStreamingOptions)
+			const stream = glm46NonStreamingHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter for non-GLM-4.6 models even with reasoning enabled", async () => {
+			const nonGlmOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "gpt-4",
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+					supportsReasoningBinary: true,
+				},
+			}
+			const nonGlmHandler = new OpenAiHandler(nonGlmOptions)
+			const stream = nonGlmHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toBeUndefined()
+		})
 	})
 
 	describe("error handling", () => {

src/api/providers/openai.ts

Lines changed: 16 additions & 0 deletions
@@ -94,6 +94,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
 		const ark = modelUrl.includes(".volces.com")
 
+		// Check if this is GLM-4.6 model with reasoning support
+		const isGLM46WithReasoning =
+			modelId.includes("glm-4.6") &&
+			this.options.enableReasoningEffort &&
+			(modelInfo.supportsReasoningBinary || this.options.openAiCustomModelInfo?.supportsReasoningBinary)
+
 		if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
 			return
@@ -166,6 +172,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			...(reasoning && reasoning),
 		}
 
+		// Add thinking parameter for GLM-4.6 when reasoning is enabled
+		if (isGLM46WithReasoning) {
+			;(requestOptions as any).thinking = { type: "enabled" }
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -233,6 +244,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				: [systemMessage, ...convertToOpenAiMessages(messages)],
 		}
 
+		// Add thinking parameter for GLM-4.6 when reasoning is enabled (non-streaming)
+		if (isGLM46WithReasoning) {
+			;(requestOptions as any).thinking = { type: "enabled" }
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)

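Usage note (a sketch assembled from the test fixtures above, not additional code from this commit): the thinking parameter is only attached when all three conditions hold, so an OpenAI Compatible profile would need to be configured roughly as follows.

// Options mirroring the test fixtures; they would be passed to new OpenAiHandler(...) as in the spec.
// All three pieces are required for the new path: a model id containing "glm-4.6",
// enableReasoningEffort set to true, and supportsReasoningBinary advertised in the model info.
const glm46ProfileOptions = {
	openAiModelId: "glm-4.6",
	enableReasoningEffort: true,
	openAiCustomModelInfo: {
		contextWindow: 200_000,
		maxTokens: 98_304,
		supportsPromptCache: true,
		supportsReasoningBinary: true,
	},
}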