Commit 69a7825

fix: add GLM-4.6 reasoning support for OpenAI Compatible provider
- Added logic to detect GLM-4.6 models and enable the thinking parameter when reasoning is enabled
- The thinking parameter is now properly added for both streaming and non-streaming modes
- Added comprehensive tests to verify GLM-4.6 reasoning functionality
- This fixes the issue where GLM-4.6 reasoning was not working with the OpenAI Compatible provider

Fixes #9012
1 parent 8e4b145 commit 69a7825
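For context, a minimal sketch of the request shape this change produces (illustrative only, not code from the commit): when the model id contains "glm-4.6", reasoning is enabled, and the model info reports supportsReasoningBinary, the provider attaches a thinking field to the chat-completions payload it already builds. The field values below are placeholders.

// Illustrative only: the shape of the outgoing request once the GLM-4.6 check passes.
// Every field except "thinking" is the usual chat-completions payload; values are placeholders.
const exampleRequest: Record<string, unknown> = {
	model: "glm-4.6",
	stream: true,
	messages: [
		{ role: "system", content: "You are a helpful assistant." },
		{ role: "user", content: "Hello" },
	],
	thinking: { type: "enabled" }, // added by this commit when reasoning is enabled
}
console.log(JSON.stringify(exampleRequest, null, 2))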

File tree

2 files changed: +109 -0 lines changed

src/api/providers/__tests__/openai.spec.ts

Lines changed: 93 additions & 0 deletions
@@ -315,6 +315,99 @@ describe("OpenAiHandler", () => {
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
+
+		it("should include thinking parameter for GLM-4.6 when reasoning is enabled", async () => {
+			const glm46Options: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46Handler = new OpenAiHandler(glm46Options)
+			const stream = glm46Handler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter for GLM-4.6 when reasoning is disabled", async () => {
+			const glm46NoReasoningOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				enableReasoningEffort: false,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46NoReasoningHandler = new OpenAiHandler(glm46NoReasoningOptions)
+			const stream = glm46NoReasoningHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toBeUndefined()
+		})
+
+		it("should include thinking parameter for GLM-4.6 in non-streaming mode when reasoning is enabled", async () => {
+			const glm46NonStreamingOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "glm-4.6",
+				openAiStreamingEnabled: false,
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 200_000,
+					maxTokens: 98_304,
+					supportsPromptCache: true,
+					supportsReasoningBinary: true,
+				},
+			}
+			const glm46NonStreamingHandler = new OpenAiHandler(glm46NonStreamingOptions)
+			const stream = glm46NonStreamingHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter for non-GLM-4.6 models even with reasoning enabled", async () => {
+			const nonGlmOptions: ApiHandlerOptions = {
+				...mockOptions,
+				openAiModelId: "gpt-4",
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+					supportsReasoningBinary: true,
+				},
+			}
+			const nonGlmHandler = new OpenAiHandler(nonGlmOptions)
+			const stream = nonGlmHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called without thinking parameter
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.thinking).toBeUndefined()
+		})
 	})
 
 	describe("error handling", () => {

src/api/providers/openai.ts

Lines changed: 16 additions & 0 deletions
@@ -94,6 +94,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
 		const ark = modelUrl.includes(".volces.com")
 
+		// Check if this is GLM-4.6 model with reasoning support
+		const isGLM46WithReasoning =
+			modelId.includes("glm-4.6") &&
+			this.options.enableReasoningEffort &&
+			(modelInfo.supportsReasoningBinary || this.options.openAiCustomModelInfo?.supportsReasoningBinary)
+
 		if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
 			return
@@ -166,6 +172,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			...(reasoning && reasoning),
 		}
 
+		// Add thinking parameter for GLM-4.6 when reasoning is enabled
+		if (isGLM46WithReasoning) {
+			;(requestOptions as any).thinking = { type: "enabled" }
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)
 
@@ -233,6 +244,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				: [systemMessage, ...convertToOpenAiMessages(messages)],
 		}
 
+		// Add thinking parameter for GLM-4.6 when reasoning is enabled (non-streaming)
+		if (isGLM46WithReasoning) {
+			;(requestOptions as any).thinking = { type: "enabled" }
+		}
+
 		// Add max_tokens if needed
 		this.addMaxTokensIfNeeded(requestOptions, modelInfo)

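Usage note (a sketch assembled from the test fixtures above, not additional code from this commit): the thinking parameter is only attached when all three conditions hold, so an OpenAI Compatible profile would need to be configured roughly as follows.

// Options mirroring the test fixtures; they would be passed to new OpenAiHandler(...) as in the spec.
// All three pieces are required for the new path: a model id containing "glm-4.6",
// enableReasoningEffort set to true, and supportsReasoningBinary advertised in the model info.
const glm46ProfileOptions = {
	openAiModelId: "glm-4.6",
	enableReasoningEffort: true,
	openAiCustomModelInfo: {
		contextWindow: 200_000,
		maxTokens: 98_304,
		supportsPromptCache: true,
		supportsReasoningBinary: true,
	},
}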