src/api/providers/__tests__/openai.spec.ts (+93, −0)

@@ -315,6 +315,99 @@ describe("OpenAiHandler", () => {
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.max_completion_tokens).toBe(4096)
})

it("should include thinking parameter for GLM-4.6 when reasoning is enabled", async () => {
const glm46Options: ApiHandlerOptions = {
...mockOptions,
openAiModelId: "glm-4.6",
enableReasoningEffort: true,
openAiCustomModelInfo: {
contextWindow: 200_000,
maxTokens: 98_304,
supportsPromptCache: true,
supportsReasoningBinary: true,
},
}
const glm46Handler = new OpenAiHandler(glm46Options)
const stream = glm46Handler.createMessage(systemPrompt, messages)
// Consume the stream to trigger the API call
for await (const _chunk of stream) {
}
// Assert the mockCreate was called with thinking parameter
expect(mockCreate).toHaveBeenCalled()
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toEqual({ type: "enabled" })
})

it("should not include thinking parameter for GLM-4.6 when reasoning is disabled", async () => {
const glm46NoReasoningOptions: ApiHandlerOptions = {
...mockOptions,
openAiModelId: "glm-4.6",
enableReasoningEffort: false,
openAiCustomModelInfo: {
contextWindow: 200_000,
maxTokens: 98_304,
supportsPromptCache: true,
supportsReasoningBinary: true,
},
}
const glm46NoReasoningHandler = new OpenAiHandler(glm46NoReasoningOptions)
const stream = glm46NoReasoningHandler.createMessage(systemPrompt, messages)
// Consume the stream to trigger the API call
for await (const _chunk of stream) {
}
// Assert the mockCreate was called without thinking parameter
expect(mockCreate).toHaveBeenCalled()
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})

it("should include thinking parameter for GLM-4.6 in non-streaming mode when reasoning is enabled", async () => {
const glm46NonStreamingOptions: ApiHandlerOptions = {
...mockOptions,
openAiModelId: "glm-4.6",
openAiStreamingEnabled: false,
enableReasoningEffort: true,
openAiCustomModelInfo: {
contextWindow: 200_000,
maxTokens: 98_304,
supportsPromptCache: true,
supportsReasoningBinary: true,
},
}
const glm46NonStreamingHandler = new OpenAiHandler(glm46NonStreamingOptions)
const stream = glm46NonStreamingHandler.createMessage(systemPrompt, messages)
// Consume the stream to trigger the API call
for await (const _chunk of stream) {
}
// Assert the mockCreate was called with thinking parameter
expect(mockCreate).toHaveBeenCalled()
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toEqual({ type: "enabled" })
})

it("should not include thinking parameter for non-GLM-4.6 models even with reasoning enabled", async () => {
const nonGlmOptions: ApiHandlerOptions = {
...mockOptions,
openAiModelId: "gpt-4",
enableReasoningEffort: true,
openAiCustomModelInfo: {
contextWindow: 128_000,
maxTokens: 4096,
supportsPromptCache: false,
supportsReasoningBinary: true,
},
}
const nonGlmHandler = new OpenAiHandler(nonGlmOptions)
const stream = nonGlmHandler.createMessage(systemPrompt, messages)
// Consume the stream to trigger the API call
for await (const _chunk of stream) {
}
// Assert the mockCreate was called without thinking parameter
expect(mockCreate).toHaveBeenCalled()
const callArgs = mockCreate.mock.calls[0][0]
expect(callArgs.thinking).toBeUndefined()
})
})

describe("error handling", () => {
src/api/providers/openai.ts (+18, −0)

@@ -94,6 +94,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
const ark = modelUrl.includes(".volces.com")

// Check if this is GLM-4.6 model with reasoning support
// GLM-4.6 uses the 'thinking' parameter instead of 'reasoning_effort' for enabling reasoning
// This is a vendor-specific implementation detail for Z AI's GLM models
const isGLM46WithReasoning =
modelId.includes("glm-4.6") &&
this.options.enableReasoningEffort &&
(modelInfo.supportsReasoningBinary || this.options.openAiCustomModelInfo?.supportsReasoningBinary)

if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
return
@@ -166,6 +174,11 @@
...(reasoning && reasoning),
Review comment (Author):
Potential parameter conflict: The thinking parameter (added for GLM-4.6 on lines 178-180) could coexist with reasoning_effort from the spread reasoning object on this line. If a GLM-4.6 model has both supportsReasoningBinary: true and supportsReasoningEffort: true configured, both parameters would be sent to the API. Since the PR description states GLM-4.6 uses thinking instead of reasoning_effort, consider excluding the reasoning object when adding the thinking parameter to prevent both from being present simultaneously.

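A minimal TypeScript sketch of one way to implement the reviewer's suggestion, kept separate from the diff. The helper name buildReasoningFields and the ReasoningParams shape are assumptions for illustration; the actual reasoning object in the handler may carry other fields.

// Hypothetical sketch of the reviewer's suggestion, not the PR's actual change.
// Assumes the names from the diff: `reasoning` (the spread reasoning params)
// and `isGLM46WithReasoning` (the flag computed earlier in createMessage).
type ReasoningParams = { reasoning_effort?: "low" | "medium" | "high" }

function buildReasoningFields(
	isGLM46WithReasoning: boolean,
	reasoning: ReasoningParams | undefined,
): Record<string, unknown> {
	// When GLM-4.6 binary reasoning applies, send only the vendor-specific
	// `thinking` parameter and drop `reasoning_effort` entirely.
	if (isGLM46WithReasoning) {
		return { thinking: { type: "enabled" } }
	}
	// Otherwise fall back to the usual reasoning_effort spread (if any).
	return { ...(reasoning ?? {}) }
}

// Usage inside the request builder (sketch):
// const requestOptions = { model: modelId, messages, ...buildReasoningFields(isGLM46WithReasoning, reasoning) }

This keeps the two reasoning controls mutually exclusive at the point where requestOptions is assembled, so no later mutation of the request object is needed.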

}

// Add thinking parameter for GLM-4.6 when reasoning is enabled
if (isGLM46WithReasoning) {
;(requestOptions as any).thinking = { type: "enabled" }
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)

@@ -233,6 +246,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
: [systemMessage, ...convertToOpenAiMessages(messages)],
}

// Add thinking parameter for GLM-4.6 when reasoning is enabled (non-streaming)
if (isGLM46WithReasoning) {
;(requestOptions as any).thinking = { type: "enabled" }
}

// Add max_tokens if needed
this.addMaxTokensIfNeeded(requestOptions, modelInfo)
