From d50edaf3ce0e8a4325b7f8b8345256ca79999e96 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Tue, 23 Sep 2025 13:14:13 +0000 Subject: [PATCH 1/2] feat: add DeepSeek V3.1 Terminus/Turbo variants and enable reasoning for hybrid models - Added deepseek-ai/DeepSeek-V3.1-Terminus and deepseek-ai/DeepSeek-V3.1-Turbo model variants to ChutesModelId type - Enabled reasoning mode support for DeepSeek V3.1 and GLM-4.5 models when enableReasoningEffort is true - Updated ChutesHandler to parse tags for reasoning content in supported hybrid models - Added tests for new model variants and reasoning mode functionality Fixes #8256 --- packages/types/src/providers/chutes.ts | 20 +++ src/api/providers/__tests__/chutes.spec.ts | 143 +++++++++++++++++++++ src/api/providers/chutes.ts | 18 ++- 3 files changed, 179 insertions(+), 2 deletions(-) diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts index 15dea58263..ad63a72b7f 100644 --- a/packages/types/src/providers/chutes.ts +++ b/packages/types/src/providers/chutes.ts @@ -6,6 +6,8 @@ export type ChutesModelId = | "deepseek-ai/DeepSeek-R1" | "deepseek-ai/DeepSeek-V3" | "deepseek-ai/DeepSeek-V3.1" + | "deepseek-ai/DeepSeek-V3.1-Terminus" + | "deepseek-ai/DeepSeek-V3.1-Turbo" | "unsloth/Llama-3.3-70B-Instruct" | "chutesai/Llama-4-Scout-17B-16E-Instruct" | "unsloth/Mistral-Nemo-Instruct-2407" @@ -74,6 +76,24 @@ export const chutesModels = { outputPrice: 0, description: "DeepSeek V3.1 model.", }, + "deepseek-ai/DeepSeek-V3.1-Terminus": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.", + }, + "deepseek-ai/DeepSeek-V3.1-Turbo": { + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.", + }, "unsloth/Llama-3.3-70B-Instruct": { maxTokens: 32768, // From Groq contextWindow: 131072, // From Groq diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index 398f86ce60..f3e5abe59b 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -297,6 +297,50 @@ describe("ChutesHandler", () => { ) }) + it("should return DeepSeek V3.1 Terminus model with correct configuration", () => { + const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Terminus" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.", + temperature: 0.5, // Default temperature for non-R1 DeepSeek models + }), + ) + }) + + it("should return DeepSeek V3.1 Turbo model with correct configuration", () => { + const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Turbo" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + 
expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 163840, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.", + temperature: 0.5, // Default temperature for non-R1 DeepSeek models + }), + ) + }) + it("should return moonshotai/Kimi-K2-Instruct-0905 model with correct configuration", () => { const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-0905" const handlerWithModel = new ChutesHandler({ @@ -470,4 +514,103 @@ describe("ChutesHandler", () => { const model = handlerWithModel.getModel() expect(model.info.temperature).toBe(0.5) }) + + it.skip("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => { + const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1" + const handlerWithModel = new ChutesHandler({ + apiModelId: modelId, + chutesApiKey: "test-chutes-api-key", + enableReasoningEffort: true, + }) + + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [{ delta: { content: "Reasoning contentRegular content" } }], + } + yield { + usage: { prompt_tokens: 100, completion_tokens: 50 }, + } + }, + })) + + const systemPrompt = "You are a helpful assistant" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] + + const stream = handlerWithModel.createMessage(systemPrompt, messages) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should parse reasoning content separately + expect(chunks).toContainEqual({ type: "reasoning", text: "Reasoning content" }) + expect(chunks).toContainEqual({ type: "text", text: "Regular content" }) + }) + + it.skip("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => { + const modelId: ChutesModelId = "zai-org/GLM-4.5-Air" + const handlerWithModel = new ChutesHandler({ + apiModelId: modelId, + chutesApiKey: "test-chutes-api-key", + enableReasoningEffort: true, + }) + + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [{ delta: { content: "GLM reasoningGLM response" } }], + } + yield { + usage: { prompt_tokens: 100, completion_tokens: 50 }, + } + }, + })) + + const systemPrompt = "You are a helpful assistant" + const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] + + const stream = handlerWithModel.createMessage(systemPrompt, messages) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should parse reasoning content separately + expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning" }) + expect(chunks).toContainEqual({ type: "text", text: "GLM response" }) + }) + + it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => { + const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1" + const handlerWithModel = new ChutesHandler({ + apiModelId: modelId, + chutesApiKey: "test-chutes-api-key", + enableReasoningEffort: false, + }) + + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [{ delta: { content: "Reasoning contentRegular content" } }], + } + yield { + usage: { prompt_tokens: 100, completion_tokens: 50 }, + } + }, + })) + + const systemPrompt = "You are a helpful assistant" + const 
messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }] + + const stream = handlerWithModel.createMessage(systemPrompt, messages) + const chunks = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Should NOT parse reasoning content when disabled + expect(chunks).toContainEqual({ type: "text", text: "Reasoning contentRegular content" }) + expect(chunks).not.toContainEqual({ type: "reasoning", text: "Reasoning content" }) + }) }) diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index 62121bd19d..2e23d7fe4f 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" import type { ApiHandlerOptions } from "../../shared/api" +import { shouldUseReasoningEffort } from "../../shared/api" import { XmlMatcher } from "../../utils/xml-matcher" import { convertToR1Format } from "../transform/r1-format" import { convertToOpenAiMessages } from "../transform/openai-format" @@ -47,10 +48,23 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { const model = this.getModel() - if (model.id.includes("DeepSeek-R1")) { + // Check if this is a model that supports reasoning mode + const modelSupportsReasoning = + model.id.includes("DeepSeek-R1") || model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") + + // Check if reasoning is enabled via user settings + const reasoningEnabled = this.options.enableReasoningEffort !== false + + if (modelSupportsReasoning && reasoningEnabled) { + // For DeepSeek R1 models, use the R1 format conversion + const isR1Model = model.id.includes("DeepSeek-R1") + const messageParams = isR1Model + ? 
{ messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) } + : {} + const stream = await this.client.chat.completions.create({ ...this.getCompletionParams(systemPrompt, messages), - messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]), + ...messageParams, }) const matcher = new XmlMatcher( From be2ad231412f715e82675fe5435833cc0b3d061d Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 2 Oct 2025 22:07:20 +0200 Subject: [PATCH 2/2] feat: Add DeepSeek V3.1 variants and GLM-4.6 with reasoning support (#8256) - Add DeepSeek-V3.1-Terminus and DeepSeek-V3.1-turbo models - Add GLM-4.6-FP8 model with 200K context window - Fix reasoning implementation to use chat_template_kwargs with thinking parameter - Parse reasoning_content field for hybrid reasoning models (DeepSeek V3.1, GLM-4.5, GLM-4.6) - Update tests to verify reasoning mode functionality - Fix capitalization: DeepSeek-V3.1-Turbo -> DeepSeek-V3.1-turbo Fixes #8256 --- packages/types/src/providers/chutes.ts | 21 +++++- src/api/providers/__tests__/chutes.spec.ts | 80 +++++++++++++++++++--- src/api/providers/chutes.ts | 71 ++++++++++++++----- 3 files changed, 144 insertions(+), 28 deletions(-) diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts index ad63a72b7f..8d85bb59c6 100644 --- a/packages/types/src/providers/chutes.ts +++ b/packages/types/src/providers/chutes.ts @@ -7,7 +7,7 @@ export type ChutesModelId = | "deepseek-ai/DeepSeek-V3" | "deepseek-ai/DeepSeek-V3.1" | "deepseek-ai/DeepSeek-V3.1-Terminus" - | "deepseek-ai/DeepSeek-V3.1-Turbo" + | "deepseek-ai/DeepSeek-V3.1-turbo" | "unsloth/Llama-3.3-70B-Instruct" | "chutesai/Llama-4-Scout-17B-16E-Instruct" | "unsloth/Mistral-Nemo-Instruct-2407" @@ -31,6 +31,7 @@ export type ChutesModelId = | "tngtech/DeepSeek-R1T-Chimera" | "zai-org/GLM-4.5-Air" | "zai-org/GLM-4.5-FP8" + | "zai-org/GLM-4.6-FP8" | "moonshotai/Kimi-K2-Instruct-75k" | "moonshotai/Kimi-K2-Instruct-0905" | "Qwen/Qwen3-235B-A22B-Thinking-2507" @@ -72,6 +73,7 @@ export const chutesModels = { contextWindow: 163840, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3.1 model.", @@ -81,15 +83,17 @@ export const chutesModels = { contextWindow: 163840, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.", }, - "deepseek-ai/DeepSeek-V3.1-Turbo": { + "deepseek-ai/DeepSeek-V3.1-turbo": { maxTokens: 32768, contextWindow: 163840, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.", @@ -279,6 +283,7 @@ export const chutesModels = { contextWindow: 151329, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: @@ -289,11 +294,23 @@ export const chutesModels = { contextWindow: 131072, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture.", }, + "zai-org/GLM-4.6-FP8": { + maxTokens: 32768, + contextWindow: 204800, + supportsImages: false, + supportsPromptCache: false, + supportsReasoningEffort: 
true, + inputPrice: 0, + outputPrice: 0, + description: + "GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.", + }, "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { maxTokens: 32768, contextWindow: 262144, diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts index f3e5abe59b..dbf5c77712 100644 --- a/src/api/providers/__tests__/chutes.spec.ts +++ b/src/api/providers/__tests__/chutes.spec.ts @@ -253,6 +253,30 @@ describe("ChutesHandler", () => { ) }) + it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => { + const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8" + const handlerWithModel = new ChutesHandler({ + apiModelId: testModelId, + chutesApiKey: "test-chutes-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 32768, + contextWindow: 204800, + supportsImages: false, + supportsPromptCache: false, + supportsReasoningEffort: true, + inputPrice: 0, + outputPrice: 0, + description: + "GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.", + temperature: 0.5, // Default temperature for non-DeepSeek models + }), + ) + }) + it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => { const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8" const handlerWithModel = new ChutesHandler({ @@ -311,6 +335,7 @@ describe("ChutesHandler", () => { contextWindow: 163840, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.", @@ -319,8 +344,8 @@ describe("ChutesHandler", () => { ) }) - it("should return DeepSeek V3.1 Turbo model with correct configuration", () => { - const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Turbo" + it("should return DeepSeek V3.1 turbo model with correct configuration", () => { + const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-turbo" const handlerWithModel = new ChutesHandler({ apiModelId: testModelId, chutesApiKey: "test-chutes-api-key", @@ -333,6 +358,7 @@ describe("ChutesHandler", () => { contextWindow: 163840, supportsImages: false, supportsPromptCache: false, + supportsReasoningEffort: true, inputPrice: 0, outputPrice: 0, description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.", @@ -515,7 +541,7 @@ describe("ChutesHandler", () => { expect(model.info.temperature).toBe(0.5) }) - it.skip("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => { + it("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => { const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1" const handlerWithModel = new ChutesHandler({ apiModelId: modelId, @@ -525,10 +551,17 @@ describe("ChutesHandler", () => { mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: async function* () { + // First yield reasoning content yield { - choices: [{ delta: { content: "Reasoning contentRegular content" } }], + choices: [{ delta: { reasoning_content: "Let me think about this..." } }], } + // Then yield regular content yield { + choices: [{ delta: { content: "Here's my response." 
} }], + } + // Finally yield usage + yield { + choices: [], usage: { prompt_tokens: 100, completion_tokens: 50 }, } }, @@ -543,12 +576,22 @@ describe("ChutesHandler", () => { chunks.push(chunk) } - // Should parse reasoning content separately - expect(chunks).toContainEqual({ type: "reasoning", text: "Reasoning content" }) - expect(chunks).toContainEqual({ type: "text", text: "Regular content" }) + // Should parse reasoning content and regular content separately + expect(chunks).toContainEqual({ type: "reasoning", text: "Let me think about this..." }) + expect(chunks).toContainEqual({ type: "text", text: "Here's my response." }) + expect(chunks).toContainEqual({ type: "usage", inputTokens: 100, outputTokens: 50 }) + + // Verify that the API was called with reasoning enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + chat_template_kwargs: { + thinking: true, + }, + }), + ) }) - it.skip("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => { + it("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => { const modelId: ChutesModelId = "zai-org/GLM-4.5-Air" const handlerWithModel = new ChutesHandler({ apiModelId: modelId, @@ -558,10 +601,17 @@ describe("ChutesHandler", () => { mockCreate.mockImplementationOnce(async () => ({ [Symbol.asyncIterator]: async function* () { + // First yield reasoning content + yield { + choices: [{ delta: { reasoning_content: "GLM reasoning process..." } }], + } + // Then yield regular content yield { - choices: [{ delta: { content: "GLM reasoningGLM response" } }], + choices: [{ delta: { content: "GLM response" } }], } + // Finally yield usage yield { + choices: [], usage: { prompt_tokens: 100, completion_tokens: 50 }, } }, @@ -577,8 +627,17 @@ describe("ChutesHandler", () => { } // Should parse reasoning content separately - expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning" }) + expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning process..." }) expect(chunks).toContainEqual({ type: "text", text: "GLM response" }) + + // Verify that the API was called with reasoning enabled + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + chat_template_kwargs: { + thinking: true, + }, + }), + ) }) it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => { @@ -595,6 +654,7 @@ describe("ChutesHandler", () => { choices: [{ delta: { content: "Reasoning contentRegular content" } }], } yield { + choices: [], usage: { prompt_tokens: 100, completion_tokens: 50 }, } }, diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index 2e23d7fe4f..37e23fc776 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -27,6 +27,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { private getCompletionParams( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + enableReasoning: boolean = false, ): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming { const { id: model, @@ -35,7 +36,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { const temperature = this.options.modelTemperature ?? 
this.getModel().info.temperature - return { + const params: any = { model, max_tokens, temperature, @@ -43,28 +44,25 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { stream: true, stream_options: { include_usage: true }, } + + // Add reasoning support for DeepSeek V3.1, GLM-4.5, and GLM-4.6 models + if (enableReasoning) { + params.chat_template_kwargs = { + thinking: true, + } + } + + return params } override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { const model = this.getModel() - // Check if this is a model that supports reasoning mode - const modelSupportsReasoning = - model.id.includes("DeepSeek-R1") || model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") - - // Check if reasoning is enabled via user settings - const reasoningEnabled = this.options.enableReasoningEffort !== false - - if (modelSupportsReasoning && reasoningEnabled) { - // For DeepSeek R1 models, use the R1 format conversion - const isR1Model = model.id.includes("DeepSeek-R1") - const messageParams = isR1Model - ? { messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) } - : {} - + // Handle DeepSeek R1 models with XML tag parsing + if (model.id.includes("DeepSeek-R1")) { const stream = await this.client.chat.completions.create({ ...this.getCompletionParams(systemPrompt, messages), - ...messageParams, + messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]), }) const matcher = new XmlMatcher( @@ -98,7 +96,48 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider { for (const processedChunk of matcher.final()) { yield processedChunk } + return + } + + // Handle DeepSeek V3.1, GLM-4.5, and GLM-4.6 models with reasoning_content parsing + const isHybridReasoningModel = + model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") || model.id.includes("GLM-4.6") + const reasoningEnabled = this.options.enableReasoningEffort === true + + if (isHybridReasoningModel && reasoningEnabled) { + const stream = await this.client.chat.completions.create( + this.getCompletionParams(systemPrompt, messages, true), + ) + + for await (const chunk of stream) { + const delta = chunk.choices[0]?.delta + + // Handle reasoning content from the response + if ((delta as any)?.reasoning_content) { + yield { + type: "reasoning", + text: (delta as any).reasoning_content, + } + } + + // Handle regular text content + if (delta?.content) { + yield { + type: "text", + text: delta.content, + } + } + + if (chunk.usage) { + yield { + type: "usage", + inputTokens: chunk.usage.prompt_tokens || 0, + outputTokens: chunk.usage.completion_tokens || 0, + } + } + } } else { + // For non-reasoning models or when reasoning is disabled, use the base implementation yield* super.createMessage(systemPrompt, messages) } }
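

Note for reviewers (not part of the patches above): the hunks in src/api/providers/chutes.ts send a non-standard chat_template_kwargs.thinking flag and read a non-standard reasoning_content field from streamed deltas. The standalone TypeScript sketch below shows how a caller could exercise that stream shape end to end against an OpenAI-compatible endpoint; the base URL, model id, and environment variable name are illustrative assumptions and are not taken from the patch.

import OpenAI from "openai"

async function streamWithThinking() {
	const client = new OpenAI({
		baseURL: "https://llm.chutes.ai/v1", // assumed Chutes endpoint, for illustration only
		apiKey: process.env.CHUTES_API_KEY ?? "",
	})

	// chat_template_kwargs.thinking is the non-standard flag the patch sends for
	// DeepSeek V3.1 / GLM-4.5 / GLM-4.6. It is not in the OpenAI SDK types, so the
	// params object is typed as any, mirroring getCompletionParams in the patch.
	const params: any = {
		model: "deepseek-ai/DeepSeek-V3.1",
		messages: [{ role: "user", content: "Hello" }],
		stream: true,
		stream_options: { include_usage: true },
		chat_template_kwargs: { thinking: true },
	}

	const stream = (await client.chat.completions.create(params)) as AsyncIterable<any>

	for await (const chunk of stream) {
		const delta = chunk.choices?.[0]?.delta
		// Hybrid reasoning models return their thinking in a separate, non-standard
		// reasoning_content delta field instead of inline tags (the DeepSeek R1 path
		// in the patch keeps using XmlMatcher for tag-based parsing).
		if (delta?.reasoning_content) {
			process.stdout.write("[reasoning] " + delta.reasoning_content)
		}
		if (delta?.content) {
			process.stdout.write(delta.content)
		}
		if (chunk.usage) {
			console.log("\nusage:", chunk.usage.prompt_tokens, "in /", chunk.usage.completion_tokens, "out")
		}
	}
}

streamWithThinking().catch(console.error)

This mirrors the provider-side loop added in patch 2: reasoning_content deltas become "reasoning" chunks, content deltas become "text" chunks, and the trailing usage object becomes a "usage" chunk, which is also what the updated tests assert.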