
Commit c20d51e

Merge remote-tracking branch 'origin/main' into provider_reasoning

2 parents: 8d056f1 + 3cbdbc2
40 files changed: +592 -30 lines

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
```diff
@@ -1,5 +1,12 @@
 # Roo Code Changelog
 
+## [3.29.2] - 2025-10-27
+
+- Add support for LongCat-Flash-Thinking-FP8 models in Chutes AI provider (#8425 by @leakless21, PR by @roomote)
+- Fix: Remove specific Claude model version from settings descriptions to avoid outdated references (#8435 by @rwydaegh, PR by @roomote)
+- Fix: Correct caching logic in Roo provider to improve performance (thanks @mrubens!)
+- Fix: Ensure free models don't display pricing information in the UI (thanks @mrubens!)
+
 ## [3.29.1] - 2025-10-26
 
 ![3.29.1 Release - Window Cleaning](/releases/3.29.1-release.png)
```

packages/types/src/global-settings.ts

Lines changed: 11 additions & 0 deletions
```diff
@@ -93,6 +93,17 @@ export const globalSettingsSchema = z.object({
 	autoCondenseContextPercent: z.number().optional(),
 	maxConcurrentFileReads: z.number().optional(),
 
+	/**
+	 * Whether to include current time in the environment details
+	 * @default true
+	 */
+	includeCurrentTime: z.boolean().optional(),
+	/**
+	 * Whether to include current cost in the environment details
+	 * @default true
+	 */
+	includeCurrentCost: z.boolean().optional(),
+
 	/**
 	 * Whether to include diagnostic messages (errors, warnings) in tool outputs
 	 * @default true
```

packages/types/src/model.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -78,6 +78,8 @@ export const modelInfoSchema = z.object({
 	cachableFields: z.array(z.string()).optional(),
 	// Flag to indicate if the model is deprecated and should not be used
 	deprecated: z.boolean().optional(),
+	// Flag to indicate if the model is free (no cost)
+	isFree: z.boolean().optional(),
 	/**
 	 * Service tiers with pricing information.
 	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
```
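The flag is consumed twice later in the commit: the Roo model fetcher derives it from a `free` tag, and the Roo handler zeroes out reported cost when it is set. For the changelog item about free models not displaying pricing, a hedged sketch of the kind of display-side check involved (the helper below is hypothetical, not code from this diff):

```ts
// Hypothetical display helper; ModelInfo shape trimmed to the relevant fields.
function pricingLabel(info: { isFree?: boolean; inputPrice?: number }): string | undefined {
	if (info.isFree) return undefined // free models suppress pricing entirely
	return info.inputPrice !== undefined ? `$${info.inputPrice.toFixed(2)}/M input tokens` : undefined
}
```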

packages/types/src/providers/chutes.ts

Lines changed: 11 additions & 0 deletions
```diff
@@ -35,6 +35,7 @@ export type ChutesModelId =
 	| "zai-org/GLM-4.5-turbo"
 	| "zai-org/GLM-4.6-FP8"
 	| "zai-org/GLM-4.6-turbo"
+	| "meituan-longcat/LongCat-Flash-Thinking-FP8"
 	| "moonshotai/Kimi-K2-Instruct-75k"
 	| "moonshotai/Kimi-K2-Instruct-0905"
 	| "Qwen/Qwen3-235B-A22B-Thinking-2507"
@@ -339,6 +340,16 @@ export const chutesModels = {
 		outputPrice: 3.25,
 		description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.",
 	},
+	"meituan-longcat/LongCat-Flash-Thinking-FP8": {
+		maxTokens: 32768,
+		contextWindow: 128000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description:
+			"LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.",
+	},
 	"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
 		maxTokens: 32768,
 		contextWindow: 262144,
```

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 68 additions & 0 deletions
```diff
@@ -275,6 +275,74 @@ describe("ChutesHandler", () => {
 		)
 	})
 
+	it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => {
+		const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 202752,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description:
+					"GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios.",
+				temperature: 0.5, // Default temperature for non-DeepSeek models
+			}),
+		)
+	})
+
+	it("should return zai-org/GLM-4.6-turbo model with correct configuration", () => {
+		const testModelId: ChutesModelId = "zai-org/GLM-4.6-turbo"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 202752,
+				contextWindow: 202752,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 1.15,
+				outputPrice: 3.25,
+				description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference.",
+				temperature: 0.5, // Default temperature for non-DeepSeek models
+			}),
+		)
+	})
+
+	it("should return meituan-longcat/LongCat-Flash-Thinking-FP8 model with correct configuration", () => {
+		const testModelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 128000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description:
+					"LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.",
+				temperature: 0.5, // Default temperature for non-DeepSeek models
+			}),
+		)
+	})
+
 	it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => {
 		const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
 		const handlerWithModel = new ChutesHandler({
```

src/api/providers/fetchers/__tests__/litellm.spec.ts

Lines changed: 90 additions & 0 deletions
```diff
@@ -589,4 +589,94 @@ describe("getLiteLLMModels", () => {
 
 		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
 	})
+
+	it("prefers max_output_tokens over max_tokens when both are present", async () => {
+		const mockResponse = {
+			data: {
+				data: [
+					{
+						model_name: "claude-3-5-sonnet-4-5",
+						model_info: {
+							max_tokens: 200000, // This should be ignored
+							max_output_tokens: 64000, // This should be used
+							max_input_tokens: 200000,
+							supports_vision: true,
+							supports_prompt_caching: false,
+							supports_computer_use: true,
+						},
+						litellm_params: {
+							model: "anthropic/claude-3-5-sonnet-4-5",
+						},
+					},
+					{
+						model_name: "model-with-only-max-tokens",
+						model_info: {
+							max_tokens: 8192, // This should be used as fallback
+							// No max_output_tokens
+							max_input_tokens: 128000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-tokens",
+						},
+					},
+					{
+						model_name: "model-with-only-max-output-tokens",
+						model_info: {
+							// No max_tokens
+							max_output_tokens: 16384, // This should be used
+							max_input_tokens: 100000,
+							supports_vision: false,
+						},
+						litellm_params: {
+							model: "test/model-with-only-max-output-tokens",
+						},
+					},
+				],
+			},
+		}
+
+		mockedAxios.get.mockResolvedValue(mockResponse)
+
+		const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+		// Should use max_output_tokens (64000) instead of max_tokens (200000)
+		expect(result["claude-3-5-sonnet-4-5"]).toEqual({
+			maxTokens: 64000,
+			contextWindow: 200000,
+			supportsImages: true,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			cacheWritesPrice: undefined,
+			cacheReadsPrice: undefined,
+			description: "claude-3-5-sonnet-4-5 via LiteLLM proxy",
+		})
+
+		// Should fall back to max_tokens when max_output_tokens is not present
+		expect(result["model-with-only-max-tokens"]).toEqual({
+			maxTokens: 8192,
+			contextWindow: 128000,
+			supportsImages: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			cacheWritesPrice: undefined,
+			cacheReadsPrice: undefined,
+			description: "model-with-only-max-tokens via LiteLLM proxy",
+		})
+
+		// Should use max_output_tokens when max_tokens is not present
+		expect(result["model-with-only-max-output-tokens"]).toEqual({
+			maxTokens: 16384,
+			contextWindow: 100000,
+			supportsImages: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			cacheWritesPrice: undefined,
+			cacheReadsPrice: undefined,
+			description: "model-with-only-max-output-tokens via LiteLLM proxy",
+		})
+	})
 })
```

src/api/providers/fetchers/litellm.ts

Lines changed: 1 addition & 2 deletions
```diff
@@ -41,10 +41,9 @@ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise
 		if (!modelName || !modelInfo || !litellmModelName) continue
 
 		models[modelName] = {
-			maxTokens: modelInfo.max_tokens || 8192,
+			maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
 			contextWindow: modelInfo.max_input_tokens || 200000,
 			supportsImages: Boolean(modelInfo.supports_vision),
-			// litellm_params.model may have a prefix like openrouter/
 			supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
 			inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
 			outputPrice: modelInfo.output_cost_per_token
```
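Note that the fallback chain uses `||`, so any falsy value (including an explicit `0`) falls through to the next candidate; the spec additions above exercise all three branches. A condensed illustration of the precedence:

```ts
// Mirrors the fallback in getLiteLLMModels; field names match LiteLLM's /model/info payload.
const maxTokensFor = (info: { max_output_tokens?: number; max_tokens?: number }): number =>
	info.max_output_tokens || info.max_tokens || 8192

maxTokensFor({ max_output_tokens: 64000, max_tokens: 200000 }) // 64000: prefers max_output_tokens
maxTokensFor({ max_tokens: 8192 }) // 8192: falls back to max_tokens
maxTokensFor({}) // 8192: hard default
```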

src/api/providers/fetchers/roo.ts

Lines changed: 1 addition & 0 deletions
```diff
@@ -96,6 +96,7 @@ export async function getRooModels(baseUrl: string, apiKey?: string): Promise<Mo
 			cacheReadsPrice: cacheReadPrice,
 			description: model.description || model.name,
 			deprecated: model.deprecated || false,
+			isFree: tags.includes("free"),
 		}
 	}
 
```
src/api/providers/roo.ts

Lines changed: 5 additions & 1 deletion
```diff
@@ -181,13 +181,17 @@ export class RooHandler extends BaseOpenAiCompatibleProvider<string> {
 		}
 
 		if (lastUsage) {
+			// Check if the current model is marked as free
+			const model = this.getModel()
+			const isFreeModel = model.info.isFree ?? false
+
 			yield {
 				type: "usage",
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
 				cacheWriteTokens: lastUsage.cache_creation_input_tokens,
 				cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
-				totalCost: lastUsage.cost ?? 0,
+				totalCost: isFreeModel ? 0 : (lastUsage.cost ?? 0),
 			}
 		}
 	}
```

src/core/environment/getEnvironmentDetails.ts

Lines changed: 21 additions & 14 deletions
```diff
@@ -190,21 +190,28 @@ export async function getEnvironmentDetails(cline: Task, includeFileDetails: boo
 		details += terminalDetails
 	}
 
-	// Add current time information with timezone.
-	const now = new Date()
-
-	const timeZone = Intl.DateTimeFormat().resolvedOptions().timeZone
-	const timeZoneOffset = -now.getTimezoneOffset() / 60 // Convert to hours and invert sign to match conventional notation
-	const timeZoneOffsetHours = Math.floor(Math.abs(timeZoneOffset))
-	const timeZoneOffsetMinutes = Math.abs(Math.round((Math.abs(timeZoneOffset) - timeZoneOffsetHours) * 60))
-	const timeZoneOffsetStr = `${timeZoneOffset >= 0 ? "+" : "-"}${timeZoneOffsetHours}:${timeZoneOffsetMinutes.toString().padStart(2, "0")}`
-	details += `\n\n# Current Time\nCurrent time in ISO 8601 UTC format: ${now.toISOString()}\nUser time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
-
-	// Add context tokens information.
-	const { contextTokens, totalCost } = getApiMetrics(cline.clineMessages)
-	const { id: modelId } = cline.api.getModel()
+	// Get settings for time and cost display
+	const { includeCurrentTime = true, includeCurrentCost = true } = state ?? {}
+
+	// Add current time information with timezone (if enabled).
+	if (includeCurrentTime) {
+		const now = new Date()
+
+		const timeZone = Intl.DateTimeFormat().resolvedOptions().timeZone
+		const timeZoneOffset = -now.getTimezoneOffset() / 60 // Convert to hours and invert sign to match conventional notation
+		const timeZoneOffsetHours = Math.floor(Math.abs(timeZoneOffset))
+		const timeZoneOffsetMinutes = Math.abs(Math.round((Math.abs(timeZoneOffset) - timeZoneOffsetHours) * 60))
+		const timeZoneOffsetStr = `${timeZoneOffset >= 0 ? "+" : "-"}${timeZoneOffsetHours}:${timeZoneOffsetMinutes.toString().padStart(2, "0")}`
+		details += `\n\n# Current Time\nCurrent time in ISO 8601 UTC format: ${now.toISOString()}\nUser time zone: ${timeZone}, UTC${timeZoneOffsetStr}`
+	}
 
-	details += `\n\n# Current Cost\n${totalCost !== null ? `$${totalCost.toFixed(2)}` : "(Not available)"}`
+	// Add context tokens information (if enabled).
+	if (includeCurrentCost) {
+		const { totalCost } = getApiMetrics(cline.clineMessages)
+		details += `\n\n# Current Cost\n${totalCost !== null ? `$${totalCost.toFixed(2)}` : "(Not available)"}`
+	}
+
+	const { id: modelId } = cline.api.getModel()
 
 	// Add current mode and any mode-specific warnings.
```
