feat: Add extended thinking for LiteLLM provider (RooCodeInc#2615)

jorgegarciarey · web-flow · commit f21bcb22a66c · 2025-04-01T19:07:30.000-07:00
* feat: add extended thinking slider to LiteLLM provider

Signed-off-by: Jorge García Rey <jorgegar@inditex.com>

* feat: add changeset

Signed-off-by: Jorge García Rey <jorgegar@inditex.com>

* fix: format

Signed-off-by: Jorge García Rey <jorgegar@inditex.com>

---------

Signed-off-by: Jorge García Rey <jorgegar@inditex.com>
diff --git a/.changeset/clever-eggs-perform.md b/.changeset/clever-eggs-perform.md
@@ -0,0 +1,5 @@
+---
+"claude-dev": minor
+---
+
+Add Enable extended thinking for LiteLLM provider
diff --git a/src/api/providers/litellm.ts b/src/api/providers/litellm.ts
@@ -59,10 +59,16 @@ export class LiteLlmHandler implements ApiHandler {
 		}
 		const modelId = this.options.liteLlmModelId || liteLlmDefaultModelId
 		const isOminiModel = modelId.includes("o1-mini") || modelId.includes("o3-mini")
+
+		// Configuration for extended thinking
+		const budgetTokens = this.options.thinkingBudgetTokens || 0
+		const reasoningOn = budgetTokens !== 0 ? true : false
+		const thinkingConfig = reasoningOn ? { type: "enabled", budget_tokens: budgetTokens } : undefined
+
 		let temperature: number | undefined = 0
 
-		if (isOminiModel) {
-			temperature = undefined // does not support temperature
+		if (isOminiModel && reasoningOn) {
+			temperature = undefined // Thinking mode doesn't support temperature
 		}
 
 		const stream = await this.client.chat.completions.create({
@@ -71,20 +77,37 @@ export class LiteLlmHandler implements ApiHandler {
 			temperature,
 			stream: true,
 			stream_options: { include_usage: true },
+			...(thinkingConfig && { thinking: thinkingConfig }), // Add thinking configuration when applicable
 		})
 
 		const inputCost = (await this.calculateCost(1e6, 0)) || 0
 		const outputCost = (await this.calculateCost(0, 1e6)) || 0
 
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
+
+			// Handle normal text content
 			if (delta?.content) {
 				yield {
 					type: "text",
 					text: delta.content,
 				}
 			}
 
+			// Handle reasoning events (thinking)
+			// Thinking is not in the standard types but may be in the response
+			interface ThinkingDelta {
+				thinking?: string
+			}
+
+			if ((delta as ThinkingDelta)?.thinking) {
+				yield {
+					type: "reasoning",
+					reasoning: (delta as ThinkingDelta).thinking || "",
+				}
+			}
+
+			// Handle token usage information
 			if (chunk.usage) {
 				const totalCost =
 					(inputCost * chunk.usage.prompt_tokens) / 1e6 + (outputCost * chunk.usage.completion_tokens) / 1e6
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -1205,6 +1205,24 @@ const ApiOptions = ({ showModelOptions, apiErrorMessage, modelIdErrorMessage, is
 						placeholder={"e.g. gpt-4"}>
 						<span style={{ fontWeight: 500 }}>Model ID</span>
 					</VSCodeTextField>
+
+					<>
+						<ThinkingBudgetSlider apiConfiguration={apiConfiguration} setApiConfiguration={setApiConfiguration} />
+						<p
+							style={{
+								fontSize: "12px",
+								marginTop: "5px",
+								color: "var(--vscode-charts-green)",
+							}}>
+							Extended thinking is available for models as Sonnet-3-7, o3-mini, Deepseek R1, etc. More info on{" "}
+							<VSCodeLink
+								href="https://docs.litellm.ai/docs/reasoning_content"
+								style={{ display: "inline", fontSize: "inherit" }}>
+								thinking mode configuration
+							</VSCodeLink>
+						</p>
+					</>
+
 					<p
 						style={{
 							fontSize: "12px",
diff --git a/webview-ui/src/components/settings/ThinkingBudgetSlider.tsx b/webview-ui/src/components/settings/ThinkingBudgetSlider.tsx
@@ -123,11 +123,12 @@ const ThinkingBudgetSlider = ({ apiConfiguration, setApiConfiguration }: Thinkin
 			{isEnabled && (
 				<>
 					<LabelContainer>
-						<Label>
+						<Label htmlFor="thinking-budget-slider">
 							<strong>Budget:</strong> {localValue.toLocaleString()} tokens
 						</Label>
 					</LabelContainer>
 					<RangeInput
+						id="thinking-budget-slider"
 						type="range"
 						min={MIN_VALID_TOKENS}
 						max={maxSliderValue}
@@ -139,9 +140,16 @@ const ThinkingBudgetSlider = ({ apiConfiguration, setApiConfiguration }: Thinkin
 						$value={localValue}
 						$min={MIN_VALID_TOKENS}
 						$max={maxSliderValue}
+						aria-label={`Thinking budget: ${localValue.toLocaleString()} tokens`}
+						aria-valuemin={MIN_VALID_TOKENS}
+						aria-valuemax={maxSliderValue}
+						aria-valuenow={localValue}
+						aria-describedby="thinking-budget-description"
 					/>
 
-					<Description>Higher budgets may allow you to achieve more comprehensive and nuanced reasoning</Description>
+					<Description id="thinking-budget-description">
+						Higher budgets may allow you to achieve more comprehensive and nuanced reasoning
+					</Description>
 				</>
 			)}
 		</Container>

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"claude-dev": minor
 +---
++
 +Add Enable extended thinking for LiteLLM provider