Commit a9ca177

OpenRouter Gemini caching (#2847)
* OpenRouter Gemini caching
* Fix tests
* Remove unsupported models
* Clean up the task header a bit
* Update src/api/providers/openrouter.ts
* Remove model that doesn't seem to work

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
1 parent e53d299 commit a9ca177

File tree: 8 files changed (+537 -497 lines)


src/api/providers/__tests__/openrouter.test.ts

Lines changed: 230 additions & 256 deletions
Large diffs are not rendered by default.

src/api/providers/openrouter.ts

Lines changed: 79 additions & 53 deletions

@@ -6,7 +6,7 @@ import OpenAI from "openai"
 import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
 import { parseApiPrice } from "../../utils/cost"
 import { convertToOpenAiMessages } from "../transform/openai-format"
-import { ApiStreamChunk, ApiStreamUsageChunk } from "../transform/stream"
+import { ApiStreamChunk } from "../transform/stream"
 import { convertToR1Format } from "../transform/r1-format"

 import { DEFAULT_HEADERS, DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
@@ -28,6 +28,22 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
     }
 }

+// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
+// `CompletionsAPI.CompletionUsage`
+// See also: https://openrouter.ai/docs/use-cases/usage-accounting
+interface CompletionUsage {
+    completion_tokens?: number
+    completion_tokens_details?: {
+        reasoning_tokens?: number
+    }
+    prompt_tokens?: number
+    prompt_tokens_details?: {
+        cached_tokens?: number
+    }
+    total_tokens?: number
+    cost?: number
+}
+
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
     protected options: ApiHandlerOptions
     private client: OpenAI
@@ -46,7 +62,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
         systemPrompt: string,
         messages: Anthropic.Messages.MessageParam[],
     ): AsyncGenerator<ApiStreamChunk> {
-        let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()
+        let {
+            id: modelId,
+            maxTokens,
+            thinking,
+            temperature,
+            supportsPromptCache,
+            topP,
+            reasoningEffort,
+        } = this.getModel()

         // Convert Anthropic messages to OpenAI format.
         let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -59,46 +83,42 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
             openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
         }

-        // prompt caching: https://openrouter.ai/docs/prompt-caching
-        // this is specifically for claude models (some models may 'support prompt caching' automatically without this)
-        switch (true) {
-            case modelId.startsWith("anthropic/"):
-                openAiMessages[0] = {
-                    role: "system",
-                    content: [
-                        {
-                            type: "text",
-                            text: systemPrompt,
-                            // @ts-ignore-next-line
-                            cache_control: { type: "ephemeral" },
-                        },
-                    ],
-                }
+        // Prompt caching: https://openrouter.ai/docs/prompt-caching
+        // Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
+        if (supportsPromptCache) {
+            openAiMessages[0] = {
+                role: "system",
+                content: [
+                    {
+                        type: "text",
+                        text: systemPrompt,
+                        // @ts-ignore-next-line
+                        cache_control: { type: "ephemeral" },
+                    },
+                ],
+            }

-                // Add cache_control to the last two user messages
-                // (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
-                const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)
+            // Add cache_control to the last two user messages
+            // (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
+            const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)

-                lastTwoUserMessages.forEach((msg) => {
-                    if (typeof msg.content === "string") {
-                        msg.content = [{ type: "text", text: msg.content }]
-                    }
+            lastTwoUserMessages.forEach((msg) => {
+                if (typeof msg.content === "string") {
+                    msg.content = [{ type: "text", text: msg.content }]
+                }

-                    if (Array.isArray(msg.content)) {
-                        // NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
-                        let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+                if (Array.isArray(msg.content)) {
+                    // NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
+                    let lastTextPart = msg.content.filter((part) => part.type === "text").pop()

-                        if (!lastTextPart) {
-                            lastTextPart = { type: "text", text: "..." }
-                            msg.content.push(lastTextPart)
-                        }
-                        // @ts-ignore-next-line
-                        lastTextPart["cache_control"] = { type: "ephemeral" }
+                    if (!lastTextPart) {
+                        lastTextPart = { type: "text", text: "..." }
+                        msg.content.push(lastTextPart)
                     }
-                })
-                break
-            default:
-                break
+                    // @ts-ignore-next-line
+                    lastTextPart["cache_control"] = { type: "ephemeral" }
+                }
+            })
         }

         // https://openrouter.ai/docs/transforms
@@ -125,9 +145,9 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

         const stream = await this.client.chat.completions.create(completionParams)

-        let lastUsage
+        let lastUsage: CompletionUsage | undefined = undefined

-        for await (const chunk of stream as unknown as AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>) {
+        for await (const chunk of stream) {
             // OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
             if ("error" in chunk) {
                 const error = chunk.error as { message?: string; code?: number }
@@ -137,13 +157,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

             const delta = chunk.choices[0]?.delta

-            if ("reasoning" in delta && delta.reasoning) {
-                yield { type: "reasoning", text: delta.reasoning } as ApiStreamChunk
+            if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") {
+                yield { type: "reasoning", text: delta.reasoning }
             }

             if (delta?.content) {
                 fullResponseText += delta.content
-                yield { type: "text", text: delta.content } as ApiStreamChunk
+                yield { type: "text", text: delta.content }
             }

             if (chunk.usage) {
@@ -152,16 +172,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
         }

         if (lastUsage) {
-            yield this.processUsageMetrics(lastUsage)
-        }
-    }
-
-    processUsageMetrics(usage: any): ApiStreamUsageChunk {
-        return {
-            type: "usage",
-            inputTokens: usage?.prompt_tokens || 0,
-            outputTokens: usage?.completion_tokens || 0,
-            totalCost: usage?.cost || 0,
+            yield {
+                type: "usage",
+                inputTokens: lastUsage.prompt_tokens || 0,
+                outputTokens: lastUsage.completion_tokens || 0,
+                // Waiting on OpenRouter to figure out what this represents in the Gemini case
+                // and how to best support it.
+                // cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
+                reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+                totalCost: lastUsage.cost || 0,
+            }
         }
     }

@@ -171,7 +191,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

         let id = modelId ?? openRouterDefaultModelId
         const info = modelInfo ?? openRouterDefaultModelInfo
-
+        const supportsPromptCache = modelInfo?.supportsPromptCache
         const isDeepSeekR1 = id.startsWith("deepseek/deepseek-r1") || modelId === "perplexity/sonar-reasoning"
         const defaultTemperature = isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0
         const topP = isDeepSeekR1 ? 0.95 : undefined
@@ -180,6 +200,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
             id,
             info,
             ...getModelParams({ options: this.options, model: info, defaultTemperature }),
+            supportsPromptCache,
             topP,
         }
     }
@@ -269,6 +290,11 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
             modelInfo.cacheReadsPrice = 0.03
             modelInfo.maxTokens = 8192
             break
+        case rawModel.id.startsWith("google/gemini-2.5-pro-preview-03-25"):
+        case rawModel.id.startsWith("google/gemini-2.0-flash-001"):
+        case rawModel.id.startsWith("google/gemini-flash-1.5"):
+            modelInfo.supportsPromptCache = true
+            break
         default:
             break
     }
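The cache-breakpoint logic in the hunks above can be read in isolation as roughly the following sketch. It uses simplified message types rather than the OpenAI SDK's ChatCompletionMessageParam, and the markForPromptCaching helper is hypothetical, shown only to illustrate where the cache_control: { type: "ephemeral" } markers end up; in the real handler the same steps run inline in createMessage, gated on the supportsPromptCache flag returned by getModel().

// Minimal sketch with simplified types; markForPromptCaching is illustrative only.
type TextPart = { type: "text"; text: string; cache_control?: { type: "ephemeral" } }
type SimpleMessage = { role: "system" | "user" | "assistant"; content: string | TextPart[] }

function markForPromptCaching(systemPrompt: string, messages: SimpleMessage[]): void {
    // 1. Rewrite the system message as a content array so a cache breakpoint
    //    can be attached directly to the system prompt.
    messages[0] = {
        role: "system",
        content: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
    }

    // 2. Mark the last text part of the last two user messages, mirroring the
    //    fact that the extension appends one user message per turn.
    for (const msg of messages.filter((m) => m.role === "user").slice(-2)) {
        if (typeof msg.content === "string") {
            msg.content = [{ type: "text", text: msg.content }]
        }
        let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
        if (!lastTextPart) {
            lastTextPart = { type: "text", text: "..." }
            msg.content.push(lastTextPart)
        }
        lastTextPart.cache_control = { type: "ephemeral" }
    }
}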

src/api/transform/stream.ts

Lines changed: 3 additions & 1 deletion

@@ -1,4 +1,5 @@
 export type ApiStream = AsyncGenerator<ApiStreamChunk>
+
 export type ApiStreamChunk = ApiStreamTextChunk | ApiStreamUsageChunk | ApiStreamReasoningChunk

 export interface ApiStreamTextChunk {
@@ -17,5 +18,6 @@ export interface ApiStreamUsageChunk {
     outputTokens: number
     cacheWriteTokens?: number
     cacheReadTokens?: number
-    totalCost?: number // openrouter
+    reasoningTokens?: number
+    totalCost?: number
 }
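The new optional reasoningTokens field rides along on ApiStreamUsageChunk and simply stays undefined for providers that do not report it. A hedged sketch of how a consumer might read it (collectFinalUsage is a hypothetical helper and the "./stream" import path assumes a sibling module; only the exported types come from this file):

import type { ApiStream, ApiStreamUsageChunk } from "./stream"

// Drain a stream and keep the last usage chunk, which may now carry
// reasoningTokens alongside the token counts and totalCost.
async function collectFinalUsage(stream: ApiStream): Promise<ApiStreamUsageChunk | undefined> {
    let usage: ApiStreamUsageChunk | undefined
    for await (const chunk of stream) {
        if (chunk.type === "usage") {
            usage = chunk
        }
    }
    return usage
}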

webview-ui/src/components/chat/ChatRow.tsx

Lines changed: 4 additions & 2 deletions

@@ -21,7 +21,7 @@ import { ReasoningBlock } from "./ReasoningBlock"
 import Thumbnails from "../common/Thumbnails"
 import McpResourceRow from "../mcp/McpResourceRow"
 import McpToolRow from "../mcp/McpToolRow"
-import { highlightMentions } from "./TaskHeader"
+import { Mention } from "./Mention"
 import { CheckpointSaved } from "./checkpoints/CheckpointSaved"
 import { FollowUpSuggest } from "./FollowUpSuggest"

@@ -867,7 +867,9 @@ export const ChatRowContent = ({
         return (
             <div className="bg-vscode-editor-background border rounded-xs p-1 overflow-hidden whitespace-pre-wrap word-break-break-word overflow-wrap-anywhere">
                 <div className="flex justify-between gap-2">
-                    <div className="flex-grow px-2 py-1">{highlightMentions(message.text)}</div>
+                    <div className="flex-grow px-2 py-1">
+                        <Mention text={message.text} withShadow />
+                    </div>
                     <Button
                         variant="ghost"
                         size="icon"
ContextWindowProgress.tsx (new file)

Lines changed: 90 additions & 0 deletions

@@ -0,0 +1,90 @@
+import { useMemo } from "react"
+import { useTranslation } from "react-i18next"
+
+import { formatLargeNumber } from "@/utils/format"
+import { calculateTokenDistribution } from "@/utils/model-utils"
+
+interface ContextWindowProgressProps {
+    contextWindow: number
+    contextTokens: number
+    maxTokens?: number
+}
+
+export const ContextWindowProgress = ({ contextWindow, contextTokens, maxTokens }: ContextWindowProgressProps) => {
+    const { t } = useTranslation()
+    // Use the shared utility function to calculate all token distribution values
+    const tokenDistribution = useMemo(
+        () => calculateTokenDistribution(contextWindow, contextTokens, maxTokens),
+        [contextWindow, contextTokens, maxTokens],
+    )
+
+    // Destructure the values we need
+    const { currentPercent, reservedPercent, availableSize, reservedForOutput, availablePercent } = tokenDistribution
+
+    // For display purposes
+    const safeContextWindow = Math.max(0, contextWindow)
+    const safeContextTokens = Math.max(0, contextTokens)
+
+    return (
+        <>
+            <div className="flex items-center gap-2 flex-1 whitespace-nowrap px-2">
+                <div data-testid="context-tokens-count">{formatLargeNumber(safeContextTokens)}</div>
+                <div className="flex-1 relative">
+                    {/* Invisible overlay for hover area */}
+                    <div
+                        className="absolute w-full h-4 -top-[7px] z-5"
+                        title={t("chat:tokenProgress.availableSpace", { amount: formatLargeNumber(availableSize) })}
+                        data-testid="context-available-space"
+                    />
+
+                    {/* Main progress bar container */}
+                    <div className="flex items-center h-1 rounded-[2px] overflow-hidden w-full bg-[color-mix(in_srgb,var(--vscode-foreground)_20%,transparent)]">
+                        {/* Current tokens container */}
+                        <div className="relative h-full" style={{ width: `${currentPercent}%` }}>
+                            {/* Invisible overlay for current tokens section */}
+                            <div
+                                className="absolute h-4 -top-[7px] w-full z-6"
+                                title={t("chat:tokenProgress.tokensUsed", {
+                                    used: formatLargeNumber(safeContextTokens),
+                                    total: formatLargeNumber(safeContextWindow),
+                                })}
+                                data-testid="context-tokens-used"
+                            />
+                            {/* Current tokens used - darkest */}
+                            <div className="h-full w-full bg-[var(--vscode-foreground)] transition-width duration-300 ease-out" />
+                        </div>
+
+                        {/* Container for reserved tokens */}
+                        <div className="relative h-full" style={{ width: `${reservedPercent}%` }}>
+                            {/* Invisible overlay for reserved section */}
+                            <div
+                                className="absolute h-4 -top-[7px] w-full z-6"
+                                title={t("chat:tokenProgress.reservedForResponse", {
+                                    amount: formatLargeNumber(reservedForOutput),
+                                })}
+                                data-testid="context-reserved-tokens"
+                            />
+                            {/* Reserved for output section - medium gray */}
+                            <div className="h-full w-full bg-[color-mix(in_srgb,var(--vscode-foreground)_30%,transparent)] transition-width duration-300 ease-out" />
+                        </div>
+
+                        {/* Empty section (if any) */}
+                        {availablePercent > 0 && (
+                            <div className="relative h-full" style={{ width: `${availablePercent}%` }}>
+                                {/* Invisible overlay for available space */}
+                                <div
+                                    className="absolute h-4 -top-[7px] w-full z-6"
+                                    title={t("chat:tokenProgress.availableSpace", {
+                                        amount: formatLargeNumber(availableSize),
+                                    })}
+                                    data-testid="context-available-space-section"
+                                />
+                            </div>
+                        )}
+                    </div>
+                </div>
+                <div data-testid="context-window-size">{formatLargeNumber(safeContextWindow)}</div>
+            </div>
+        </>
+    )
+}
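A hypothetical usage sketch (the import path and the numbers are illustrative assumptions, not taken from this commit): the component only needs the model's context window size, the tokens used so far, and optionally the tokens reserved for the response; calculateTokenDistribution turns those into the used / reserved / available percentages rendered above.

import { ContextWindowProgress } from "./ContextWindowProgress"

// Renders a bar split into used, reserved-for-output, and available segments.
export const ContextWindowProgressExample = () => (
    <ContextWindowProgress contextWindow={200_000} contextTokens={48_500} maxTokens={8_192} />
)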
webview-ui/src/components/chat/Mention.tsx (new file)

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+import { mentionRegexGlobal } from "@roo/shared/context-mentions"
+
+import { vscode } from "../../utils/vscode"
+
+interface MentionProps {
+    text?: string
+    withShadow?: boolean
+}
+
+export const Mention = ({ text, withShadow = false }: MentionProps) => {
+    if (!text) {
+        return <>{text}</>
+    }
+
+    const parts = text.split(mentionRegexGlobal).map((part, index) => {
+        if (index % 2 === 0) {
+            // This is regular text.
+            return part
+        } else {
+            // This is a mention.
+            return (
+                <span
+                    key={index}
+                    className={`${withShadow ? "mention-context-highlight-with-shadow" : "mention-context-highlight"} cursor-pointer`}
+                    onClick={() => vscode.postMessage({ type: "openMention", text: part })}>
+                    @{part}
+                </span>
+            )
+        }
+    })
+
+    return <>{parts}</>
+}
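The index % 2 test works because String.prototype.split, given a regex with a capturing group, interleaves the captured mention text with the surrounding plain text. A quick sketch with a simplified stand-in pattern (the real mentionRegexGlobal lives in @roo/shared/context-mentions and differs in what it matches):

// Simplified stand-in for mentionRegexGlobal, for illustration only.
const simplifiedMentionRegex = /@(\S+)/g

const parts = "see @/src/app.ts and @problems".split(simplifiedMentionRegex)
console.log(parts)
// => ["see ", "/src/app.ts", " and ", "problems", ""]
// Even indices are plain text, odd indices are captured mentions, which is why
// the component re-prefixes "@" when rendering a mention span.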
