
Commit 9b5ee27

Merge pull request #1587 from dleen/cache

feat: Add prompt caching to OpenAI-compatible custom models

2 parents 6fb61a2 + 10c1e7d

File tree: 3 files changed, +153 -3 lines

.changeset/thin-fans-deliver.md

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Add prompt caching to OpenAI-compatible custom model info

src/api/providers/openai.ts

Lines changed: 36 additions & 1 deletion

@@ -72,7 +72,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		}

 		if (this.options.openAiStreamingEnabled ?? true) {
-			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
+			let systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
 				role: "system",
 				content: systemPrompt,
 			}
@@ -83,7 +83,42 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			} else if (ark) {
 				convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
 			} else {
+				if (modelInfo.supportsPromptCache) {
+					systemMessage = {
+						role: "system",
+						content: [
+							{
+								type: "text",
+								text: systemPrompt,
+								// @ts-ignore-next-line
+								cache_control: { type: "ephemeral" },
+							},
+						],
+					}
+				}
				convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]
+				if (modelInfo.supportsPromptCache) {
+					// Note: the following logic is copied from OpenRouter:
+					// add cache_control to the last two user messages.
+					// (Note: this works because we only ever add one user message at a time; if we added multiple, we'd need to mark the user message before the last assistant message.)
+					const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2)
+					lastTwoUserMessages.forEach((msg) => {
+						if (typeof msg.content === "string") {
+							msg.content = [{ type: "text", text: msg.content }]
+						}
+						if (Array.isArray(msg.content)) {
+							// NOTE: this is fine since environment details are always appended at the end; if they weren't, and the user had added an image_url part, this would pop a text part from before it and move it to the end.
+							let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
+
+							if (!lastTextPart) {
+								lastTextPart = { type: "text", text: "..." }
+								msg.content.push(lastTextPart)
+							}
+							// @ts-ignore-next-line
+							lastTextPart["cache_control"] = { type: "ephemeral" }
+						}
+					})
+				}
 			}

 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
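
For context, here is the message-shaping logic from the hunk above as a self-contained sketch. The helper names are ours, not the project's; `cache_control` is the Anthropic-style extension field that some OpenAI-compatible gateways (e.g. OpenRouter, LiteLLM) accept on text content parts, which is why it sits outside the official OpenAI types.

```typescript
import OpenAI from "openai"

// Hypothetical helper: mark the last text part of a user message as a cache
// breakpoint. Mirrors the diff above, outside the handler class.
function markCacheBreakpoint(msg: OpenAI.Chat.ChatCompletionUserMessageParam): void {
	// Normalize plain-string content into the array-of-parts form.
	if (typeof msg.content === "string") {
		msg.content = [{ type: "text", text: msg.content }]
	}
	if (Array.isArray(msg.content)) {
		let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
		if (!lastTextPart) {
			// Nothing to mark: append a placeholder text part.
			lastTextPart = { type: "text", text: "..." }
			msg.content.push(lastTextPart)
		}
		// cache_control is not in the official OpenAI types, hence Object.assign
		// rather than a direct (type-checked) property write.
		Object.assign(lastTextPart, { cache_control: { type: "ephemeral" } })
	}
}

// As in the diff, only the last two user messages are marked, so the cache
// breakpoint advances as the conversation grows.
function applyCacheBreakpoints(messages: OpenAI.Chat.ChatCompletionMessageParam[]): void {
	messages
		.filter((m): m is OpenAI.Chat.ChatCompletionUserMessageParam => m.role === "user")
		.slice(-2)
		.forEach(markCacheBreakpoint)
}
```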

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 112 additions & 2 deletions

@@ -819,7 +819,7 @@ const ApiOptions = ({
 							style={{ fontSize: "12px" }}
 						/>
 					</div>
-					<div className="text-sm text-vscode-descriptionForeground">
+					<div className="text-sm text-vscode-descriptionForeground pt-1">
 						Is this model capable of processing and understanding images?
 					</div>
 				</div>
@@ -842,11 +842,34 @@
 							style={{ fontSize: "12px" }}
 						/>
 					</div>
-					<div className="text-sm text-vscode-descriptionForeground [pt">
+					<div className="text-sm text-vscode-descriptionForeground pt-1">
 						Is this model capable of interacting with a browser? (e.g. Claude 3.7 Sonnet).
 					</div>
 				</div>

+				<div>
+					<div className="flex items-center gap-1">
+						<Checkbox
+							checked={apiConfiguration?.openAiCustomModelInfo?.supportsPromptCache ?? false}
+							onChange={handleInputChange("openAiCustomModelInfo", (checked) => {
+								return {
+									...(apiConfiguration?.openAiCustomModelInfo || openAiModelInfoSaneDefaults),
+									supportsPromptCache: checked,
+								}
+							})}>
+							<span className="font-medium">Prompt Caching</span>
+						</Checkbox>
+						<i
+							className="codicon codicon-info text-vscode-descriptionForeground"
+							title="Enable if the model supports prompt caching. This can improve performance and reduce costs."
+							style={{ fontSize: "12px" }}
+						/>
+					</div>
+					<div className="text-sm text-vscode-descriptionForeground pt-1">
+						Is this model capable of caching prompts?
+					</div>
+				</div>
+
 				<div>
 					<VSCodeTextField
 						value={
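
The checkbox above follows the settings form's updater pattern: spread the previous custom model info (or the sane defaults) and overwrite a single field, so unrelated settings survive each edit. A minimal sketch of that pattern with the fields this PR touches; the interface name and standalone function are illustrative, not the project's actual declarations.

```typescript
// Illustrative shape only: the real record lives in the project's model-info
// types. These three fields are the ones this PR reads and writes.
interface CustomModelInfo {
	supportsPromptCache?: boolean
	cacheReadsPrice?: number // USD per million tokens read from the cache
	cacheWritesPrice?: number // USD per million tokens written to the cache
}

// The updater pattern used with handleInputChange: copy everything forward,
// then change exactly one field.
function setPromptCacheSupport(
	prev: CustomModelInfo | undefined,
	defaults: CustomModelInfo,
	checked: boolean,
): CustomModelInfo {
	return {
		...(prev ?? defaults),
		supportsPromptCache: checked,
	}
}
```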
@@ -933,6 +956,93 @@ const ApiOptions = ({
 						</VSCodeTextField>
 					</div>

+					{apiConfiguration?.openAiCustomModelInfo?.supportsPromptCache && (
+						<>
+							<div>
+								<VSCodeTextField
+									value={
+										apiConfiguration?.openAiCustomModelInfo?.cacheReadsPrice?.toString() ?? "0"
+									}
+									type="text"
+									style={{
+										borderColor: (() => {
+											const value = apiConfiguration?.openAiCustomModelInfo?.cacheReadsPrice
+
+											if (!value && value !== 0) {
+												return "var(--vscode-input-border)"
+											}
+
+											return value >= 0
+												? "var(--vscode-charts-green)"
+												: "var(--vscode-errorForeground)"
+										})(),
+									}}
+									onChange={handleInputChange("openAiCustomModelInfo", (e) => {
+										const value = (e.target as HTMLInputElement).value
+										const parsed = parseFloat(value)
+
+										return {
+											...(apiConfiguration?.openAiCustomModelInfo ??
+												openAiModelInfoSaneDefaults),
+											cacheReadsPrice: isNaN(parsed) ? 0 : parsed,
+										}
+									})}
+									placeholder="e.g. 0.0001"
+									className="w-full">
+									<div className="flex items-center gap-1">
+										<span className="font-medium">Cache Reads Price</span>
+										<i
+											className="codicon codicon-info text-vscode-descriptionForeground"
+											title="Cost per million tokens for reading from the cache. This is the price charged when a cached response is retrieved."
+											style={{ fontSize: "12px" }}
+										/>
+									</div>
+								</VSCodeTextField>
+							</div>
+							<div>
+								<VSCodeTextField
+									value={
+										apiConfiguration?.openAiCustomModelInfo?.cacheWritesPrice?.toString() ?? "0"
+									}
+									type="text"
+									style={{
+										borderColor: (() => {
+											const value = apiConfiguration?.openAiCustomModelInfo?.cacheWritesPrice
+
+											if (!value && value !== 0) {
+												return "var(--vscode-input-border)"
+											}
+
+											return value >= 0
+												? "var(--vscode-charts-green)"
+												: "var(--vscode-errorForeground)"
+										})(),
+									}}
+									onChange={handleInputChange("openAiCustomModelInfo", (e) => {
+										const value = (e.target as HTMLInputElement).value
+										const parsed = parseFloat(value)
+
+										return {
+											...(apiConfiguration?.openAiCustomModelInfo ??
+												openAiModelInfoSaneDefaults),
+											cacheWritesPrice: isNaN(parsed) ? 0 : parsed,
+										}
+									})}
+									placeholder="e.g. 0.00005"
+									className="w-full">
+									<div className="flex items-center gap-1">
+										<span className="font-medium">Cache Writes Price</span>
+										<i
+											className="codicon codicon-info text-vscode-descriptionForeground"
+											title="Cost per million tokens for writing to the cache. This is the price charged when a prompt is cached for the first time."
+											style={{ fontSize: "12px" }}
+										/>
+									</div>
+								</VSCodeTextField>
+							</div>
+						</>
+					)}
+
 				<Button
 					variant="secondary"
 					onClick={() =>
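
Per the tooltips in the hunk above, both new prices are denominated per million tokens. A back-of-the-envelope sketch of the resulting cost arithmetic; the function name and token counts are illustrative, not part of this PR.

```typescript
// Cache-related cost for one request, given per-million-token prices.
function cacheCost(
	cacheWriteTokens: number, // tokens written to the cache this request
	cacheReadTokens: number, // tokens served from the cache this request
	cacheWritesPrice: number, // e.g. 0.00005 ($ per million tokens)
	cacheReadsPrice: number, // e.g. 0.0001 ($ per million tokens)
): number {
	return (
		(cacheWriteTokens * cacheWritesPrice + cacheReadTokens * cacheReadsPrice) /
		1_000_000
	)
}

// Example: a 10,000-token prompt served entirely from cache at $0.0001/M
// costs 10_000 * 0.0001 / 1_000_000 = $0.000001.
console.log(cacheCost(0, 10_000, 0.00005, 0.0001)) // 1e-6
```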
