Skip to content

Commit 6ad11ba

Browse files
authored
Systematic selection of gemini models w/ caching (RooCodeInc#3343)
* no more updating gemini models
* changeset
1 parent 39c7da3 commit 6ad11ba

File tree

3 files changed

+71
-69
lines changed

3 files changed

+71
-69
lines changed

.changeset/selfish-feet-camp.md

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"claude-dev": minor
3+
---
4+
5+
updated OR/cline provider to automate gemini models caching

src/api/transform/openrouter-stream.ts

Lines changed: 60 additions & 61 deletions
Original file line number · Diff line number · Diff line change
@@ -21,7 +21,7 @@ export async function createOpenRouterStream(
2121

2222
// prompt caching: https://openrouter.ai/docs/prompt-caching
2323
// this was initially specifically for claude models (some models may 'support prompt caching' automatically without this)
24-
// includes custom support for gemini which does not have iterative caching
24+
// handles direct model.id match logic
2525
switch (model.id) {
2626
case "anthropic/claude-3.7-sonnet":
2727
case "anthropic/claude-3.7-sonnet:beta":
@@ -71,77 +71,76 @@ export async function createOpenRouterStream(
7171
}
7272
})
7373
break
74-
case "google/gemini-2.5-pro-preview-03-25":
75-
case "google/gemini-2.0-flash-001":
76-
case "google/gemini-flash-1.5":
77-
case "google/gemini-pro-1.5":
78-
// gemini only uses the last breakpoint for caching, so the others will be ignored
79-
openAiMessages[0] = {
80-
role: "system",
81-
content: [
82-
{
83-
type: "text",
84-
text: systemPrompt,
85-
// @ts-ignore-next-line
86-
cache_control: { type: "ephemeral" },
87-
},
88-
],
89-
}
74+
default:
75+
break
76+
}
77+
78+
// handles gemini caching logic
79+
if (model.id.startsWith("google/") && model.info.supportsPromptCache) {
80+
// gemini only uses the last breakpoint for caching, so the others will be ignored
81+
openAiMessages[0] = {
82+
role: "system",
83+
content: [
84+
{
85+
type: "text",
86+
text: systemPrompt,
87+
// @ts-ignore-next-line
88+
cache_control: { type: "ephemeral" },
89+
},
90+
],
91+
}
9092

91-
// for safety, but this should always be the case
92-
if (openAiMessages.length >= 2) {
93-
const msg = openAiMessages[1]
93+
// for safety, but this should always be the case
94+
if (openAiMessages.length >= 2) {
95+
const msg = openAiMessages[1]
9496

95-
if (msg) {
96-
if (typeof msg.content === "string") {
97-
msg.content = [{ type: "text", text: msg.content }]
98-
}
99-
if (Array.isArray(msg.content)) {
100-
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
101-
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
102-
103-
if (!lastTextPart) {
104-
lastTextPart = { type: "text", text: "..." }
105-
msg.content.push(lastTextPart)
106-
}
107-
// @ts-ignore-next-line
108-
lastTextPart["cache_control"] = { type: "ephemeral" }
97+
if (msg) {
98+
if (typeof msg.content === "string") {
99+
msg.content = [{ type: "text", text: msg.content }]
100+
}
101+
if (Array.isArray(msg.content)) {
102+
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
103+
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
104+
105+
if (!lastTextPart) {
106+
lastTextPart = { type: "text", text: "..." }
107+
msg.content.push(lastTextPart)
109108
}
109+
// @ts-ignore-next-line
110+
lastTextPart["cache_control"] = { type: "ephemeral" }
110111
}
111112
}
113+
}
112114

113-
// it doesn't make sense to alter breakpoints at all with the gemini cache implementation at this time
114-
/*const GEMINI_CACHE_USER_MESSAGE_INTERVAL = 4 // add new breakpoint every 4 turns
115-
const userMessages = openAiMessages.filter((msg) => msg.role === "user")
115+
// it doesn't make sense to alter breakpoints at all with the gemini cache implementation at this time
116+
/*const GEMINI_CACHE_USER_MESSAGE_INTERVAL = 4 // add new breakpoint every 4 turns
117+
const userMessages = openAiMessages.filter((msg) => msg.role === "user")
116118
117-
const userMessageCount = userMessages.length
118-
const targetUserMessageNumber =
119-
Math.floor(userMessageCount / GEMINI_CACHE_USER_MESSAGE_INTERVAL) * GEMINI_CACHE_USER_MESSAGE_INTERVAL
119+
const userMessageCount = userMessages.length
120+
const targetUserMessageNumber =
121+
Math.floor(userMessageCount / GEMINI_CACHE_USER_MESSAGE_INTERVAL) * GEMINI_CACHE_USER_MESSAGE_INTERVAL
120122
121-
if (targetUserMessageNumber > 0) {
122-
// otherwise dont need to add a breakpoint
123-
const msg = userMessages[targetUserMessageNumber - 1]
123+
if (targetUserMessageNumber > 0) {
124+
// otherwise dont need to add a breakpoint
125+
const msg = userMessages[targetUserMessageNumber - 1]
124126
125-
if (msg) {
126-
if (typeof msg.content === "string") {
127-
msg.content = [{ type: "text", text: msg.content }]
128-
}
129-
if (Array.isArray(msg.content)) {
130-
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
131-
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
132-
133-
if (!lastTextPart) {
134-
lastTextPart = { type: "text", text: "..." }
135-
msg.content.push(lastTextPart)
136-
}
137-
// @ts-ignore-next-line
138-
lastTextPart["cache_control"] = { type: "ephemeral" }
127+
if (msg) {
128+
if (typeof msg.content === "string") {
129+
msg.content = [{ type: "text", text: msg.content }]
130+
}
131+
if (Array.isArray(msg.content)) {
132+
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
133+
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
134+
135+
if (!lastTextPart) {
136+
lastTextPart = { type: "text", text: "..." }
137+
msg.content.push(lastTextPart)
139138
}
139+
// @ts-ignore-next-line
140+
lastTextPart["cache_control"] = { type: "ephemeral" }
140141
}
141-
}*/
142-
break
143-
default:
144-
break
142+
}
143+
}*/
145144
}
146145

147146
// Not sure how openrouter defaults max tokens when no value is provided, but the anthropic api requires this value and since they offer both 4096 and 8192 variants, we should ensure 8192.

src/core/controller/index.ts

Lines changed: 6 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -1301,14 +1301,6 @@ Here is the project's README to help you get started:\n\n${mcpDetails.readmeCont
13011301
modelInfo.cacheWritesPrice = 0.14
13021302
modelInfo.cacheReadsPrice = 0.014
13031303
break
1304-
case "google/gemini-2.5-pro-preview-03-25":
1305-
case "google/gemini-2.0-flash-001":
1306-
case "google/gemini-flash-1.5":
1307-
case "google/gemini-pro-1.5":
1308-
modelInfo.supportsPromptCache = true
1309-
modelInfo.cacheWritesPrice = parsePrice(rawModel.pricing?.input_cache_write)
1310-
modelInfo.cacheReadsPrice = parsePrice(rawModel.pricing?.input_cache_read)
1311-
break
13121304
default:
13131305
if (rawModel.id.startsWith("openai/")) {
13141306
modelInfo.cacheReadsPrice = parsePrice(rawModel.pricing?.input_cache_read)
@@ -1317,6 +1309,12 @@ Here is the project's README to help you get started:\n\n${mcpDetails.readmeCont
13171309
modelInfo.cacheWritesPrice = parsePrice(rawModel.pricing?.input_cache_write)
13181310
// openrouter charges no cache write pricing for openAI models
13191311
}
1312+
} else if (rawModel.id.startsWith("google/")) {
1313+
modelInfo.cacheReadsPrice = parsePrice(rawModel.pricing?.input_cache_read)
1314+
if (modelInfo.cacheReadsPrice) {
1315+
modelInfo.supportsPromptCache = true
1316+
modelInfo.cacheWritesPrice = parsePrice(rawModel.pricing?.input_cache_write)
1317+
}
13201318
}
13211319
break
13221320
}

0 commit comments

Comments (0)