5 changes: 5 additions & 0 deletions .changeset/shaggy-turtles-report.md
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Disable Gemini prompt caching
46 changes: 23 additions & 23 deletions src/api/providers/gemini.ts
@@ -40,24 +40,24 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
let cacheWriteTokens: number | undefined = undefined

// https://ai.google.dev/gemini-api/docs/caching?lang=node
- if (info.supportsPromptCache && cacheKey) {
- const cacheEntry = this.contentCaches.get(cacheKey)
+ // if (info.supportsPromptCache && cacheKey) {
+ // const cacheEntry = this.contentCaches.get(cacheKey)

- if (cacheEntry) {
- uncachedContent = contents.slice(cacheEntry.count, contents.length)
- cachedContent = cacheEntry.key
- }
+ // if (cacheEntry) {
+ // uncachedContent = contents.slice(cacheEntry.count, contents.length)
+ // cachedContent = cacheEntry.key
+ // }

- const newCacheEntry = await this.client.caches.create({
- model,
- config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
- })
+ // const newCacheEntry = await this.client.caches.create({
+ // model,
+ // config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
+ // })

- if (newCacheEntry.name) {
- this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
- cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
- }
- }
+ // if (newCacheEntry.name) {
+ // this.contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
+ // cacheWriteTokens = newCacheEntry.usageMetadata?.totalTokenCount ?? 0
+ // }
+ // }

const params: GenerateContentParameters = {
model,
@@ -94,13 +94,13 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
const cacheReadTokens = lastUsageMetadata.cachedContentTokenCount
const reasoningTokens = lastUsageMetadata.thoughtsTokenCount

- const totalCost = this.calculateCost({
- info,
- inputTokens,
- outputTokens,
- cacheWriteTokens,
- cacheReadTokens,
- })
+ // const totalCost = this.calculateCost({
+ // info,
+ // inputTokens,
+ // outputTokens,
+ // cacheWriteTokens,
+ // cacheReadTokens,
+ // })

yield {
type: "usage",
@@ -109,7 +109,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
cacheWriteTokens,
cacheReadTokens,
reasoningTokens,
- totalCost,
+ // totalCost,
}
}
}
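For reference, the commented-out block above implemented explicit Gemini content caching: look up an earlier cache entry for the conversation, send only the turns added since that cache was written, and write a fresh cache for the next request. Below is a minimal sketch of that flow, not the project's code: `generateWithCache` is a hypothetical wrapper, and the `config.cachedContent` field used to reference the cache comes from the Gemini caching docs linked in the code rather than from this diff.

```ts
import { GoogleGenAI, type Content } from "@google/genai"

const CACHE_TTL = 5 // minutes; assumed to match the CACHE_TTL constant used above

const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY })
const contentCaches = new Map<string, { key: string; count: number }>()

// Hypothetical wrapper mirroring the now-disabled path in GeminiHandler.
async function generateWithCache(
	model: string,
	cacheKey: string,
	contents: Content[],
	systemInstruction: string,
) {
	// Reuse the cache written on a previous request, if any.
	const cacheEntry = contentCaches.get(cacheKey)
	const uncachedContent = cacheEntry ? contents.slice(cacheEntry.count) : contents

	// Write a fresh cache covering the full conversation for the next call;
	// newCacheEntry.usageMetadata?.totalTokenCount reports the cache-write tokens.
	const newCacheEntry = await client.caches.create({
		model,
		config: { contents, systemInstruction, ttl: `${CACHE_TTL * 60}s` },
	})

	if (newCacheEntry.name) {
		contentCaches.set(cacheKey, { key: newCacheEntry.name, count: contents.length })
	}

	return client.models.generateContentStream({
		model,
		contents: uncachedContent,
		// Assumed field: point the request at the previously written cache entry.
		config: { cachedContent: cacheEntry?.key },
	})
}
```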
31 changes: 20 additions & 11 deletions src/api/providers/openrouter.ts
@@ -85,17 +85,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

// Prompt caching: https://openrouter.ai/docs/prompt-caching
// Now with Gemini support: https://openrouter.ai/docs/features/prompt-caching
- if (supportsPromptCache) {
+ // Note that we don't check the `ModelInfo` object because it is cached
+ // in the settings for OpenRouter.
+ if (this.isPromptCacheSupported(modelId)) {
openAiMessages[0] = {
role: "system",
- content: [
- {
- type: "text",
- text: systemPrompt,
- // @ts-ignore-next-line
- cache_control: { type: "ephemeral" },
- },
- ],
+ // @ts-ignore-next-line
+ content: [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }],
}

// Add cache_control to the last two user messages
@@ -108,13 +108,17 @@ export class OpenRouterHandl
}

if (Array.isArray(msg.content)) {
- // NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
+ // NOTE: This is fine since env details will always be added
+ // at the end. But if it wasn't there, and the user added a
+ // image_url type message, it would pop a text part before
+ // it and then move it after to the end.
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()

if (!lastTextPart) {
lastTextPart = { type: "text", text: "..." }
msg.content.push(lastTextPart)
}

// @ts-ignore-next-line
lastTextPart["cache_control"] = { type: "ephemeral" }
}
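For illustration, the breakpoint logic above produces a payload roughly like the sketch below (all prompt and message text is made up). The `cache_control` markers are the Anthropic-style breakpoints that OpenRouter forwards; they are not part of the OpenAI SDK types, which is why the assignments above need `@ts-ignore`.

```ts
// Sketch of the resulting message payload; every string here is a placeholder.
const messages = [
	{
		role: "system",
		content: [
			// Breakpoint 1: the large, stable system prompt.
			{ type: "text", text: "<system prompt>", cache_control: { type: "ephemeral" } },
		],
	},
	{ role: "user", content: [{ type: "text", text: "Earlier turn (not marked)" }] },
	{ role: "assistant", content: "Earlier reply" },
	{
		role: "user",
		// Second-to-last user message: its last text part gets a breakpoint.
		content: [{ type: "text", text: "Penultimate user turn", cache_control: { type: "ephemeral" } }],
	},
	{ role: "assistant", content: "Another reply" },
	{
		role: "user",
		content: [
			{ type: "image_url", image_url: { url: "data:image/png;base64,<data>" } },
			// Last user message: again the trailing text part carries the marker.
			{ type: "text", text: "Latest user turn", cache_control: { type: "ephemeral" } },
		],
	},
]
```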
@@ -227,6 +227,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
const completion = response as OpenAI.Chat.ChatCompletion
return completion.choices[0]?.message?.content || ""
}

+ private isPromptCacheSupported(modelId: string) {
+ return (
+ modelId.startsWith("anthropic/claude-3.7-sonnet") ||
+ modelId.startsWith("anthropic/claude-3.5-sonnet") ||
+ modelId.startsWith("anthropic/claude-3-opus") ||
+ modelId.startsWith("anthropic/claude-3-haiku")
+ )
+ }
}

export async function getOpenRouterModels(options?: ApiHandlerOptions) {
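The new gate is a plain prefix match on the OpenRouter model slug, so variant slugs of the listed Claude models keep prompt caching while Gemini (and everything else) no longer gets `cache_control` markers. A quick illustration of the matching semantics; `cacheablePrefixes` and `isCacheable` are a restatement for this sketch, not the project's code, and the model IDs are examples:

```ts
// startsWith() covers variant slugs such as ":beta" or ":thinking".
const cacheablePrefixes = [
	"anthropic/claude-3.7-sonnet",
	"anthropic/claude-3.5-sonnet",
	"anthropic/claude-3-opus",
	"anthropic/claude-3-haiku",
]

const isCacheable = (modelId: string) => cacheablePrefixes.some((p) => modelId.startsWith(p))

console.log(isCacheable("anthropic/claude-3.7-sonnet:thinking")) // true
console.log(isCacheable("google/gemini-2.0-flash-001")) // false: Gemini caching is disabled by this PR
```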
@@ -250,7 +259,7 @@ export async function getOpenRouterModels(options?: ApiHandlerOptions) {
thinking: rawModel.id === "anthropic/claude-3.7-sonnet:thinking",
}

- // NOTE: this needs to be synced with api.ts/openrouter default model info.
+ // NOTE: This needs to be synced with api.ts/openrouter default model info.
switch (true) {
case rawModel.id.startsWith("anthropic/claude-3.7-sonnet"):
modelInfo.supportsComputerUse = true