diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index 2c7bd1e575..bc85dfd499 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -9,6 +9,7 @@ import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transform/caching/anthropic" import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" +import { addCacheBreakpoints as addVertexCacheBreakpoints } from "../transform/caching/vertex" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" @@ -70,6 +71,10 @@ export class UnboundHandler extends RouterProvider implements SingleCompletionHa addAnthropicCacheBreakpoints(systemPrompt, openAiMessages) } } + // Custom models from Vertex AI (no configuration) need to be handled differently. + if (modelId.startsWith("vertex-ai/google.") || modelId.startsWith("vertex-ai/anthropic.")) { + addVertexCacheBreakpoints(messages) + } // Required by Anthropic; other providers default to max tokens allowed. let maxTokens: number | undefined