Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions intercept_anthropic_messages_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
return
}

// Any existing tool definitions.
for _, tool := range i.req.Tools {
if tool.OfTool == nil {
continue
}

// Explicitly unset all cache control settings, we'll set one at the end.
tool.OfTool.CacheControl = anthropic.CacheControlEphemeralParam{}
}

// Inject tools.
for _, tool := range i.mcpProxy.ListTools() {
i.req.Tools = append(i.req.Tools, anthropic.ToolUnionParam{
Expand All @@ -55,10 +65,18 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
Name: tool.ID,
Description: anthropic.String(tool.Description),
Type: anthropic.ToolTypeCustom,
// Explicitly unset all cache control settings, we'll set one at the end.
CacheControl: anthropic.CacheControlEphemeralParam{},
},
})
}

// See https://docs.claude.com/en/docs/build-with-claude/prompt-caching.
// "The cache_control parameter on the last tool definition caches all tool definitions."
if count := len(i.req.Tools); count > 0 {
i.req.Tools[count-1].OfTool.CacheControl = anthropic.NewCacheControlEphemeralParam()
}

// Note: Parallel tool calls are disabled to avoid tool_use/tool_result block mismatches.
i.req.ToolChoice = anthropic.ToolChoiceUnionParam{
OfAny: &anthropic.ToolChoiceAnyParam{
Expand All @@ -68,6 +86,28 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
}
}

// removeUnnecessaryCacheMarkers clears client-supplied cache control settings on the
// content blocks of "assistant" messages, since they are unnecessary and/or may
// interfere with the cache control we need to implement ourselves.
//
// Anthropic only allows for 4 cache markers per request, and it's more beneficial for
// us to spend one on the tool definitions, so markers on assistant content are dropped:
// https://docs.claude.com/en/docs/build-with-claude/prompt-caching#when-to-use-multiple-breakpoints
//
// Every assistant block kind that carries a cache control setting is cleared, not only
// text blocks. Messages with any other role are left untouched. It is a no-op when no
// request is set.
func (i *AnthropicMessagesInterceptionBase) removeUnnecessaryCacheMarkers() {
	if i.req == nil {
		return
	}

	for _, msg := range i.req.Messages {
		if msg.Role != anthropic.MessageParamRoleAssistant {
			continue
		}

		for _, block := range msg.Content {
			// GetCacheControl returns a pointer into the underlying variant (nil when the
			// variant has no such field), so writing through it updates the request in
			// place even though block itself is a copy of the union value.
			if cc := block.GetCacheControl(); cc != nil {
				*cc = anthropic.CacheControlEphemeralParam{}
			}
		}
	}
}

// isSmallFastModel checks if the model is a small/fast model (Haiku 3.5).
// These models are optimized for tasks like code autocomplete and other small, quick operations.
// See `ANTHROPIC_SMALL_FAST_MODEL`: https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables
Expand Down
1 change: 1 addition & 0 deletions intercept_anthropic_messages_blocking.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func (i *AnthropicMessagesBlockingInterception) ProcessRequest(w http.ResponseWr

ctx := r.Context()

i.removeUnnecessaryCacheMarkers()
i.injectTools()

var (
Expand Down
1 change: 1 addition & 0 deletions intercept_anthropic_messages_streaming.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func (i *AnthropicMessagesStreamingInterception) ProcessRequest(w http.ResponseW
logger.Warn(ctx, "failed to determine last user prompt", slog.Error(err))
}

i.removeUnnecessaryCacheMarkers()
// Only inject tools into "actual" request.
i.injectTools()
}
Expand Down