diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java index e94a1a220c5..fed960ed76a 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java @@ -22,31 +22,104 @@ * system → messages. * * @author Mark Pollack + * @author Soby Chacko * @since 1.1.0 */ public enum AnthropicCacheStrategy { /** - * No caching (default behavior). + * No caching (default behavior). All content is processed fresh on each request. + *

+ * Use this when: + *

*/ NONE, + /** + * Cache tool definitions only. Places a cache breakpoint on the last tool, while + * system messages and conversation history remain uncached and are processed fresh on + * each request. + *

+ * Use this when: + *

+ *

+ * Important: Changing any tool definition will invalidate this cache + * entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate + * any downstream cache breakpoints (system, messages) if used in combination with + * other strategies. + */ + TOOLS_ONLY, + /** * Cache system instructions only. Places a cache breakpoint on the system message - * content. + * content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback + * mechanism (content before the cache breakpoint is included in the cache). + *

+ * Use this when: + *

+ *

+ * Note: Changing tools will invalidate the cache since tools are + * part of the cache prefix (they appear before system in the request hierarchy). */ SYSTEM_ONLY, /** * Cache system instructions and tool definitions. Places cache breakpoints on the - * last tool and system message content. + * last tool (breakpoint 1) and system message content (breakpoint 2). + *

+ * Use this when: + *

+ *

+ * Behavior: + *

+ * This allows efficient reuse of tool cache when only system prompts are updated. */ SYSTEM_AND_TOOLS, /** * Cache the entire conversation history up to (but not including) the current user - * question. This is ideal for multi-turn conversations where you want to reuse the - * conversation context while asking new questions. + * question. Places a cache breakpoint on the last user message in the conversation + * history, enabling incremental caching as the conversation grows. + *

+ * Use this when: + *

+ *

+ * Behavior: Each turn builds on the previous cached prefix. The + * cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1 + * + Message2], etc. This provides significant cost savings (90%+) and performance + * improvements for long conversations. + *

+ * Important: Changing tools or system prompts will invalidate the + * entire conversation cache due to cascade invalidation. Tool and system stability is + * critical for this strategy. */ CONVERSATION_HISTORY diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java index a5443166a9b..cebd9988ea4 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java @@ -39,6 +39,7 @@ * definition messages. * * @author Austin Dase + * @author Soby Chacko * @since 1.1.0 **/ public class CacheEligibilityResolver { @@ -84,6 +85,7 @@ private static Set extractEligibleMessageTypes(AnthropicCacheStrate return switch (anthropicCacheStrategy) { case NONE -> Set.of(); case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM); + case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions case CONVERSATION_HISTORY -> Set.of(MessageType.values()); }; } @@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa } public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() { - // Tool definitions are only cache-eligible when caching is enabled and - // the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or - // CONVERSATION_HISTORY). When NONE, tools must not be cached. - if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE) - || this.cacheBreakpointTracker.allBreakpointsAreUsed()) { + // Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and + // CONVERSATION_HISTORY strategies. 
SYSTEM_ONLY caches only system messages, + // relying on Anthropic's cache hierarchy to implicitly cache tools. + if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY + && this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS + && this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) { + logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy); + return null; + } + + if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) { logger.debug("Caching not enabled for tool definition, usedBreakpoints={}", this.cacheBreakpointTracker.getCount()); return null; diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java index a914a243085..aac3622e137 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java @@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception { this.mockWebServer .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + // Create tool callback to test that tools are NOT cached with SYSTEM_ONLY + var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class); + MethodToolCallback toolCallback = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build()) + .toolMethod(toolMethod) + .build(); + // Test with SYSTEM_ONLY cache strategy AnthropicChatOptions options = AnthropicChatOptions.builder() .cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build()) + .toolCallbacks(List.of(toolCallback)) .build(); Prompt prompt = new Prompt( @@ -130,6 +138,18 
@@ void testSystemOnlyCacheStrategy() throws Exception { assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral"); } + // Verify tools exist but DO NOT have cache_control (key difference from + // SYSTEM_AND_TOOLS) + if (requestBody.has("tools")) { + JsonNode toolsArray = requestBody.get("tools"); + assertThat(toolsArray.isArray()).isTrue(); + // Verify NO tool has cache_control + for (int i = 0; i < toolsArray.size(); i++) { + JsonNode tool = toolsArray.get(i); + assertThat(tool.has("cache_control")).isFalse(); + } + } + // Verify response assertThat(response).isNotNull(); assertThat(response.getResult().getOutput().getText()).contains("Hello!"); diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java index d056baddcec..0b594500dd3 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java @@ -30,6 +30,7 @@ * Tests for {@link CacheEligibilityResolver}. 
* * @author Austin Dase + * @author Soby Chacko */ class CacheEligibilityResolverTests { @@ -78,14 +79,215 @@ void toolCacheControlRespectsStrategy() { .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build()); assertThat(none.resolveToolCacheControl()).isNull(); - // SYSTEM_ONLY -> tool caching enabled (uses SYSTEM TTL) + // SYSTEM_ONLY -> no explicit tool caching (tools cached implicitly via hierarchy) CacheEligibilityResolver sys = CacheEligibilityResolver.from(AnthropicCacheOptions.builder() .strategy(AnthropicCacheStrategy.SYSTEM_ONLY) .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR) .build()); - var cc = sys.resolveToolCacheControl(); + assertThat(sys.resolveToolCacheControl()).isNull(); + + // TOOLS_ONLY -> tool caching enabled, system messages NOT cached + CacheEligibilityResolver toolsOnly = CacheEligibilityResolver.from(AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.TOOLS_ONLY) + .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR) + .build()); + assertThat(toolsOnly.resolveToolCacheControl()).isNotNull(); + assertThat(toolsOnly.resolve(MessageType.SYSTEM, "Large system prompt text")).isNull(); + + // SYSTEM_AND_TOOLS -> tool caching enabled (uses SYSTEM TTL) + CacheEligibilityResolver sysAndTools = CacheEligibilityResolver.from(AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR) + .build()); + var cc = sysAndTools.resolveToolCacheControl(); assertThat(cc).isNotNull(); assertThat(cc.ttl()).isEqualTo(AnthropicCacheTtl.ONE_HOUR.getValue()); + + // CONVERSATION_HISTORY -> tool caching enabled + CacheEligibilityResolver history = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build()); + assertThat(history.resolveToolCacheControl()).isNotNull(); + } + + @Test + void toolsOnlyStrategyBehavior() { + AnthropicCacheOptions 
options = AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.TOOLS_ONLY) + .messageTypeMinContentLength(MessageType.SYSTEM, 100) + .build(); + CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options); + + // Caching is enabled + assertThat(resolver.isCachingEnabled()).isTrue(); + + // System messages should NOT be cached + assertThat(resolver.resolve(MessageType.SYSTEM, "Large system prompt with plenty of content")) + .as("System messages should not be cached with TOOLS_ONLY strategy") + .isNull(); + + // User messages should NOT be cached + assertThat(resolver.resolve(MessageType.USER, "User message content")).isNull(); + + // Assistant messages should NOT be cached + assertThat(resolver.resolve(MessageType.ASSISTANT, "Assistant message content")).isNull(); + + // Tool messages should NOT be cached + assertThat(resolver.resolve(MessageType.TOOL, "Tool result content")).isNull(); + + // Tool definitions SHOULD be cached + AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl(); + assertThat(toolCache).as("Tool definitions should be cached with TOOLS_ONLY strategy").isNotNull(); + assertThat(toolCache.type()).isEqualTo("ephemeral"); + } + + @Test + void breakpointCountForEachStrategy() { + // NONE: 0 breakpoints + CacheEligibilityResolver none = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build()); + assertThat(none.resolveToolCacheControl()).isNull(); + assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull(); + + // SYSTEM_ONLY: 1 breakpoint (system only, tools implicit) + CacheEligibilityResolver systemOnly = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build()); + assertThat(systemOnly.resolveToolCacheControl()).as("SYSTEM_ONLY should not explicitly cache tools").isNull(); + assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull(); + + // 
TOOLS_ONLY: 1 breakpoint (tools only) + CacheEligibilityResolver toolsOnly = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build()); + assertThat(toolsOnly.resolveToolCacheControl()).as("TOOLS_ONLY should cache tools").isNotNull(); + assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).as("TOOLS_ONLY should not cache system").isNull(); + + // SYSTEM_AND_TOOLS: 2 breakpoints (tools + system) + CacheEligibilityResolver systemAndTools = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build()); + assertThat(systemAndTools.resolveToolCacheControl()).as("SYSTEM_AND_TOOLS should cache tools").isNotNull(); + assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).as("SYSTEM_AND_TOOLS should cache system") + .isNotNull(); + } + + @Test + void messageTypeEligibilityPerStrategy() { + // NONE: No message types eligible + CacheEligibilityResolver none = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build()); + assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull(); + assertThat(none.resolve(MessageType.USER, "content")).isNull(); + assertThat(none.resolve(MessageType.ASSISTANT, "content")).isNull(); + assertThat(none.resolve(MessageType.TOOL, "content")).isNull(); + + // SYSTEM_ONLY: Only SYSTEM eligible + CacheEligibilityResolver systemOnly = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build()); + assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull(); + assertThat(systemOnly.resolve(MessageType.USER, "content")).isNull(); + assertThat(systemOnly.resolve(MessageType.ASSISTANT, "content")).isNull(); + assertThat(systemOnly.resolve(MessageType.TOOL, "content")).isNull(); + + // TOOLS_ONLY: No message types eligible (only tool definitions) + CacheEligibilityResolver toolsOnly = 
CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build()); + assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).isNull(); + assertThat(toolsOnly.resolve(MessageType.USER, "content")).isNull(); + assertThat(toolsOnly.resolve(MessageType.ASSISTANT, "content")).isNull(); + assertThat(toolsOnly.resolve(MessageType.TOOL, "content")).isNull(); + + // SYSTEM_AND_TOOLS: Only SYSTEM eligible + CacheEligibilityResolver systemAndTools = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build()); + assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).isNotNull(); + assertThat(systemAndTools.resolve(MessageType.USER, "content")).isNull(); + assertThat(systemAndTools.resolve(MessageType.ASSISTANT, "content")).isNull(); + assertThat(systemAndTools.resolve(MessageType.TOOL, "content")).isNull(); + + // CONVERSATION_HISTORY: All message types eligible + CacheEligibilityResolver history = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build()); + assertThat(history.resolve(MessageType.SYSTEM, "content")).isNotNull(); + assertThat(history.resolve(MessageType.USER, "content")).isNotNull(); + assertThat(history.resolve(MessageType.ASSISTANT, "content")).isNotNull(); + assertThat(history.resolve(MessageType.TOOL, "content")).isNotNull(); + } + + @Test + void toolsOnlyIsolationFromSystemChanges() { + // Validates that TOOLS_ONLY resolver behavior is consistent + // regardless of system message content (simulating different system prompts) + CacheEligibilityResolver resolver = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build()); + + // Different system prompts should all be ineligible for caching + assertThat(resolver.resolve(MessageType.SYSTEM, "You are a helpful assistant")) + .as("System prompt 1 should 
not be cached") + .isNull(); + assertThat(resolver.resolve(MessageType.SYSTEM, "You are a STRICT validator")) + .as("System prompt 2 should not be cached") + .isNull(); + assertThat(resolver.resolve(MessageType.SYSTEM, "You are a creative writer")) + .as("System prompt 3 should not be cached") + .isNull(); + + // Tool cache eligibility should remain consistent + assertThat(resolver.resolveToolCacheControl()).as("Tools should always be cacheable").isNotNull(); + } + + @Test + void systemAndToolsIndependentBreakpoints() { + // Validates that SYSTEM_AND_TOOLS creates two independent eligibility checks + CacheEligibilityResolver resolver = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build()); + + // Both tools and system should be independently eligible + AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl(); + AnthropicApi.ChatCompletionRequest.CacheControl systemCache = resolver.resolve(MessageType.SYSTEM, "content"); + + assertThat(toolCache).as("Tools should be cacheable").isNotNull(); + assertThat(systemCache).as("System should be cacheable").isNotNull(); + + // They should use the same TTL (both use SYSTEM message type TTL) + assertThat(toolCache.ttl()).isEqualTo(systemCache.ttl()); + } + + @Test + void breakpointLimitEnforced() { + AnthropicCacheOptions options = AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY) + .build(); + CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options); + + // Use up breakpoints by resolving multiple times + resolver.resolve(MessageType.SYSTEM, "content"); // Uses breakpoint 1 + resolver.useCacheBlock(); + resolver.resolve(MessageType.USER, "content"); // Uses breakpoint 2 + resolver.useCacheBlock(); + resolver.resolve(MessageType.ASSISTANT, "content"); // Uses breakpoint 3 + resolver.useCacheBlock(); + resolver.resolve(MessageType.TOOL, "content"); // Uses 
breakpoint 4 + resolver.useCacheBlock(); + + // 5th attempt should return null (all 4 breakpoints used) + assertThat(resolver.resolve(MessageType.USER, "more content")) + .as("Should return null when all 4 breakpoints are used") + .isNull(); + } + + @Test + void emptyAndNullContentHandling() { + CacheEligibilityResolver resolver = CacheEligibilityResolver + .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build()); + + // Empty string should not be cached + assertThat(resolver.resolve(MessageType.SYSTEM, "")).as("Empty string should not be cached").isNull(); + + // Null should not be cached + assertThat(resolver.resolve(MessageType.SYSTEM, null)).as("Null content should not be cached").isNull(); + + // Whitespace-only should be cached if it meets length requirement + assertThat(resolver.resolve(MessageType.SYSTEM, " ")) + .as("Whitespace-only content meeting length requirements should be cacheable") + .isNotNull(); } } diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc index 06bb3eb32ef..d506315d8fe 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc @@ -212,14 +212,43 @@ Different models have different minimum token thresholds for cache effectiveness === Cache Strategies -Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum: +Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum. +Each strategy automatically places cache breakpoints at optimal locations while staying within Anthropic's 4-breakpoint limit. 
-* `NONE`: Disables prompt caching completely -* `SYSTEM_ONLY`: Caches only the system message content -* `SYSTEM_AND_TOOLS`: Caches system message and the last tool definition -* `CONVERSATION_HISTORY`: Caches conversation history in chat memory scenarios +[cols="2,3,5", stripes=even] +|==== +| Strategy | Breakpoints Used | Use Case + +| `NONE` +| 0 +| Disables prompt caching completely. +Use when requests are one-off or content is too small to benefit from caching. + +| `SYSTEM_ONLY` +| 1 +| Caches system message content. +Tools are cached implicitly via Anthropic's automatic ~20-block lookback mechanism. +Use when system prompts are large and stable with fewer than 20 tools. + +| `TOOLS_ONLY` +| 1 +| Caches tool definitions only. System messages remain uncached and are processed fresh on each request. +Use when tool definitions are large and stable (5000+ tokens) but system prompts change frequently or vary per tenant/context. + +| `SYSTEM_AND_TOOLS` +| 2 +| Caches both tool definitions (breakpoint 1) and system message (breakpoint 2) explicitly. +Use when you have 20+ tools (beyond automatic lookback) or want deterministic caching of both components. +System changes don't invalidate tool cache. + +| `CONVERSATION_HISTORY` +| 1-4 +| Caches entire conversation history up to the current user question. +Use for multi-turn conversations with chat memory where conversation history grows over time. +|==== -This strategic approach ensures optimal cache breakpoint placement while staying within Anthropic's 4-breakpoint limit. +IMPORTANT: Due to Anthropic's cascade invalidation, changing tool definitions will invalidate ALL downstream cache breakpoints (system, messages). +Tool stability is critical for every strategy other than `NONE`: tool definitions come first in the cached prefix, so a tool change invalidates `TOOLS_ONLY` and `SYSTEM_ONLY` caches just as it does `SYSTEM_AND_TOOLS` and `CONVERSATION_HISTORY` caches. 
=== Enabling Prompt Caching @@ -227,9 +256,11 @@ Enable prompt caching by setting `cacheOptions` on `AnthropicChatOptions` and ch ==== System-Only Caching +Best for: Stable system prompts with <20 tools (tools cached implicitly via automatic lookback). + [source,java] ---- -// Cache system message content +// Cache system message content (tools cached implicitly) ChatResponse response = chatModel.call( new Prompt( List.of( @@ -247,11 +278,39 @@ ChatResponse response = chatModel.call( ); ---- +==== Tools-Only Caching + +Best for: Large stable tool sets with dynamic system prompts (multi-tenant apps, A/B testing). + +[source,java] +---- +// Cache tool definitions, system prompt processed fresh each time +ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage("You are a " + persona + " assistant..."), // Dynamic per-tenant + new UserMessage("What's the weather like in San Francisco?") + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheOptions(AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.TOOLS_ONLY) + .build()) + .toolCallbacks(weatherToolCallback) // Large tool set cached + .maxTokens(500) + .build() + ) +); +---- + ==== System and Tools Caching +Best for: 20+ tools (beyond automatic lookback) or when both components should be cached independently. 
+ [source,java] ---- -// Cache system message and the last tool definition +// Cache both tool definitions and system message with independent breakpoints +// Changing system won't invalidate tool cache (but changing tools invalidates both) ChatResponse response = chatModel.call( new Prompt( List.of( @@ -259,11 +318,11 @@ ChatResponse response = chatModel.call( new UserMessage("What's the weather like in San Francisco?") ), AnthropicChatOptions.builder() - .model("claude-sonnet-4") + .model("claude-sonnet-4") .cacheOptions(AnthropicCacheOptions.builder() .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) .build()) - .toolCallbacks(weatherToolCallback) + .toolCallbacks(weatherToolCallback) // 20+ tools .maxTokens(500) .build() ) @@ -317,7 +376,9 @@ String response = ChatClient.create(chatModel) ==== Per-Message TTL (5m or 1h) -By default, cached content uses a 5-minute TTL. You can set a 1-hour TTL for specific message types. When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header. +By default, cached content uses a 5-minute TTL. +You can set a 1-hour TTL for specific message types. +When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header. [source,java] ---- @@ -544,6 +605,62 @@ for (String filename : codeFiles) { // Guidelines cached after first request, subsequent reviews are faster and cheaper ---- +==== Multi-Tenant SaaS with Shared Tools + +Build a multi-tenant application where tools are shared but system prompts are customized per tenant: + +[source,java] +---- +// Define large shared tool set (used by all tenants) +List<ToolCallback> sharedTools = Arrays.asList( + weatherToolCallback, // ~500 tokens + calendarToolCallback, // ~800 tokens + emailToolCallback, // ~700 tokens + analyticsToolCallback, // ~600 tokens + reportingToolCallback // ~900 tokens + // ... 
20+ more tools, totaling 5000+ tokens +); + +@Service +public class MultiTenantAIService { + + public String handleTenantRequest(String tenantId, String userQuery) { + // Get tenant-specific configuration + TenantConfig config = tenantRepository.findById(tenantId); + + // Dynamic system prompt per tenant + String tenantSystemPrompt = String.format(""" + You are %s's AI assistant. Company values: %s. + Brand voice: %s. Compliance requirements: %s. + """, config.companyName(), config.values(), + config.brandVoice(), config.compliance()); + + ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage(tenantSystemPrompt), // Different per tenant, NOT cached + new UserMessage(userQuery) + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheOptions(AnthropicCacheOptions.builder() + .strategy(AnthropicCacheStrategy.TOOLS_ONLY) // Cache tools only + .build()) + .toolCallbacks(sharedTools) // Cached once, shared across all tenants + .maxTokens(800) + .build() + ) + ); + + return response.getResult().getOutput().getText(); + } +} + +// Tools cached once (5000 tokens @ 10% = 500 token cost for cache hits) +// Each tenant's unique system prompt processed fresh (200-500 tokens @ 100%) +// Total per request: ~700-1000 tokens vs 5500+ without TOOLS_ONLY +---- + ==== Customer Support with Knowledge Base Create a customer support system that caches your product knowledge base for consistent, accurate responses: @@ -562,7 +679,7 @@ String knowledgeBase = """ @Service public class CustomerSupportService { - + public String handleCustomerQuery(String customerQuery, String customerId) { ChatResponse response = chatModel.call( new Prompt( @@ -580,7 +697,7 @@ public class CustomerSupportService { .build() ) ); - + return response.getResult().getOutput().getText(); } } @@ -591,18 +708,27 @@ public class CustomerSupportService { === Best Practices -1. 
**Choose the Right Strategy**: - Use `SYSTEM_ONLY` for reusable system prompts and instructions - Use `SYSTEM_AND_TOOLS` when you have both system content and tool definitions to cache (the last tool definition is cached) +1. **Choose the Right Strategy**: + - Use `SYSTEM_ONLY` for stable system prompts with <20 tools (tools cached implicitly via automatic lookback) + - Use `TOOLS_ONLY` for large stable tool sets (5000+ tokens) with dynamic system prompts (multi-tenant, A/B testing) + - Use `SYSTEM_AND_TOOLS` when you have 20+ tools (beyond automatic lookback) or want both cached independently - Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations - Use `NONE` to explicitly disable caching -2. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models). +2. **Understand Cascade Invalidation**: Anthropic's cache hierarchy (`tools → system → messages`) means changes flow downward: + - Changing **tools** invalidates: tools + system + messages (all caches) ❌❌❌ + - Changing **system** invalidates: system + messages (tools cache remains valid) ✅❌❌ + - Changing **messages** invalidates: messages only (tools and system caches remain valid) ✅✅❌ + + **Tool stability is critical** for every strategy other than `NONE`: tool definitions come first in the cached prefix, so a tool change invalidates `TOOLS_ONLY` and `SYSTEM_ONLY` caches just as it does `SYSTEM_AND_TOOLS` and `CONVERSATION_HISTORY` caches. + +3. **SYSTEM_AND_TOOLS Independence**: With `SYSTEM_AND_TOOLS`, changing the system message does NOT invalidate the tool cache, allowing efficient reuse of cached tools even when system prompts vary. + +4. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models). -3. **Reuse Identical Content**: Caching works best with exact matches of prompt content. -Even small changes will require a new cache entry. +5. **Reuse Identical Content**: Caching works best with exact matches of prompt content. 
Even small changes will require a new cache entry. -4. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness: +6. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness: ```java AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage(); if (usage != null) { @@ -611,11 +737,11 @@ Even small changes will require a new cache entry. } ``` -5. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit. +7. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit. -6. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer. +8. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer. -7. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response. +9. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response. === Implementation Details