diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
index e94a1a220c5..fed960ed76a 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
@@ -22,31 +22,104 @@
* system → messages.
*
* @author Mark Pollack
+ * @author Soby Chacko
* @since 1.1.0
*/
public enum AnthropicCacheStrategy {
/**
- * No caching (default behavior).
+ * No caching (default behavior). All content is processed fresh on each request.
+ *
+ * Use this when:
+ *
+ * - Requests are one-off or highly variable
+ * - Content doesn't meet minimum token requirements (1024+ tokens)
+ * - You want to avoid caching overhead
+ *
*/
NONE,
+ /**
+ * Cache tool definitions only. Places a cache breakpoint on the last tool, while
+ * system messages and conversation history remain uncached and are processed fresh on
+ * each request.
+ *
+ * Use this when:
+ *
+ * - Tool definitions are large and stable (5000+ tokens)
+ * - System prompts change frequently or are small (<500 tokens)
+ * - You want to share cached tools across different system contexts (e.g.,
+ * multi-tenant applications, A/B testing system prompts)
+ * - Tool definitions rarely change
+ *
+ * Important: Changing any tool definition will invalidate this cache
+ * entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
+ * any downstream cache breakpoints (system, messages) if used in combination with
+ * other strategies.
+ */
+ TOOLS_ONLY,
/**
* Cache system instructions only. Places a cache breakpoint on the system message
- * content.
+ * content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback
+ * mechanism (content before the cache breakpoint is included in the cache).
+ *
+ * Use this when:
+ *
+ * - System prompts are large and stable (1024+ tokens)
+ * - Tool definitions are relatively small (<20 tools)
+ * - You want simple, single-breakpoint caching
+ *
+ * Note: Changing tools will invalidate the cache since tools are
+ * part of the cache prefix (they appear before system in the request hierarchy).
*/
SYSTEM_ONLY,
/**
* Cache system instructions and tool definitions. Places cache breakpoints on the
- * last tool and system message content.
+ * last tool (breakpoint 1) and system message content (breakpoint 2).
+ *
+ * Use this when:
+ *
+ * - Both tools and system prompts are large and stable
+ * - You have many tools (20+ tools, beyond the automatic lookback window)
+ * - You want deterministic, explicit caching of both components
+ * - System prompts may change independently of tools
+ *
+ * Behavior:
+ *
+ * - If only tools change: Both caches invalidated (tools + system)
+ * - If only system changes: Tools cache remains valid, system cache
+ * invalidated
+ *
+ * This allows efficient reuse of tool cache when only system prompts are updated.
*/
SYSTEM_AND_TOOLS,
/**
* Cache the entire conversation history up to (but not including) the current user
- * question. This is ideal for multi-turn conversations where you want to reuse the
- * conversation context while asking new questions.
+ * question. Places a cache breakpoint on the last user message in the conversation
+ * history, enabling incremental caching as the conversation grows.
+ *
+ * Use this when:
+ *
+ * - Building multi-turn conversational applications (chatbots, assistants)
+ * - Conversation history is large and grows over time
+ * - You want to reuse conversation context while asking new questions
+ * - Using chat memory advisors or conversation persistence
+ *
+ *
+ * Behavior: Each turn builds on the previous cached prefix. The
+ * cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
+ * + Message2], etc. This provides significant cost savings (90%+) and performance
+ * improvements for long conversations.
+ *
+ * Important: Changing tools or system prompts will invalidate the
+ * entire conversation cache due to cascade invalidation. Tool and system stability is
+ * critical for this strategy.
*/
CONVERSATION_HISTORY
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
index a5443166a9b..cebd9988ea4 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
@@ -39,6 +39,7 @@
* definition messages.
*
* @author Austin Dase
+ * @author Soby Chacko
* @since 1.1.0
**/
public class CacheEligibilityResolver {
@@ -84,6 +85,7 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
return switch (anthropicCacheStrategy) {
case NONE -> Set.of();
case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
+ case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
case CONVERSATION_HISTORY -> Set.of(MessageType.values());
};
}
@@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa
}
public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() {
- // Tool definitions are only cache-eligible when caching is enabled and
- // the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or
- // CONVERSATION_HISTORY). When NONE, tools must not be cached.
- if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE)
- || this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
+ // Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
+ // CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
+ // relying on Anthropic's cache hierarchy to implicitly cache tools.
+ if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
+ && this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
+ && this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
+ logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy);
+ return null;
+ }
+
+ if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
logger.debug("Caching not enabled for tool definition, usedBreakpoints={}",
this.cacheBreakpointTracker.getCount());
return null;
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
index a914a243085..aac3622e137 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
@@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception {
this.mockWebServer
.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
+ // Create tool callback to test that tools are NOT cached with SYSTEM_ONLY
+ var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
+ MethodToolCallback toolCallback = MethodToolCallback.builder()
+ .toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
+ .toolMethod(toolMethod)
+ .build();
+
// Test with SYSTEM_ONLY cache strategy
AnthropicChatOptions options = AnthropicChatOptions.builder()
.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
+ .toolCallbacks(List.of(toolCallback))
.build();
Prompt prompt = new Prompt(
@@ -130,6 +138,18 @@ void testSystemOnlyCacheStrategy() throws Exception {
assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
}
+ // Verify tools exist but DO NOT have cache_control (key difference from
+ // SYSTEM_AND_TOOLS)
+ assertThat(requestBody.has("tools")).as("Tools should be present in the request").isTrue();
+ JsonNode toolsArray = requestBody.get("tools");
+ assertThat(toolsArray.isArray()).isTrue();
+ // Verify that NO tool has cache_control
+ for (int i = 0; i < toolsArray.size(); i++) {
+ JsonNode tool = toolsArray.get(i);
+ assertThat(tool.has("cache_control")).isFalse();
+ }
+
// Verify response
assertThat(response).isNotNull();
assertThat(response.getResult().getOutput().getText()).contains("Hello!");
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
index d056baddcec..0b594500dd3 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
@@ -30,6 +30,7 @@
* Tests for {@link CacheEligibilityResolver}.
*
* @author Austin Dase
+ * @author Soby Chacko
*/
class CacheEligibilityResolverTests {
@@ -78,14 +79,215 @@ void toolCacheControlRespectsStrategy() {
.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
assertThat(none.resolveToolCacheControl()).isNull();
- // SYSTEM_ONLY -> tool caching enabled (uses SYSTEM TTL)
+ // SYSTEM_ONLY -> no explicit tool caching (tools cached implicitly via hierarchy)
CacheEligibilityResolver sys = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
.build());
- var cc = sys.resolveToolCacheControl();
+ assertThat(sys.resolveToolCacheControl()).isNull();
+
+ // TOOLS_ONLY -> tool caching enabled, system messages NOT cached
+ CacheEligibilityResolver toolsOnly = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+ .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+ .build());
+ assertThat(toolsOnly.resolveToolCacheControl()).isNotNull();
+ assertThat(toolsOnly.resolve(MessageType.SYSTEM, "Large system prompt text")).isNull();
+
+ // SYSTEM_AND_TOOLS -> tool caching enabled (uses SYSTEM TTL)
+ CacheEligibilityResolver sysAndTools = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
+ .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+ .build());
+ var cc = sysAndTools.resolveToolCacheControl();
assertThat(cc).isNotNull();
assertThat(cc.ttl()).isEqualTo(AnthropicCacheTtl.ONE_HOUR.getValue());
+
+ // CONVERSATION_HISTORY -> tool caching enabled
+ CacheEligibilityResolver history = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+ assertThat(history.resolveToolCacheControl()).isNotNull();
+ }
+
+ @Test
+ void toolsOnlyStrategyBehavior() {
+ AnthropicCacheOptions options = AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+ .messageTypeMinContentLength(MessageType.SYSTEM, 100)
+ .build();
+ CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
+
+ // Caching is enabled
+ assertThat(resolver.isCachingEnabled()).isTrue();
+
+ // System messages should NOT be cached
+ assertThat(resolver.resolve(MessageType.SYSTEM, "Large system prompt with plenty of content"))
+ .as("System messages should not be cached with TOOLS_ONLY strategy")
+ .isNull();
+
+ // User messages should NOT be cached
+ assertThat(resolver.resolve(MessageType.USER, "User message content")).isNull();
+
+ // Assistant messages should NOT be cached
+ assertThat(resolver.resolve(MessageType.ASSISTANT, "Assistant message content")).isNull();
+
+ // Tool messages should NOT be cached
+ assertThat(resolver.resolve(MessageType.TOOL, "Tool result content")).isNull();
+
+ // Tool definitions SHOULD be cached
+ AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
+ assertThat(toolCache).as("Tool definitions should be cached with TOOLS_ONLY strategy").isNotNull();
+ assertThat(toolCache.type()).isEqualTo("ephemeral");
+ }
+
+ @Test
+ void breakpointCountForEachStrategy() {
+ // NONE: 0 breakpoints
+ CacheEligibilityResolver none = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
+ assertThat(none.resolveToolCacheControl()).isNull();
+ assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull();
+
+ // SYSTEM_ONLY: 1 breakpoint (system only, tools implicit)
+ CacheEligibilityResolver systemOnly = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build());
+ assertThat(systemOnly.resolveToolCacheControl()).as("SYSTEM_ONLY should not explicitly cache tools").isNull();
+ assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull();
+
+ // TOOLS_ONLY: 1 breakpoint (tools only)
+ CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+ assertThat(toolsOnly.resolveToolCacheControl()).as("TOOLS_ONLY should cache tools").isNotNull();
+ assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).as("TOOLS_ONLY should not cache system").isNull();
+
+ // SYSTEM_AND_TOOLS: 2 breakpoints (tools + system)
+ CacheEligibilityResolver systemAndTools = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+ assertThat(systemAndTools.resolveToolCacheControl()).as("SYSTEM_AND_TOOLS should cache tools").isNotNull();
+ assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).as("SYSTEM_AND_TOOLS should cache system")
+ .isNotNull();
+ }
+
+ @Test
+ void messageTypeEligibilityPerStrategy() {
+ // NONE: No message types eligible
+ CacheEligibilityResolver none = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
+ assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull();
+ assertThat(none.resolve(MessageType.USER, "content")).isNull();
+ assertThat(none.resolve(MessageType.ASSISTANT, "content")).isNull();
+ assertThat(none.resolve(MessageType.TOOL, "content")).isNull();
+
+ // SYSTEM_ONLY: Only SYSTEM eligible
+ CacheEligibilityResolver systemOnly = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build());
+ assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull();
+ assertThat(systemOnly.resolve(MessageType.USER, "content")).isNull();
+ assertThat(systemOnly.resolve(MessageType.ASSISTANT, "content")).isNull();
+ assertThat(systemOnly.resolve(MessageType.TOOL, "content")).isNull();
+
+ // TOOLS_ONLY: No message types eligible (only tool definitions)
+ CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+ assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).isNull();
+ assertThat(toolsOnly.resolve(MessageType.USER, "content")).isNull();
+ assertThat(toolsOnly.resolve(MessageType.ASSISTANT, "content")).isNull();
+ assertThat(toolsOnly.resolve(MessageType.TOOL, "content")).isNull();
+
+ // SYSTEM_AND_TOOLS: Only SYSTEM eligible
+ CacheEligibilityResolver systemAndTools = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+ assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).isNotNull();
+ assertThat(systemAndTools.resolve(MessageType.USER, "content")).isNull();
+ assertThat(systemAndTools.resolve(MessageType.ASSISTANT, "content")).isNull();
+ assertThat(systemAndTools.resolve(MessageType.TOOL, "content")).isNull();
+
+ // CONVERSATION_HISTORY: All message types eligible
+ CacheEligibilityResolver history = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+ assertThat(history.resolve(MessageType.SYSTEM, "content")).isNotNull();
+ assertThat(history.resolve(MessageType.USER, "content")).isNotNull();
+ assertThat(history.resolve(MessageType.ASSISTANT, "content")).isNotNull();
+ assertThat(history.resolve(MessageType.TOOL, "content")).isNotNull();
+ }
+
+ @Test
+ void toolsOnlyIsolationFromSystemChanges() {
+ // Validates that TOOLS_ONLY resolver behavior is consistent
+ // regardless of system message content (simulating different system prompts)
+ CacheEligibilityResolver resolver = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+
+ // Different system prompts should all be ineligible for caching
+ assertThat(resolver.resolve(MessageType.SYSTEM, "You are a helpful assistant"))
+ .as("System prompt 1 should not be cached")
+ .isNull();
+ assertThat(resolver.resolve(MessageType.SYSTEM, "You are a STRICT validator"))
+ .as("System prompt 2 should not be cached")
+ .isNull();
+ assertThat(resolver.resolve(MessageType.SYSTEM, "You are a creative writer"))
+ .as("System prompt 3 should not be cached")
+ .isNull();
+
+ // Tool cache eligibility should remain consistent
+ assertThat(resolver.resolveToolCacheControl()).as("Tools should always be cacheable").isNotNull();
+ }
+
+ @Test
+ void systemAndToolsIndependentBreakpoints() {
+ // Validates that SYSTEM_AND_TOOLS creates two independent eligibility checks
+ CacheEligibilityResolver resolver = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+
+ // Both tools and system should be independently eligible
+ AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
+ AnthropicApi.ChatCompletionRequest.CacheControl systemCache = resolver.resolve(MessageType.SYSTEM, "content");
+
+ assertThat(toolCache).as("Tools should be cacheable").isNotNull();
+ assertThat(systemCache).as("System should be cacheable").isNotNull();
+
+ // They should use the same TTL (both use SYSTEM message type TTL)
+ assertThat(toolCache.ttl()).isEqualTo(systemCache.ttl());
+ }
+
+ @Test
+ void breakpointLimitEnforced() {
+ AnthropicCacheOptions options = AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
+ .build();
+ CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
+
+ // Consume all four breakpoints: resolve eligibility, then mark the cache block as used
+ resolver.resolve(MessageType.SYSTEM, "content");
+ resolver.useCacheBlock(); // breakpoint 1
+ resolver.resolve(MessageType.USER, "content");
+ resolver.useCacheBlock(); // breakpoint 2
+ resolver.resolve(MessageType.ASSISTANT, "content");
+ resolver.useCacheBlock(); // breakpoint 3
+ resolver.resolve(MessageType.TOOL, "content");
+ resolver.useCacheBlock(); // breakpoint 4
+
+ // 5th attempt should return null (all 4 breakpoints used)
+ assertThat(resolver.resolve(MessageType.USER, "more content"))
+ .as("Should return null when all 4 breakpoints are used")
+ .isNull();
+ }
+
+ @Test
+ void emptyAndNullContentHandling() {
+ CacheEligibilityResolver resolver = CacheEligibilityResolver
+ .from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+
+ // Empty string should not be cached
+ assertThat(resolver.resolve(MessageType.SYSTEM, "")).as("Empty string should not be cached").isNull();
+
+ // Null should not be cached
+ assertThat(resolver.resolve(MessageType.SYSTEM, null)).as("Null content should not be cached").isNull();
+
+ // Whitespace-only should be cached if it meets length requirement
+ assertThat(resolver.resolve(MessageType.SYSTEM, " "))
+ .as("Whitespace-only content meeting length requirements should be cacheable")
+ .isNotNull();
}
}
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
index 06bb3eb32ef..d506315d8fe 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
@@ -212,14 +212,43 @@ Different models have different minimum token thresholds for cache effectiveness
=== Cache Strategies
-Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum:
+Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum.
+Each strategy automatically places cache breakpoints at optimal locations while staying within Anthropic's 4-breakpoint limit.
-* `NONE`: Disables prompt caching completely
-* `SYSTEM_ONLY`: Caches only the system message content
-* `SYSTEM_AND_TOOLS`: Caches system message and the last tool definition
-* `CONVERSATION_HISTORY`: Caches conversation history in chat memory scenarios
+[cols="2,3,5", stripes=even]
+|====
+| Strategy | Breakpoints Used | Use Case
+
+| `NONE`
+| 0
+| Disables prompt caching completely.
+Use when requests are one-off or content is too small to benefit from caching.
+
+| `SYSTEM_ONLY`
+| 1
+| Caches system message content.
+Tools are cached implicitly via Anthropic's automatic ~20-block lookback mechanism.
+Use when system prompts are large and stable with fewer than 20 tools.
+
+| `TOOLS_ONLY`
+| 1
+| Caches tool definitions only. System messages remain uncached and are processed fresh on each request.
+Use when tool definitions are large and stable (5000+ tokens) but system prompts change frequently or vary per tenant/context.
+
+| `SYSTEM_AND_TOOLS`
+| 2
+| Caches both tool definitions (breakpoint 1) and system message (breakpoint 2) explicitly.
+Use when you have 20+ tools (beyond automatic lookback) or want deterministic caching of both components.
+System changes don't invalidate tool cache.
+
+| `CONVERSATION_HISTORY`
+| 1-4
+| Caches entire conversation history up to the current user question.
+Use for multi-turn conversations with chat memory where conversation history grows over time.
+|====
-This strategic approach ensures optimal cache breakpoint placement while staying within Anthropic's 4-breakpoint limit.
+IMPORTANT: Due to Anthropic's cascade invalidation, changing tool definitions will invalidate ALL downstream cache breakpoints (system, messages).
+Tool stability is critical when using `SYSTEM_AND_TOOLS` or `CONVERSATION_HISTORY` strategies.
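+
+A quick way to observe cascade invalidation is to compare the cache metrics of two consecutive, identical calls: an unchanged prefix yields cache reads on the second call, while an edited tool definition forces the prefix to be written again. A minimal sketch, assuming the `AnthropicApi.Usage` record exposes accessors for Anthropic's `cache_creation_input_tokens` and `cache_read_input_tokens` fields:
+
+[source,java]
+----
+// Issue the same prompt with the same options twice in a row.
+ChatResponse first = chatModel.call(prompt);
+ChatResponse second = chatModel.call(prompt);
+
+AnthropicApi.Usage u1 = (AnthropicApi.Usage) first.getMetadata().getUsage().getNativeUsage();
+AnthropicApi.Usage u2 = (AnthropicApi.Usage) second.getMetadata().getUsage().getNativeUsage();
+
+// Expected: the first call writes the cache, the second reads it. If a tool
+// definition changed between the calls, the second call would report cache
+// creation again instead of cache reads (accessor names are assumed here).
+System.out.println("first:  created=" + u1.cacheCreationInputTokens() + ", read=" + u1.cacheReadInputTokens());
+System.out.println("second: created=" + u2.cacheCreationInputTokens() + ", read=" + u2.cacheReadInputTokens());
+----
+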
=== Enabling Prompt Caching
@@ -227,9 +256,11 @@ Enable prompt caching by setting `cacheOptions` on `AnthropicChatOptions` and ch
==== System-Only Caching
+Best for: Stable system prompts with <20 tools (tools cached implicitly via automatic lookback).
+
[source,java]
----
-// Cache system message content
+// Cache system message content (tools cached implicitly)
ChatResponse response = chatModel.call(
new Prompt(
List.of(
@@ -247,11 +278,39 @@ ChatResponse response = chatModel.call(
);
----
+==== Tools-Only Caching
+
+Best for: Large stable tool sets with dynamic system prompts (multi-tenant apps, A/B testing).
+
+[source,java]
+----
+// Cache tool definitions, system prompt processed fresh each time
+ChatResponse response = chatModel.call(
+ new Prompt(
+ List.of(
+ new SystemMessage("You are a " + persona + " assistant..."), // Dynamic per-tenant
+ new UserMessage("What's the weather like in San Francisco?")
+ ),
+ AnthropicChatOptions.builder()
+ .model("claude-sonnet-4")
+ .cacheOptions(AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+ .build())
+ .toolCallbacks(weatherToolCallback) // Large tool set cached
+ .maxTokens(500)
+ .build()
+ )
+);
+----
+
==== System and Tools Caching
+Best for: 20+ tools (beyond automatic lookback) or when both components should be cached independently.
+
[source,java]
----
-// Cache system message and the last tool definition
+// Cache both tool definitions and system message with independent breakpoints
+// Changing system won't invalidate tool cache (but changing tools invalidates both)
ChatResponse response = chatModel.call(
new Prompt(
List.of(
@@ -259,11 +318,11 @@ ChatResponse response = chatModel.call(
new UserMessage("What's the weather like in San Francisco?")
),
AnthropicChatOptions.builder()
- .model("claude-sonnet-4")
+ .model("claude-sonnet-4")
.cacheOptions(AnthropicCacheOptions.builder()
.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
.build())
- .toolCallbacks(weatherToolCallback)
+ .toolCallbacks(weatherToolCallback) // 20+ tools
.maxTokens(500)
.build()
)
@@ -317,7 +376,9 @@ String response = ChatClient.create(chatModel)
==== Per-Message TTL (5m or 1h)
-By default, cached content uses a 5-minute TTL. You can set a 1-hour TTL for specific message types. When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header.
+By default, cached content uses a 5-minute TTL.
+You can set a 1-hour TTL for specific message types.
+When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header.
[source,java]
----
@@ -544,6 +605,62 @@ for (String filename : codeFiles) {
// Guidelines cached after first request, subsequent reviews are faster and cheaper
----
+==== Multi-Tenant SaaS with Shared Tools
+
+Build a multi-tenant application where tools are shared but system prompts are customized per tenant:
+
+[source,java]
+----
+// Define large shared tool set (used by all tenants)
+List<ToolCallback> sharedTools = Arrays.asList(
+ weatherToolCallback, // ~500 tokens
+ calendarToolCallback, // ~800 tokens
+ emailToolCallback, // ~700 tokens
+ analyticsToolCallback, // ~600 tokens
+ reportingToolCallback, // ~900 tokens
+ // ... 20+ more tools, totaling 5000+ tokens
+);
+
+@Service
+public class MultiTenantAIService {
+
+ public String handleTenantRequest(String tenantId, String userQuery) {
+ // Get tenant-specific configuration
+ TenantConfig config = tenantRepository.findById(tenantId);
+
+ // Dynamic system prompt per tenant
+ String tenantSystemPrompt = String.format("""
+ You are %s's AI assistant. Company values: %s.
+ Brand voice: %s. Compliance requirements: %s.
+ """, config.companyName(), config.values(),
+ config.brandVoice(), config.compliance());
+
+ ChatResponse response = chatModel.call(
+ new Prompt(
+ List.of(
+ new SystemMessage(tenantSystemPrompt), // Different per tenant, NOT cached
+ new UserMessage(userQuery)
+ ),
+ AnthropicChatOptions.builder()
+ .model("claude-sonnet-4")
+ .cacheOptions(AnthropicCacheOptions.builder()
+ .strategy(AnthropicCacheStrategy.TOOLS_ONLY) // Cache tools only
+ .build())
+ .toolCallbacks(sharedTools) // Cached once, shared across all tenants
+ .maxTokens(800)
+ .build()
+ )
+ );
+
+ return response.getResult().getOutput().getText();
+ }
+}
+
+// Tools are cached once; cache reads are billed at ~10% of the base input rate
+// (5000 cached tokens ≈ 500 tokens' worth of cost per request on cache hits)
+// Each tenant's unique system prompt is processed fresh (200-500 tokens at the full rate)
+// Effective cost per request: ~700-1000 token-equivalents vs 5500+ without TOOLS_ONLY
+----
+
==== Customer Support with Knowledge Base
Create a customer support system that caches your product knowledge base for consistent, accurate responses:
@@ -562,7 +679,7 @@ String knowledgeBase = """
@Service
public class CustomerSupportService {
-
+
public String handleCustomerQuery(String customerQuery, String customerId) {
ChatResponse response = chatModel.call(
new Prompt(
@@ -580,7 +697,7 @@ public class CustomerSupportService {
.build()
)
);
-
+
return response.getResult().getOutput().getText();
}
}
@@ -591,18 +708,27 @@ public class CustomerSupportService {
=== Best Practices
-1. **Choose the Right Strategy**:
- - Use `SYSTEM_ONLY` for reusable system prompts and instructions
- - Use `SYSTEM_AND_TOOLS` when you have both system content and tool definitions to cache (the last tool definition is cached)
+1. **Choose the Right Strategy**:
+ - Use `SYSTEM_ONLY` for stable system prompts with <20 tools (tools cached implicitly via automatic lookback)
+ - Use `TOOLS_ONLY` for large stable tool sets (5000+ tokens) with dynamic system prompts (multi-tenant, A/B testing)
+ - Use `SYSTEM_AND_TOOLS` when you have 20+ tools (beyond automatic lookback) or want both cached independently
- Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations
- Use `NONE` to explicitly disable caching
-2. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models).
+2. **Understand Cascade Invalidation**: Anthropic's cache hierarchy (`tools → system → messages`) means changes flow downward (the marks below show the resulting state of the tools / system / messages caches):
+ - Changing **tools** invalidates: tools + system + messages (all caches) ❌❌❌
+ - Changing **system** invalidates: system + messages (tools cache remains valid) ✅❌❌
+ - Changing **messages** invalidates: messages only (tools and system caches remain valid) ✅✅❌
+
+ **Tool stability is critical** when using `SYSTEM_AND_TOOLS` or `CONVERSATION_HISTORY` strategies.
+
+3. **SYSTEM_AND_TOOLS Independence**: With `SYSTEM_AND_TOOLS`, changing the system message does NOT invalidate the tool cache, allowing efficient reuse of cached tools even when system prompts vary.
+
+4. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models).
-3. **Reuse Identical Content**: Caching works best with exact matches of prompt content.
-Even small changes will require a new cache entry.
+5. **Reuse Identical Content**: Caching works best with exact matches of prompt content. Even small changes will require a new cache entry.
-4. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness:
+6. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness:
```java
AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage();
if (usage != null) {
@@ -611,11 +737,11 @@ Even small changes will require a new cache entry.
}
```
-5. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
+7. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
-6. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer.
+8. **Cache Lifetime**: Default TTL is 5 minutes; set a 1-hour TTL per message type via `messageTypeTtl(...)` (see the sketch after this list). Each cache access resets the timer.
-7. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response.
+9. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response.
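+
+The 1-hour TTL override mentioned in the cache-lifetime practice above composes with any strategy. A minimal sketch, using the `messageTypeTtl` and `AnthropicCacheTtl` names from `AnthropicCacheOptions`; `systemMessage` and `userMessage` stand in for the messages from the earlier examples:
+
+[source,java]
+----
+// Cache the system prompt with a 1-hour TTL; other message types keep the 5-minute default.
+// Spring AI adds the required Anthropic beta header automatically when a 1-hour TTL is used.
+AnthropicChatOptions options = AnthropicChatOptions.builder()
+    .model("claude-sonnet-4")
+    .cacheOptions(AnthropicCacheOptions.builder()
+        .strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
+        .messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+        .build())
+    .maxTokens(500)
+    .build();
+
+ChatResponse response = chatModel.call(new Prompt(List.of(systemMessage, userMessage), options));
+----
+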
=== Implementation Details