diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
index e94a1a220c5..fed960ed76a 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java
@@ -22,31 +22,104 @@
  * system → messages.
  *
  * @author Mark Pollack
+ * @author Soby Chacko
  * @since 1.1.0
  */
 public enum AnthropicCacheStrategy {
 
 	/**
-	 * No caching (default behavior).
+	 * No caching (default behavior). All content is processed fresh on each request.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Requests are one-off or highly variable</li>
+	 * <li>Content doesn't meet minimum token requirements (1024+ tokens)</li>
+	 * <li>You want to avoid caching overhead</li>
+	 * </ul>
 	 */
 	NONE,
 
+	/**
+	 * Cache tool definitions only. Places a cache breakpoint on the last tool, while
+	 * system messages and conversation history remain uncached and are processed fresh on
+	 * each request.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Tool definitions are large and stable (5000+ tokens)</li>
+	 * <li>System prompts change frequently or are small (&lt;500 tokens)</li>
+	 * <li>You want to share cached tools across different system contexts (e.g.,
+	 * multi-tenant applications, A/B testing system prompts)</li>
+	 * <li>Tool definitions rarely change</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Important:</strong> Changing any tool definition will invalidate this cache
+	 * entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
+	 * any downstream cache breakpoints (system, messages) if used in combination with
+	 * other strategies.
+	 */
+	TOOLS_ONLY,
+
 	/**
 	 * Cache system instructions only. Places a cache breakpoint on the system message
-	 * content.
+	 * content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback
+	 * mechanism (content before the cache breakpoint is included in the cache).
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>System prompts are large and stable (1024+ tokens)</li>
+	 * <li>Tool definitions are relatively small (&lt;20 tools)</li>
+	 * <li>You want simple, single-breakpoint caching</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Note:</strong> Changing tools will invalidate the cache since tools are
+	 * part of the cache prefix (they appear before system in the request hierarchy).
 	 */
 	SYSTEM_ONLY,
 
 	/**
 	 * Cache system instructions and tool definitions. Places cache breakpoints on the
-	 * last tool and system message content.
+	 * last tool (breakpoint 1) and system message content (breakpoint 2).
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Both tools and system prompts are large and stable</li>
+	 * <li>You have many tools (20+ tools, beyond the automatic lookback window)</li>
+	 * <li>You want deterministic, explicit caching of both components</li>
+	 * <li>System prompts may change independently of tools</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Behavior:</strong>
+	 * <ul>
+	 * <li>If only tools change: Both caches invalidated (tools + system)</li>
+	 * <li>If only system changes: Tools cache remains valid, system cache
+	 * invalidated</li>
+	 * </ul>
+	 * This allows efficient reuse of tool cache when only system prompts are updated.
 	 */
 	SYSTEM_AND_TOOLS,
 
 	/**
 	 * Cache the entire conversation history up to (but not including) the current user
-	 * question. This is ideal for multi-turn conversations where you want to reuse the
-	 * conversation context while asking new questions.
+	 * question. Places a cache breakpoint on the last user message in the conversation
+	 * history, enabling incremental caching as the conversation grows.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Building multi-turn conversational applications (chatbots, assistants)</li>
+	 * <li>Conversation history is large and grows over time</li>
+	 * <li>You want to reuse conversation context while asking new questions</li>
+	 * <li>Using chat memory advisors or conversation persistence</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Behavior:</strong> Each turn builds on the previous cached prefix. The
+	 * cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
+	 * + Message2], etc. This provides significant cost savings (up to 90% on cached
+	 * input tokens) and performance improvements for long conversations.
+	 * <p>
+	 * <strong>Important:</strong> Changing tools or system prompts will invalidate the
+	 * entire conversation cache due to cascade invalidation. Tool and system stability is
+	 * critical for this strategy.
 	 */
 	CONVERSATION_HISTORY
 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
index a5443166a9b..cebd9988ea4 100644
--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java
@@ -39,6 +39,7 @@
  * definition messages.
  *
  * @author Austin Dase
+ * @author Soby Chacko
  * @since 1.1.0
  **/
 public class CacheEligibilityResolver {
@@ -84,6 +85,7 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
 		return switch (anthropicCacheStrategy) {
 			case NONE -> Set.of();
 			case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
+			case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
 			case CONVERSATION_HISTORY -> Set.of(MessageType.values());
 		};
 	}
@@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa
 	}
 
 	public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() {
-		// Tool definitions are only cache-eligible when caching is enabled and
-		// the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or
-		// CONVERSATION_HISTORY). When NONE, tools must not be cached.
-		if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE)
-				|| this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
+		// Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
+		// CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
+		// relying on Anthropic's cache hierarchy to implicitly cache tools.
+		if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
+				&& this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
+				&& this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
+			logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy);
+			return null;
+		}
+
+		if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
 			logger.debug("Caching not enabled for tool definition, usedBreakpoints={}",
 					this.cacheBreakpointTracker.getCount());
 			return null;
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
index a914a243085..aac3622e137 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java
@@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception {
 		this.mockWebServer
 			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
 
+		// Create tool callback to test that tools are NOT cached with SYSTEM_ONLY
+		var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
+		MethodToolCallback toolCallback = MethodToolCallback.builder()
+			.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
+			.toolMethod(toolMethod)
+			.build();
+
 		// Test with SYSTEM_ONLY cache strategy
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
 			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
+			.toolCallbacks(List.of(toolCallback))
 			.build();
 
 		Prompt prompt = new Prompt(
@@ -130,6 +138,18 @@ void testSystemOnlyCacheStrategy() throws Exception {
 			assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
 		}
 
+		// Verify tools exist but DO NOT have cache_control (key difference from
+		// SYSTEM_AND_TOOLS)
+		if (requestBody.has("tools")) {
+			JsonNode toolsArray = requestBody.get("tools");
+			assertThat(toolsArray.isArray()).isTrue();
+			// Verify NO tool has cache_control
+			for (int i = 0; i < toolsArray.size(); i++) {
+				JsonNode tool = toolsArray.get(i);
+				assertThat(tool.has("cache_control")).isFalse();
+			}
+		}
+
 		// Verify response
 		assertThat(response).isNotNull();
 		assertThat(response.getResult().getOutput().getText()).contains("Hello!");
diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
index d056baddcec..0b594500dd3 100644
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolverTests.java
@@ -30,6 +30,7 @@
  * Tests for {@link CacheEligibilityResolver}.
  *
  * @author Austin Dase
+ * @author Soby Chacko
  */
 class CacheEligibilityResolverTests {
 
@@ -78,14 +79,215 @@ void toolCacheControlRespectsStrategy() {
 			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
 		assertThat(none.resolveToolCacheControl()).isNull();
 
-		// SYSTEM_ONLY -> tool caching enabled (uses SYSTEM TTL)
+		// SYSTEM_ONLY -> no explicit tool caching (tools cached implicitly via hierarchy)
 		CacheEligibilityResolver sys = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
 			.strategy(AnthropicCacheStrategy.SYSTEM_ONLY)
 			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
 			.build());
-		var cc = sys.resolveToolCacheControl();
+		assertThat(sys.resolveToolCacheControl()).isNull();
+
+		// TOOLS_ONLY -> tool caching enabled, system messages NOT cached
+		CacheEligibilityResolver toolsOnly = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+			.build());
+		assertThat(toolsOnly.resolveToolCacheControl()).isNotNull();
+		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "Large system prompt text")).isNull();
+
+		// SYSTEM_AND_TOOLS -> tool caching enabled (uses SYSTEM TTL)
+		CacheEligibilityResolver sysAndTools = CacheEligibilityResolver.from(AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
+			.messageTypeTtl(MessageType.SYSTEM, AnthropicCacheTtl.ONE_HOUR)
+			.build());
+		var cc = sysAndTools.resolveToolCacheControl();
 		assertThat(cc).isNotNull();
 		assertThat(cc.ttl()).isEqualTo(AnthropicCacheTtl.ONE_HOUR.getValue());
+
+		// CONVERSATION_HISTORY -> tool caching enabled
+		CacheEligibilityResolver history = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+		assertThat(history.resolveToolCacheControl()).isNotNull();
+	}
+
+	@Test
+	void toolsOnlyStrategyBehavior() {
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+			.messageTypeMinContentLength(MessageType.SYSTEM, 10)
+			.build();
+		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
+
+		// TOOLS_ONLY still counts as an enabled caching strategy
+		assertThat(resolver.isCachingEnabled()).isTrue();
+
+		// System prompt exceeds the 10-char minimum, so only the strategy can reject it
+		assertThat(resolver.resolve(MessageType.SYSTEM, "Large system prompt with plenty of content"))
+			.as("System messages should not be cached with TOOLS_ONLY strategy")
+			.isNull();
+
+		// User messages should NOT be cached
+		assertThat(resolver.resolve(MessageType.USER, "User message content")).isNull();
+
+		// Assistant messages should NOT be cached
+		assertThat(resolver.resolve(MessageType.ASSISTANT, "Assistant message content")).isNull();
+
+		// Tool result messages should NOT be cached
+		assertThat(resolver.resolve(MessageType.TOOL, "Tool result content")).isNull();
+
+		// Tool definitions SHOULD be cached, with the "ephemeral" cache control type
+		AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
+		assertThat(toolCache).as("Tool definitions should be cached with TOOLS_ONLY strategy").isNotNull();
+		assertThat(toolCache.type()).isEqualTo("ephemeral");
+	}
+
+	@Test
+	void breakpointCountForEachStrategy() {
+		// NONE: 0 breakpoints (neither tool definitions nor system content resolve)
+		CacheEligibilityResolver none = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
+		assertThat(none.resolveToolCacheControl()).isNull();
+		assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull();
+
+		// SYSTEM_ONLY: 1 breakpoint (system content only, no explicit tool breakpoint)
+		CacheEligibilityResolver systemOnly = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build());
+		assertThat(systemOnly.resolveToolCacheControl()).as("SYSTEM_ONLY should not explicitly cache tools").isNull();
+		assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull();
+
+		// TOOLS_ONLY: 1 breakpoint (tool definitions only, system content left uncached)
+		CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+		assertThat(toolsOnly.resolveToolCacheControl()).as("TOOLS_ONLY should cache tools").isNotNull();
+		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).as("TOOLS_ONLY should not cache system").isNull();
+
+		// SYSTEM_AND_TOOLS: 2 breakpoints (tool definitions + system content)
+		CacheEligibilityResolver systemAndTools = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+		assertThat(systemAndTools.resolveToolCacheControl()).as("SYSTEM_AND_TOOLS should cache tools").isNotNull();
+		assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).as("SYSTEM_AND_TOOLS should cache system")
+			.isNotNull();
+	}
+
+	@Test
+	void messageTypeEligibilityPerStrategy() {
+		// NONE: no message type resolves to a cache control
+		CacheEligibilityResolver none = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.NONE).build());
+		assertThat(none.resolve(MessageType.SYSTEM, "content")).isNull();
+		assertThat(none.resolve(MessageType.USER, "content")).isNull();
+		assertThat(none.resolve(MessageType.ASSISTANT, "content")).isNull();
+		assertThat(none.resolve(MessageType.TOOL, "content")).isNull();
+
+		// SYSTEM_ONLY: only SYSTEM resolves to a cache control
+		CacheEligibilityResolver systemOnly = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build());
+		assertThat(systemOnly.resolve(MessageType.SYSTEM, "content")).isNotNull();
+		assertThat(systemOnly.resolve(MessageType.USER, "content")).isNull();
+		assertThat(systemOnly.resolve(MessageType.ASSISTANT, "content")).isNull();
+		assertThat(systemOnly.resolve(MessageType.TOOL, "content")).isNull();
+
+		// TOOLS_ONLY: no message type resolves (tool definitions are checked elsewhere)
+		CacheEligibilityResolver toolsOnly = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+		assertThat(toolsOnly.resolve(MessageType.SYSTEM, "content")).isNull();
+		assertThat(toolsOnly.resolve(MessageType.USER, "content")).isNull();
+		assertThat(toolsOnly.resolve(MessageType.ASSISTANT, "content")).isNull();
+		assertThat(toolsOnly.resolve(MessageType.TOOL, "content")).isNull();
+
+		// SYSTEM_AND_TOOLS: of the message types, only SYSTEM resolves to a cache control
+		CacheEligibilityResolver systemAndTools = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+		assertThat(systemAndTools.resolve(MessageType.SYSTEM, "content")).isNotNull();
+		assertThat(systemAndTools.resolve(MessageType.USER, "content")).isNull();
+		assertThat(systemAndTools.resolve(MessageType.ASSISTANT, "content")).isNull();
+		assertThat(systemAndTools.resolve(MessageType.TOOL, "content")).isNull();
+
+		// CONVERSATION_HISTORY: every message type resolves to a cache control
+		CacheEligibilityResolver history = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+		assertThat(history.resolve(MessageType.SYSTEM, "content")).isNotNull();
+		assertThat(history.resolve(MessageType.USER, "content")).isNotNull();
+		assertThat(history.resolve(MessageType.ASSISTANT, "content")).isNotNull();
+		assertThat(history.resolve(MessageType.TOOL, "content")).isNotNull();
+	}
+
+	@Test
+	void toolsOnlyIsolationFromSystemChanges() {
+		// Validates that a single TOOLS_ONLY resolver gives the same answer
+		// for several different system prompt texts (simulating varying system prompts)
+		CacheEligibilityResolver resolver = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.TOOLS_ONLY).build());
+
+		// Each distinct system prompt is expected to resolve to null (not cached)
+		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a helpful assistant"))
+			.as("System prompt 1 should not be cached")
+			.isNull();
+		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a STRICT validator"))
+			.as("System prompt 2 should not be cached")
+			.isNull();
+		assertThat(resolver.resolve(MessageType.SYSTEM, "You are a creative writer"))
+			.as("System prompt 3 should not be cached")
+			.isNull();
+
+		// Tool definitions still resolve to a cache control after the system lookups above
+		assertThat(resolver.resolveToolCacheControl()).as("Tools should always be cacheable").isNotNull();
+	}
+
+	@Test
+	void systemAndToolsIndependentBreakpoints() {
+		// Validates that SYSTEM_AND_TOOLS makes both tool definitions and system content eligible
+		CacheEligibilityResolver resolver = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS).build());
+
+		// Resolve the tool cache control first, then the system one, on the same resolver
+		AnthropicApi.ChatCompletionRequest.CacheControl toolCache = resolver.resolveToolCacheControl();
+		AnthropicApi.ChatCompletionRequest.CacheControl systemCache = resolver.resolve(MessageType.SYSTEM, "content");
+
+		assertThat(toolCache).as("Tools should be cacheable").isNotNull();
+		assertThat(systemCache).as("System should be cacheable").isNotNull();
+
+		// With default options both cache controls are expected to carry the same TTL
+		assertThat(toolCache.ttl()).isEqualTo(systemCache.ttl());
+	}
+
+	@Test
+	void breakpointLimitEnforced() {
+		AnthropicCacheOptions options = AnthropicCacheOptions.builder()
+			.strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY)
+			.build();
+		CacheEligibilityResolver resolver = CacheEligibilityResolver.from(options);
+
+		// Use up all breakpoints: each resolve is paired with an explicit useCacheBlock()
+		resolver.resolve(MessageType.SYSTEM, "content"); // Uses breakpoint 1
+		resolver.useCacheBlock();
+		resolver.resolve(MessageType.USER, "content"); // Uses breakpoint 2
+		resolver.useCacheBlock();
+		resolver.resolve(MessageType.ASSISTANT, "content"); // Uses breakpoint 3
+		resolver.useCacheBlock();
+		resolver.resolve(MessageType.TOOL, "content"); // Uses breakpoint 4
+		resolver.useCacheBlock();
+
+		// A 5th resolve on an otherwise eligible type must return null (all 4 breakpoints used)
+		assertThat(resolver.resolve(MessageType.USER, "more content"))
+			.as("Should return null when all 4 breakpoints are used")
+			.isNull();
+	}
+
+	@Test
+	void emptyAndNullContentHandling() {
+		CacheEligibilityResolver resolver = CacheEligibilityResolver
+			.from(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.CONVERSATION_HISTORY).build());
+
+		// Empty string is expected to resolve to null, even for an eligible message type
+		assertThat(resolver.resolve(MessageType.SYSTEM, "")).as("Empty string should not be cached").isNull();
+
+		// Null content is expected to resolve to null rather than throw
+		assertThat(resolver.resolve(MessageType.SYSTEM, null)).as("Null content should not be cached").isNull();
+
+		// Whitespace-only content (three spaces) is expected to be cacheable with default options
+		assertThat(resolver.resolve(MessageType.SYSTEM, "   "))
+			.as("Whitespace-only content meeting length requirements should be cacheable")
+			.isNotNull();
+	}
 
 }
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
index 06bb3eb32ef..d506315d8fe 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
@@ -212,14 +212,43 @@ Different models have different minimum token thresholds for cache effectiveness
 
 === Cache Strategies
 
-Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum:
+Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum.
+Each strategy automatically places cache breakpoints at optimal locations while staying within Anthropic's 4-breakpoint limit.
 
-* `NONE`: Disables prompt caching completely
-* `SYSTEM_ONLY`: Caches only the system message content  
-* `SYSTEM_AND_TOOLS`: Caches system message and the last tool definition
-* `CONVERSATION_HISTORY`: Caches conversation history in chat memory scenarios
+[cols="2,3,5", stripes=even]
+|====
+| Strategy | Breakpoints Used | Use Case
+
+| `NONE`
+| 0
+| Disables prompt caching completely.
+Use when requests are one-off or content is too small to benefit from caching.
+
+| `SYSTEM_ONLY`
+| 1
+| Caches system message content.
+Tools are cached implicitly via Anthropic's automatic ~20-block lookback mechanism.
+Use when system prompts are large and stable with fewer than 20 tools.
+
+| `TOOLS_ONLY`
+| 1
+| Caches tool definitions only. System messages remain uncached and are processed fresh on each request.
+Use when tool definitions are large and stable (5000+ tokens) but system prompts change frequently or vary per tenant/context.
+
+| `SYSTEM_AND_TOOLS`
+| 2
+| Caches both tool definitions (breakpoint 1) and system message (breakpoint 2) explicitly.
+Use when you have 20+ tools (beyond automatic lookback) or want deterministic caching of both components.
+System changes don't invalidate tool cache.
+
+| `CONVERSATION_HISTORY`
+| 1-4
+| Caches entire conversation history up to the current user question.
+Use for multi-turn conversations with chat memory where conversation history grows over time.
+|====
 
-This strategic approach ensures optimal cache breakpoint placement while staying within Anthropic's 4-breakpoint limit.
+IMPORTANT: Due to Anthropic's cascade invalidation, changing tool definitions will invalidate ALL downstream cache breakpoints (system, messages).
+Tool stability is critical when using the `SYSTEM_ONLY`, `SYSTEM_AND_TOOLS`, or `CONVERSATION_HISTORY` strategies, because tools are part of the cached prefix in all of them.
 
 === Enabling Prompt Caching
 
@@ -227,9 +256,11 @@ Enable prompt caching by setting `cacheOptions` on `AnthropicChatOptions` and ch
 
 ==== System-Only Caching
 
+Best for: Stable system prompts with <20 tools (tools cached implicitly via automatic lookback).
+
 [source,java]
 ----
-// Cache system message content
+// Cache system message content (tools cached implicitly)
 ChatResponse response = chatModel.call(
     new Prompt(
         List.of(
@@ -247,11 +278,39 @@ ChatResponse response = chatModel.call(
 );
 ----
 
+==== Tools-Only Caching
+
+Best for: Large stable tool sets with dynamic system prompts (multi-tenant apps, A/B testing).
+
+[source,java]
+----
+// Cache tool definitions, system prompt processed fresh each time
+ChatResponse response = chatModel.call(
+    new Prompt(
+        List.of(
+            new SystemMessage("You are a " + persona + " assistant..."), // Dynamic per-tenant
+            new UserMessage("What's the weather like in San Francisco?")
+        ),
+        AnthropicChatOptions.builder()
+            .model("claude-sonnet-4")
+            .cacheOptions(AnthropicCacheOptions.builder()
+                .strategy(AnthropicCacheStrategy.TOOLS_ONLY)
+                .build())
+            .toolCallbacks(weatherToolCallback) // Large tool set cached
+            .maxTokens(500)
+            .build()
+    )
+);
+----
+
 ==== System and Tools Caching
 
+Best for: 20+ tools (beyond automatic lookback) or when both components should be cached independently.
+
 [source,java]
 ----
-// Cache system message and the last tool definition
+// Cache both tool definitions and system message with independent breakpoints
+// Changing system won't invalidate tool cache (but changing tools invalidates both)
 ChatResponse response = chatModel.call(
     new Prompt(
         List.of(
@@ -259,11 +318,11 @@ ChatResponse response = chatModel.call(
             new UserMessage("What's the weather like in San Francisco?")
         ),
         AnthropicChatOptions.builder()
-            .model("claude-sonnet-4") 
+            .model("claude-sonnet-4")
             .cacheOptions(AnthropicCacheOptions.builder()
                 .strategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS)
                 .build())
-            .toolCallbacks(weatherToolCallback)
+            .toolCallbacks(weatherToolCallback) // 20+ tools
             .maxTokens(500)
             .build()
     )
@@ -317,7 +376,9 @@ String response = ChatClient.create(chatModel)
 
 ==== Per-Message TTL (5m or 1h)
 
-By default, cached content uses a 5-minute TTL. You can set a 1-hour TTL for specific message types. When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header.
+By default, cached content uses a 5-minute TTL.
+You can set a 1-hour TTL for specific message types.
+When 1-hour TTL is used, Spring AI automatically sets the required Anthropic beta header.
 
 [source,java]
 ----
@@ -544,6 +605,62 @@ for (String filename : codeFiles) {
 // Guidelines cached after first request, subsequent reviews are faster and cheaper
 ----
 
+==== Multi-Tenant SaaS with Shared Tools
+
+Build a multi-tenant application where tools are shared but system prompts are customized per tenant:
+
+[source,java]
+----
+// Define large shared tool set (used by all tenants)
+List<ToolCallback> sharedTools = List.of(
+    weatherToolCallback,    // ~500 tokens
+    calendarToolCallback,   // ~800 tokens
+    emailToolCallback,      // ~700 tokens
+    analyticsToolCallback,  // ~600 tokens
+    reportingToolCallback   // ~900 tokens
+    // ... 20+ more tools, totaling 5000+ tokens
+);
+
+@Service
+public class MultiTenantAIService {
+
+    public String handleTenantRequest(String tenantId, String userQuery) {
+        // Get tenant-specific configuration
+        TenantConfig config = tenantRepository.findById(tenantId);
+
+        // Dynamic system prompt per tenant
+        String tenantSystemPrompt = String.format("""
+            You are %s's AI assistant. Company values: %s.
+            Brand voice: %s. Compliance requirements: %s.
+            """, config.companyName(), config.values(),
+                 config.brandVoice(), config.compliance());
+
+        ChatResponse response = chatModel.call(
+            new Prompt(
+                List.of(
+                    new SystemMessage(tenantSystemPrompt), // Different per tenant, NOT cached
+                    new UserMessage(userQuery)
+                ),
+                AnthropicChatOptions.builder()
+                    .model("claude-sonnet-4")
+                    .cacheOptions(AnthropicCacheOptions.builder()
+                        .strategy(AnthropicCacheStrategy.TOOLS_ONLY) // Cache tools only
+                        .build())
+                    .toolCallbacks(sharedTools) // Cached once, shared across all tenants
+                    .maxTokens(800)
+                    .build()
+            )
+        );
+
+        return response.getResult().getOutput().getText();
+    }
+}
+
+// Tools cached once (5000 tokens @ 10% = 500 token-equivalent cost on cache hits)
+// Each tenant's unique system prompt processed fresh (200-500 tokens @ 100%)
+// Effective input cost per cache hit: ~700-1000 tokens vs ~5200-5500 without caching
+----
+
 ==== Customer Support with Knowledge Base
 
 Create a customer support system that caches your product knowledge base for consistent, accurate responses:
@@ -562,7 +679,7 @@ String knowledgeBase = """
 
 @Service
 public class CustomerSupportService {
-    
+
     public String handleCustomerQuery(String customerQuery, String customerId) {
         ChatResponse response = chatModel.call(
             new Prompt(
@@ -580,7 +697,7 @@ public class CustomerSupportService {
                     .build()
             )
         );
-        
+
         return response.getResult().getOutput().getText();
     }
 }
@@ -591,18 +708,27 @@ public class CustomerSupportService {
 
 === Best Practices
 
-1. **Choose the Right Strategy**: 
-   - Use `SYSTEM_ONLY` for reusable system prompts and instructions
-   - Use `SYSTEM_AND_TOOLS` when you have both system content and tool definitions to cache (the last tool definition is cached)
+1. **Choose the Right Strategy**:
+   - Use `SYSTEM_ONLY` for stable system prompts with <20 tools (tools cached implicitly via automatic lookback)
+   - Use `TOOLS_ONLY` for large stable tool sets (5000+ tokens) with dynamic system prompts (multi-tenant, A/B testing)
+   - Use `SYSTEM_AND_TOOLS` when you have 20+ tools (beyond automatic lookback) or want both cached independently
    - Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations
    - Use `NONE` to explicitly disable caching
 
-2. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models).
+2. **Understand Cascade Invalidation**: Anthropic's cache hierarchy (`tools → system → messages`) means changes flow downward:
+   - Changing **tools** invalidates: tools + system + messages (all caches) ❌❌❌
+   - Changing **system** invalidates: system + messages (tools cache remains valid) ✅❌❌
+   - Changing **messages** invalidates: messages only (tools and system caches remain valid) ✅✅❌
+
+   **Tool stability is critical** when using the `SYSTEM_ONLY`, `SYSTEM_AND_TOOLS`, or `CONVERSATION_HISTORY` strategies.
+
+3. **SYSTEM_AND_TOOLS Independence**: With `SYSTEM_AND_TOOLS`, changing the system message does NOT invalidate the tool cache, allowing efficient reuse of cached tools even when system prompts vary.
+
+4. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models).
 
-3. **Reuse Identical Content**: Caching works best with exact matches of prompt content.
-Even small changes will require a new cache entry.
+5. **Reuse Identical Content**: Caching works best with exact matches of prompt content. Even small changes will require a new cache entry.
 
-4. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness:
+6. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness:
    ```java
    AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage();
    if (usage != null) {
@@ -611,11 +737,11 @@ Even small changes will require a new cache entry.
    }
    ```
 
-5. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
+7. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit.
 
-6. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer.
+8. **Cache Lifetime**: Default TTL is 5 minutes; set 1-hour TTL per message type via `messageTypeTtl(...)`. Each cache access resets the timer.
 
-7. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response.
+9. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response.
 
 === Implementation Details