diff --git a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockChatOptions.java b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockChatOptions.java index ba0eceb2a62..54fcd367df6 100644 --- a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockChatOptions.java +++ b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockChatOptions.java @@ -29,6 +29,7 @@ import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.springframework.ai.bedrock.converse.api.BedrockCacheOptions; import org.springframework.ai.model.tool.ToolCallingChatOptions; import org.springframework.ai.tool.ToolCallback; import org.springframework.lang.Nullable; @@ -81,6 +82,9 @@ public class BedrockChatOptions implements ToolCallingChatOptions { @JsonIgnore private Boolean internalToolExecutionEnabled; + @JsonIgnore + private BedrockCacheOptions cacheOptions; + public static Builder builder() { return new Builder(); } @@ -101,6 +105,7 @@ public static BedrockChatOptions fromOptions(BedrockChatOptions fromOptions) { .toolNames(new HashSet<>(fromOptions.getToolNames())) .toolContext(new HashMap<>(fromOptions.getToolContext())) .internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled()) + .cacheOptions(fromOptions.getCacheOptions()) .build(); } @@ -237,6 +242,16 @@ public void setInternalToolExecutionEnabled(@Nullable Boolean internalToolExecut this.internalToolExecutionEnabled = internalToolExecutionEnabled; } + @JsonIgnore + public BedrockCacheOptions getCacheOptions() { + return this.cacheOptions; + } + + @JsonIgnore + public void setCacheOptions(BedrockCacheOptions cacheOptions) { + this.cacheOptions = cacheOptions; + } + @Override @SuppressWarnings("unchecked") public BedrockChatOptions copy() { @@ -259,14 +274,15 @@ public boolean equals(Object o) { && Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topK, that.topK) && Objects.equals(this.topP, that.topP) && Objects.equals(this.toolCallbacks, that.toolCallbacks) && Objects.equals(this.toolNames, that.toolNames) && Objects.equals(this.toolContext, that.toolContext) - && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled); + && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled) + && Objects.equals(this.cacheOptions, that.cacheOptions); } @Override public int hashCode() { return Objects.hash(this.model, this.frequencyPenalty, this.maxTokens, this.presencePenalty, this.requestParameters, this.stopSequences, this.temperature, this.topK, this.topP, this.toolCallbacks, - this.toolNames, this.toolContext, this.internalToolExecutionEnabled); + this.toolNames, this.toolContext, this.internalToolExecutionEnabled, this.cacheOptions); } public static final class Builder { @@ -356,6 +372,11 @@ public Builder internalToolExecutionEnabled(@Nullable Boolean internalToolExecut return this; } + public Builder cacheOptions(BedrockCacheOptions cacheOptions) { + this.options.setCacheOptions(cacheOptions); + return this; + } + public BedrockChatOptions build() { return this.options; } diff --git a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java index 3c4e287ec2c..adc54cf132d 
100644 --- a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java +++ b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Base64; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -46,6 +47,7 @@ import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain; import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeAsyncClient; import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; +import software.amazon.awssdk.services.bedrockruntime.model.CachePointBlock; import software.amazon.awssdk.services.bedrockruntime.model.ContentBlock; import software.amazon.awssdk.services.bedrockruntime.model.ConversationRole; import software.amazon.awssdk.services.bedrockruntime.model.ConverseMetrics; @@ -74,6 +76,8 @@ import software.amazon.awssdk.services.bedrockruntime.model.VideoFormat; import software.amazon.awssdk.services.bedrockruntime.model.VideoSource; +import org.springframework.ai.bedrock.converse.api.BedrockCacheOptions; +import org.springframework.ai.bedrock.converse.api.BedrockCacheStrategy; import org.springframework.ai.bedrock.converse.api.BedrockMediaFormat; import org.springframework.ai.bedrock.converse.api.ConverseApiUtils; import org.springframework.ai.bedrock.converse.api.URLValidator; @@ -316,6 +320,8 @@ else if (prompt.getOptions() instanceof ToolCallingChatOptions toolCallingChatOp .internalToolExecutionEnabled(runtimeOptions.getInternalToolExecutionEnabled() != null ? runtimeOptions.getInternalToolExecutionEnabled() : this.defaultOptions.getInternalToolExecutionEnabled()) + .cacheOptions(runtimeOptions.getCacheOptions() != null ? 
runtimeOptions.getCacheOptions() + : this.defaultOptions.getCacheOptions()) .build(); } @@ -326,85 +332,164 @@ else if (prompt.getOptions() instanceof ToolCallingChatOptions toolCallingChatOp ConverseRequest createRequest(Prompt prompt) { - List instructionMessages = prompt.getInstructions() + BedrockChatOptions updatedRuntimeOptions = prompt.getOptions().copy(); + + // Get cache options to determine strategy + BedrockCacheOptions cacheOptions = updatedRuntimeOptions.getCacheOptions(); + boolean shouldCacheConversationHistory = cacheOptions != null + && cacheOptions.getStrategy() == BedrockCacheStrategy.CONVERSATION_HISTORY; + + // Get all non-system messages + List allNonSystemMessages = prompt.getInstructions() .stream() .filter(message -> message.getMessageType() != MessageType.SYSTEM) - .map(message -> { - if (message.getMessageType() == MessageType.USER) { - List contents = new ArrayList<>(); - if (message instanceof UserMessage userMessage) { - contents.add(ContentBlock.fromText(userMessage.getText())); - - if (!CollectionUtils.isEmpty(userMessage.getMedia())) { - List mediaContent = userMessage.getMedia() - .stream() - .map(this::mapMediaToContentBlock) - .toList(); - contents.addAll(mediaContent); - } - } - return Message.builder().content(contents).role(ConversationRole.USER).build(); + .toList(); + + // Find the last user message index for CONVERSATION_HISTORY caching + int lastUserMessageIndex = -1; + if (shouldCacheConversationHistory) { + for (int i = allNonSystemMessages.size() - 1; i >= 0; i--) { + if (allNonSystemMessages.get(i).getMessageType() == MessageType.USER) { + lastUserMessageIndex = i; + break; } - else if (message.getMessageType() == MessageType.ASSISTANT) { - AssistantMessage assistantMessage = (AssistantMessage) message; - List contentBlocks = new ArrayList<>(); - if (StringUtils.hasText(message.getText())) { - contentBlocks.add(ContentBlock.fromText(message.getText())); + } + if (logger.isDebugEnabled()) { + logger.debug("CONVERSATION_HISTORY caching: lastUserMessageIndex={}, totalMessages={}", + lastUserMessageIndex, allNonSystemMessages.size()); + } + } + + // Build instruction messages with potential caching + List instructionMessages = new ArrayList<>(); + for (int i = 0; i < allNonSystemMessages.size(); i++) { + org.springframework.ai.chat.messages.Message message = allNonSystemMessages.get(i); + + // Determine if this message should have a cache point + // For CONVERSATION_HISTORY: cache point goes on the last user message + boolean shouldApplyCachePoint = shouldCacheConversationHistory && i == lastUserMessageIndex; + + if (message.getMessageType() == MessageType.USER) { + List contents = new ArrayList<>(); + if (message instanceof UserMessage) { + var userMessage = (UserMessage) message; + contents.add(ContentBlock.fromText(userMessage.getText())); + + if (!CollectionUtils.isEmpty(userMessage.getMedia())) { + List mediaContent = userMessage.getMedia() + .stream() + .map(this::mapMediaToContentBlock) + .toList(); + contents.addAll(mediaContent); } - if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) { - for (AssistantMessage.ToolCall toolCall : assistantMessage.getToolCalls()) { + } - var argumentsDocument = ConverseApiUtils - .convertObjectToDocument(ModelOptionsUtils.jsonToMap(toolCall.arguments())); + // Apply cache point if this is the last user message + if (shouldApplyCachePoint) { + CachePointBlock cachePoint = CachePointBlock.builder().type("default").build(); + contents.add(ContentBlock.fromCachePoint(cachePoint)); + 
logger.debug("Applied cache point on last user message (conversation history caching)"); + } + + instructionMessages.add(Message.builder().content(contents).role(ConversationRole.USER).build()); + } + else if (message.getMessageType() == MessageType.ASSISTANT) { + AssistantMessage assistantMessage = (AssistantMessage) message; + List contentBlocks = new ArrayList<>(); + if (StringUtils.hasText(message.getText())) { + contentBlocks.add(ContentBlock.fromText(message.getText())); + } + if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) { + for (AssistantMessage.ToolCall toolCall : assistantMessage.getToolCalls()) { - contentBlocks.add(ContentBlock.fromToolUse(ToolUseBlock.builder() - .toolUseId(toolCall.id()) - .name(toolCall.name()) - .input(argumentsDocument) - .build())); + var argumentsDocument = ConverseApiUtils + .convertObjectToDocument(ModelOptionsUtils.jsonToMap(toolCall.arguments())); + + contentBlocks.add(ContentBlock.fromToolUse(ToolUseBlock.builder() + .toolUseId(toolCall.id()) + .name(toolCall.name()) + .input(argumentsDocument) + .build())); - } } - return Message.builder().content(contentBlocks).role(ConversationRole.ASSISTANT).build(); } - else if (message.getMessageType() == MessageType.TOOL) { - List contentBlocks = ((ToolResponseMessage) message).getResponses() - .stream() - .map(toolResponse -> { + + instructionMessages + .add(Message.builder().content(contentBlocks).role(ConversationRole.ASSISTANT).build()); + } + else if (message.getMessageType() == MessageType.TOOL) { + List contentBlocks = new ArrayList<>( + ((ToolResponseMessage) message).getResponses().stream().map(toolResponse -> { ToolResultBlock toolResultBlock = ToolResultBlock.builder() .toolUseId(toolResponse.id()) .content(ToolResultContentBlock.builder().text(toolResponse.responseData()).build()) .build(); return ContentBlock.fromToolResult(toolResultBlock); - }) - .toList(); - return Message.builder().content(contentBlocks).role(ConversationRole.USER).build(); - } - else { - throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType()); - } - }) - .toList(); + }).toList()); + + instructionMessages.add(Message.builder().content(contentBlocks).role(ConversationRole.USER).build()); + } + else { + throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType()); + } + } + + // Determine if system message caching should be applied + boolean shouldCacheSystem = cacheOptions != null + && (cacheOptions.getStrategy() == BedrockCacheStrategy.SYSTEM_ONLY + || cacheOptions.getStrategy() == BedrockCacheStrategy.SYSTEM_AND_TOOLS); + + if (logger.isDebugEnabled() && cacheOptions != null) { + logger.debug("Cache strategy: {}, shouldCacheSystem: {}", cacheOptions.getStrategy(), shouldCacheSystem); + } - List systemMessages = prompt.getInstructions() + // Build system messages with optional caching on last message + List systemMessageList = prompt.getInstructions() .stream() .filter(m -> m.getMessageType() == MessageType.SYSTEM) - .map(sysMessage -> SystemContentBlock.builder().text(sysMessage.getText()).build()) .toList(); - BedrockChatOptions updatedRuntimeOptions = prompt.getOptions().copy(); + List systemMessages = new ArrayList<>(); + for (int i = 0; i < systemMessageList.size(); i++) { + org.springframework.ai.chat.messages.Message sysMessage = systemMessageList.get(i); + + // Add the text content block + SystemContentBlock textBlock = SystemContentBlock.builder().text(sysMessage.getText()).build(); + systemMessages.add(textBlock); + + // Apply cache 
point marker after last system message if caching is enabled + // SystemContentBlock is a UNION type - text and cachePoint must be separate + // blocks + boolean isLastSystem = (i == systemMessageList.size() - 1); + if (isLastSystem && shouldCacheSystem) { + CachePointBlock cachePoint = CachePointBlock.builder().type("default").build(); + SystemContentBlock cachePointBlock = SystemContentBlock.builder().cachePoint(cachePoint).build(); + systemMessages.add(cachePointBlock); + logger.debug("Applied cache point after system message"); + } + } ToolConfiguration toolConfiguration = null; // Add the tool definitions to the request's tools parameter. List toolDefinitions = this.toolCallingManager.resolveToolDefinitions(updatedRuntimeOptions); + // Determine if tool caching should be applied + boolean shouldCacheTools = cacheOptions != null + && (cacheOptions.getStrategy() == BedrockCacheStrategy.TOOLS_ONLY + || cacheOptions.getStrategy() == BedrockCacheStrategy.SYSTEM_AND_TOOLS); + if (!CollectionUtils.isEmpty(toolDefinitions)) { - List bedrockTools = toolDefinitions.stream().map(toolDefinition -> { + List bedrockTools = new ArrayList<>(); + + for (int i = 0; i < toolDefinitions.size(); i++) { + ToolDefinition toolDefinition = toolDefinitions.get(i); var description = toolDefinition.description(); var name = toolDefinition.name(); String inputSchema = toolDefinition.inputSchema(); - return Tool.builder() + + // Create tool specification + Tool tool = Tool.builder() .toolSpec(ToolSpecification.builder() .name(name) .description(description) @@ -412,7 +497,18 @@ else if (message.getMessageType() == MessageType.TOOL) { ConverseApiUtils.convertObjectToDocument(ModelOptionsUtils.jsonToMap(inputSchema)))) .build()) .build(); - }).toList(); + bedrockTools.add(tool); + + // Apply cache point marker after last tool if caching is enabled + // Tool is a UNION type - toolSpec and cachePoint must be separate objects + boolean isLastTool = (i == toolDefinitions.size() - 1); + if (isLastTool && shouldCacheTools) { + CachePointBlock cachePoint = CachePointBlock.builder().type("default").build(); + Tool cachePointTool = Tool.builder().cachePoint(cachePoint).build(); + bedrockTools.add(cachePointTool); + logger.debug("Applied cache point after tool definitions"); + } + } toolConfiguration = ToolConfiguration.builder().tools(bedrockTools).build(); } @@ -635,12 +731,23 @@ private ChatResponse toChatResponse(ConverseResponse response, ChatResponse perv ConverseMetrics metrics = response.metrics(); - var chatResponseMetaData = ChatResponseMetadata.builder() + var metadataBuilder = ChatResponseMetadata.builder() .id(response.responseMetadata() != null ? 
response.responseMetadata().requestId() : "Unknown") - .usage(usage) - .build(); + .usage(usage); + + // Add cache metrics if available + Map additionalMetadata = new HashMap<>(); + if (response.usage().cacheReadInputTokens() != null) { + additionalMetadata.put("cacheReadInputTokens", response.usage().cacheReadInputTokens()); + } + if (response.usage().cacheWriteInputTokens() != null) { + additionalMetadata.put("cacheWriteInputTokens", response.usage().cacheWriteInputTokens()); + } + if (!additionalMetadata.isEmpty()) { + metadataBuilder.metadata(additionalMetadata); + } - return new ChatResponse(allGenerations, chatResponseMetaData); + return new ChatResponse(allGenerations, metadataBuilder.build()); } /** diff --git a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheOptions.java b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheOptions.java new file mode 100644 index 00000000000..cb15ff6ab03 --- /dev/null +++ b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheOptions.java @@ -0,0 +1,110 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.bedrock.converse.api; + +/** + * AWS Bedrock cache options for configuring prompt caching behavior. + * + *

+ * <p>
+ * Prompt caching allows you to reduce latency and costs by reusing previously processed
+ * prompt content. Cached content has a fixed 5-minute Time To Live (TTL) that resets with
+ * each cache hit.
+ *
+ * <p>
+ * Example usage:
+ *
+ * <pre>{@code
+ * BedrockCacheOptions cacheOptions = BedrockCacheOptions.builder()
+ *     .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+ *     .build();
+ *
+ * ChatResponse response = chatModel.call(new Prompt(
+ *     List.of(new SystemMessage(largeSystemPrompt), new UserMessage("Question")),
+ *     BedrockChatOptions.builder()
+ *         .cacheOptions(cacheOptions)
+ *         .build()
+ * ));
+ * }
+ * + * @author Soby Chacko + * @since 1.1.0 + * @see BedrockCacheStrategy + * @see AWS Bedrock + * Prompt Caching + */ +public class BedrockCacheOptions { + + private BedrockCacheStrategy strategy = BedrockCacheStrategy.NONE; + + /** + * Creates a new builder for constructing BedrockCacheOptions. + * @return a new Builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Gets the caching strategy. + * @return the configured BedrockCacheStrategy + */ + public BedrockCacheStrategy getStrategy() { + return this.strategy; + } + + /** + * Sets the caching strategy. + * @param strategy the BedrockCacheStrategy to use + */ + public void setStrategy(BedrockCacheStrategy strategy) { + this.strategy = strategy; + } + + @Override + public String toString() { + return "BedrockCacheOptions{" + "strategy=" + this.strategy + '}'; + } + + /** + * Builder for constructing BedrockCacheOptions instances. + */ + public static class Builder { + + private final BedrockCacheOptions options = new BedrockCacheOptions(); + + /** + * Sets the caching strategy. + * @param strategy the BedrockCacheStrategy to use + * @return this Builder instance + */ + public Builder strategy(BedrockCacheStrategy strategy) { + this.options.setStrategy(strategy); + return this; + } + + /** + * Builds the BedrockCacheOptions instance. + * @return the configured BedrockCacheOptions + */ + public BedrockCacheOptions build() { + return this.options; + } + + } + +} diff --git a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheStrategy.java b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheStrategy.java new file mode 100644 index 00000000000..ba76d9c933c --- /dev/null +++ b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/api/BedrockCacheStrategy.java @@ -0,0 +1,187 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.bedrock.converse.api; + +/** + * Defines the caching strategy for AWS Bedrock prompt caching. Bedrock allows up to 4 + * cache breakpoints per request, and the cache hierarchy follows the order: tools → + * system → messages. + * + *

+ * <p>
+ * Prompt caching reduces latency and costs by reusing previously processed prompt
+ * content. Cached content has a 5-minute Time To Live (TTL) that resets with each cache
+ * hit.
+ *
+ * @author Soby Chacko
+ * @since 1.1.0
+ * @see <a href="https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html">AWS Bedrock
+ * Prompt Caching</a>
+ */
+public enum BedrockCacheStrategy {
+
+	/**
+	 * No caching (default behavior). All content is processed fresh on each request.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Requests are one-off or highly variable</li>
+	 * <li>Content doesn't meet minimum token requirements (1024+ tokens for most
+	 * models)</li>
+	 * <li>You want to avoid caching overhead</li>
+	 * </ul>
+	 */
+	NONE,
+
+	/**
+	 * Cache system instructions only. Places a cache breakpoint on the system message
+	 * content. Tools are cached implicitly via Bedrock's automatic ~20-block lookback
+	 * mechanism (content before the cache breakpoint is included in the cache).
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>System prompts are large and stable (1024+ tokens)</li>
+	 * <li>Tool definitions are relatively small (&lt;20 tools)</li>
+	 * <li>You want simple, single-breakpoint caching</li>
+	 * </ul>
+	 * <p>
+	 * Note: Changing tools will invalidate the cache since tools are
+	 * part of the cache prefix (they appear before system in the request hierarchy).
+	 * <p>
+	 * This is the recommended starting point for most use cases as it provides the best
+	 * balance of simplicity and effectiveness.
+	 */
+	SYSTEM_ONLY,
+
+	/**
+	 * Cache tool definitions only. Places a cache breakpoint after the last tool
+	 * definition. System messages and conversation history are not cached.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>You have many tool definitions (20+ tools, 1024+ tokens total)</li>
+	 * <li>Tools are stable but system prompts change frequently</li>
+	 * <li>You want to cache tool schemas without caching system instructions</li>
+	 * </ul>
+	 * <p>
+	 * Important Model Compatibility:
+	 * <ul>
+	 * <li>Supported: Claude 3.x and Claude 4.x models (all variants)</li>
+	 * <li>Not Supported: Amazon Nova models (Nova Micro, Lite, Pro,
+	 * Premier) - these models only support caching for system and messages, not
+	 * tools</li>
+	 * </ul>
+	 * <p>
+	 * If you use this strategy with an unsupported model, AWS will return a
+	 * ValidationException. Use {@link #SYSTEM_ONLY} instead for Amazon Nova models.
+	 * <p>
+	 * Note: If no tools are present in the request, this strategy is
+	 * equivalent to NONE (no caching occurs).
+	 */
+	TOOLS_ONLY,
+
+	/**
+	 * Cache both tool definitions and system instructions. Places two cache breakpoints:
+	 * one after the last tool definition, and one after the last system message.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Both tools and system prompts are large and stable (1024+ tokens each)</li>
+	 * <li>You want maximum cache coverage</li>
+	 * <li>You're willing to use 2 of your 4 available cache breakpoints</li>
+	 * </ul>
+	 * <p>
+	 * Important Model Compatibility:
+	 * <ul>
+	 * <li>Supported: Claude 3.x and Claude 4.x models (all variants)</li>
+	 * <li>Not Supported: Amazon Nova models (Nova Micro, Lite, Pro,
+	 * Premier) - these models only support caching for system and messages, not
+	 * tools</li>
+	 * </ul>
+	 * <p>
+	 * If you use this strategy with an unsupported model, AWS will return a
+	 * ValidationException. Use {@link #SYSTEM_ONLY} instead for Amazon Nova models.
+	 * <p>
+	 * Cache Invalidation:
+	 * <ul>
+	 * <li>Changing tools invalidates both cache breakpoints (tools are the prefix)</li>
+	 * <li>Changing system prompts only invalidates the system cache (tools remain
+	 * cached)</li>
+	 * </ul>
+	 * <p>
+	 * This provides the most comprehensive caching but uses more cache breakpoints.
+	 */
+	SYSTEM_AND_TOOLS,
+
+	/**
+	 * Cache the entire conversation history up to and including the current user
+	 * question. This is ideal for multi-turn conversations where you want to reuse the
+	 * conversation context while asking new questions.
+	 * <p>
+	 * A cache breakpoint is placed on the last user message in the conversation. This
+	 * enables incremental caching where each conversation turn builds on the previous
+	 * cached prefix, providing significant cost savings and performance improvements.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Building multi-turn conversational applications (chatbots, assistants)</li>
+	 * <li>Conversation history is substantial (1024+ tokens)</li>
+	 * <li>Users are asking follow-up questions that require context from earlier
+	 * messages</li>
+	 * <li>You want to reduce latency and costs for ongoing conversations</li>
+	 * </ul>
+	 * <p>
+	 * Model Compatibility:
+	 * <ul>
+	 * <li>Verified: Claude 3.x and Claude 4.x models (all variants)</li>
+	 * <li>Note: Amazon Nova models theoretically support conversation
+	 * caching, but have not been verified in integration tests</li>
+	 * </ul>
+	 * <p>
+	 * How it works:
+	 * <ol>
+	 * <li>Identifies the last user message in the conversation</li>
+	 * <li>Places cache breakpoint as the last content block on that message</li>
+	 * <li>All messages up to and including the last user message are cached (system,
+	 * previous user/assistant turns, and current user question)</li>
+	 * <li>On the next turn, the cached context is reused and a new cache is created
+	 * including the assistant response and new user question</li>
+	 * </ol>
+	 * <p>
+	 * Example conversation flow:
+	 *
+	 * <pre>
+	 * Turn 1: "My name is Alice" → Response cached
+	 * Turn 2: "I work as a data scientist" → Response cached
+	 * Turn 3: "What career advice would you give me?" ← Cache applies here
+	 *         (Turns 1-2 are read from cache, Turn 3 question is fresh)
+	 * </pre>
+	 *
+	 * Cache behavior:
+	 * <ul>
+	 * <li>First request: Creates cache (cacheWriteInputTokens &gt; 0)</li>
+	 * <li>Subsequent requests: Reads from cache (cacheReadInputTokens &gt; 0)</li>
+	 * <li>Cache TTL: 5 minutes (resets on each cache hit)</li>
+	 * <li>Minimum content: 1024+ tokens required for caching to activate</li>
+	 * </ul>
+	 *
+ */ + CONVERSATION_HISTORY + +} diff --git a/models/spring-ai-bedrock-converse/src/test/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModelIT.java b/models/spring-ai-bedrock-converse/src/test/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModelIT.java index 2b2361cba03..1ede468204c 100644 --- a/models/spring-ai-bedrock-converse/src/test/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModelIT.java +++ b/models/spring-ai-bedrock-converse/src/test/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModelIT.java @@ -31,9 +31,12 @@ import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; +import org.springframework.ai.bedrock.converse.api.BedrockCacheOptions; +import org.springframework.ai.bedrock.converse.api.BedrockCacheStrategy; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.messages.AssistantMessage; import org.springframework.ai.chat.messages.Message; +import org.springframework.ai.chat.messages.SystemMessage; import org.springframework.ai.chat.messages.UserMessage; import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.model.ChatResponse; @@ -366,6 +369,478 @@ void validateStreamCallResponseMetadata() { validateChatResponseMetadata(response, model); } + @Test + void testSystemOnlyPromptCaching() { + // NOTE: Prompt caching is supported by the following models (as of 2025): + // - Claude 3 Opus 4.1, Claude Opus 4, Claude Sonnet 4.5, Claude Sonnet 4, Claude + // 3.7 Sonnet + // - Claude 3.5 Haiku, Claude 3.5 Sonnet v2 + // - Amazon Nova Micro, Lite, Pro, Premier + // + // IMPORTANT: Newer Claude models require AWS Bedrock inference profiles instead + // of direct model IDs. + // If you get ValidationException about "on-demand throughput isn't supported", + // you need to: + // 1. Use an inference profile ARN/ID (e.g., + // "us.anthropic.claude-3-5-haiku-20241022-v1:0") + // 2. Ensure your AWS account/region has cross-region inference profiles enabled + // 3. Or use Amazon Nova models which work with direct model IDs + // + // Amazon Nova models work without inference profiles and are used in this test + // for reliability. + String model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"; + + // Create a large system prompt (needs to exceed minimum token threshold for + // caching) + // Amazon Nova models require 1024+ tokens for caching to activate + // Each repetition adds ~160 tokens, so 7 repetitions = ~1120 tokens + String basePrompt = """ + You are an expert software architect with deep knowledge of distributed systems, + microservices, cloud computing, and software design patterns. Your role is to provide + detailed technical guidance on system architecture, design decisions, and best practices. 
+ + Key areas of expertise: + - Distributed systems design and architecture + - Microservices patterns and anti-patterns + - Cloud-native application development + - Event-driven architectures + - Database design and scaling strategies + - API design and RESTful services + - Security best practices + - Performance optimization and scalability + + """; + + // Repeat to exceed 1024 token minimum (approximate: 1 token ≈ 4 characters) + StringBuilder largeSystemPromptBuilder = new StringBuilder(); + for (int i = 0; i < 12; i++) { + largeSystemPromptBuilder.append(basePrompt); + } + largeSystemPromptBuilder.append("When answering questions, provide clear, structured responses with examples."); + + String largeSystemPrompt = largeSystemPromptBuilder.toString(); + + BedrockCacheOptions cacheOptions = BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.SYSTEM_ONLY) + .build(); + + BedrockChatOptions chatOptions = BedrockChatOptions.builder() + .model(model) + .cacheOptions(cacheOptions) + .maxTokens(500) + .build(); + + // First request - should create cache + ChatResponse response1 = this.chatModel.call(new Prompt( + List.of(new SystemMessage(largeSystemPrompt), new UserMessage("What is a monolith?")), chatOptions)); + + // Verify first response is valid + assertThat(response1.getResults()).hasSize(1); + assertThat(response1.getResult().getOutput().getText()).isNotEmpty(); + + // Verify cache write tokens are present and positive (cache was created) + Integer cacheWrite1 = response1.getMetadata().get("cacheWriteInputTokens"); + logger.info("First request - cacheWriteInputTokens: {}", cacheWrite1); + assertThat(cacheWrite1).as("First request should write tokens to cache").isNotNull().isPositive(); + + // Verify no cache read on first request + Integer cacheRead1 = response1.getMetadata().get("cacheReadInputTokens"); + assertThat(cacheRead1).as("First request should not read from cache").isIn(null, 0); + + // Second request with same system prompt - should hit cache + ChatResponse response2 = this.chatModel + .call(new Prompt(List.of(new SystemMessage(largeSystemPrompt), new UserMessage("What is a microservice?")), + chatOptions)); + + // Verify second response is valid + assertThat(response2.getResults()).hasSize(1); + assertThat(response2.getResult().getOutput().getText()).isNotEmpty(); + + // Verify cache read tokens are present and positive (cache was used) + Integer cacheRead2 = response2.getMetadata().get("cacheReadInputTokens"); + logger.info("Second request - cacheReadInputTokens: {}", cacheRead2); + assertThat(cacheRead2).as("Second request should read tokens from cache").isNotNull().isPositive(); + + // Verify cache read matches what was written + assertThat(cacheRead2).as("Cache read tokens should match cache write tokens").isEqualTo(cacheWrite1); + + // Verify no cache write on second request (reusing existing cache) + Integer cacheWrite2 = response2.getMetadata().get("cacheWriteInputTokens"); + assertThat(cacheWrite2).as("Second request should not write new tokens to cache").isIn(null, 0); + } + + @Test + void testToolsOnlyPromptCaching() { + // NOTE: Testing tools-only caching requires multiple large tool definitions to + // exceed 1K tokens + // IMPORTANT: This test requires a Claude model (e.g., Claude 3.5 Haiku, Claude + // 3.7 Sonnet) + // Amazon Nova models do NOT support tool caching and will return + // ValidationException + String model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"; + + // Create multiple tool callbacks to exceed the 1K token minimum for caching 
+ // Each tool definition adds ~200-300 tokens, so we need 4-5 tools + List toolCallbacks = createLargeToolCallbacks(); + + BedrockCacheOptions cacheOptions = BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.TOOLS_ONLY) + .build(); + + BedrockChatOptions chatOptions = BedrockChatOptions.builder() + .model(model) + .cacheOptions(cacheOptions) + .toolCallbacks(List.copyOf(toolCallbacks)) + .maxTokens(500) + .build(); + + // First request - should create cache for tools + ChatResponse response1 = this.chatModel.call(new Prompt("What's the weather in Paris?", chatOptions)); + + // Verify first response is valid + assertThat(response1.getResults()).hasSize(1); + assertThat(response1.getResult().getOutput().getText()).isNotEmpty(); + + // Extract cache metrics from first request + Integer cacheWrite1 = response1.getMetadata().get("cacheWriteInputTokens"); + Integer cacheRead1 = response1.getMetadata().get("cacheReadInputTokens"); + logger.info("First request - cacheWriteInputTokens: {}, cacheReadInputTokens: {}", cacheWrite1, cacheRead1); + + // The first request may either: + // 1. Create a new cache (cacheWrite > 0, cacheRead = 0) if no prior cache exists + // 2. Use existing cache (cacheRead > 0) if previous test ran within 5min TTL + // At least one should be positive to confirm caching is working + int firstRequestCache = (cacheWrite1 != null ? cacheWrite1 : 0) + (cacheRead1 != null ? cacheRead1 : 0); + assertThat(firstRequestCache).as("First request should either write or read from cache").isPositive(); + + // Second request with same tools - should hit cache + ChatResponse response2 = this.chatModel.call(new Prompt("What's the weather in Tokyo?", chatOptions)); + + // Verify second response is valid + assertThat(response2.getResults()).hasSize(1); + assertThat(response2.getResult().getOutput().getText()).isNotEmpty(); + + // Verify cache read tokens are present (tools were read from cache) + Integer cacheRead2 = response2.getMetadata().get("cacheReadInputTokens"); + logger.info("Second request - cacheReadInputTokens: {}", cacheRead2); + assertThat(cacheRead2).as("Second request should read tool definitions from cache").isNotNull().isPositive(); + + // Verify the second request uses the same cache as was established in first + // request + int expectedTotalCache = (cacheWrite1 != null ? cacheWrite1 : 0) + (cacheRead1 != null ? cacheRead1 : 0); + assertThat(cacheRead2).as("Second request should read the same total cache").isEqualTo(expectedTotalCache); + } + + @Test + void testSystemAndToolsPromptCaching() { + // NOTE: Testing combined caching requires both large system prompt and multiple + // tools + // IMPORTANT: This test requires a Claude model (e.g., Claude 3.5 Haiku, Claude + // 3.7 Sonnet) + // Amazon Nova models do NOT support tool caching and will return + // ValidationException + String model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"; + + // Create large system prompt (1K+ tokens) + String basePrompt = """ + You are an expert weather analyst with deep knowledge of meteorology, + climate patterns, and weather forecasting. Your role is to provide detailed + weather analysis and recommendations. 
+ + Key areas of expertise: + - Weather pattern analysis and forecasting + - Climate change impacts on weather + - Severe weather prediction and safety + - Seasonal weather trends + - Microclimate analysis + - Weather data interpretation + - Agricultural weather impacts + - Travel and event weather planning + + """; + + StringBuilder largeSystemPromptBuilder = new StringBuilder(); + for (int i = 0; i < 12; i++) { + largeSystemPromptBuilder.append(basePrompt); + } + largeSystemPromptBuilder.append("Provide detailed weather analysis with context and recommendations."); + + String largeSystemPrompt = largeSystemPromptBuilder.toString(); + + // Create multiple tool callbacks + List toolCallbacks = createLargeToolCallbacks(); + + BedrockCacheOptions cacheOptions = BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.SYSTEM_AND_TOOLS) + .build(); + + BedrockChatOptions chatOptions = BedrockChatOptions.builder() + .model(model) + .cacheOptions(cacheOptions) + .toolCallbacks(List.copyOf(toolCallbacks)) + .maxTokens(500) + .build(); + + // First request - should create cache for both tools and system + ChatResponse response1 = this.chatModel.call(new Prompt( + List.of(new SystemMessage(largeSystemPrompt), new UserMessage("What's the weather in Paris?")), + chatOptions)); + + // Verify first response is valid + assertThat(response1.getResults()).hasSize(1); + assertThat(response1.getResult().getOutput().getText()).isNotEmpty(); + + // Extract cache metrics from first request + Integer cacheWrite1 = response1.getMetadata().get("cacheWriteInputTokens"); + Integer cacheRead1 = response1.getMetadata().get("cacheReadInputTokens"); + logger.info("First request - cacheWriteInputTokens: {}, cacheReadInputTokens: {}", cacheWrite1, cacheRead1); + + // The first request may either: + // 1. Create a new cache (cacheWrite > 0, cacheRead = 0) if no prior cache exists + // 2. Use existing cache (cacheRead > 0) if previous test ran within 5min TTL + // At least one should be positive to confirm caching is working + int firstRequestCache = (cacheWrite1 != null ? cacheWrite1 : 0) + (cacheRead1 != null ? cacheRead1 : 0); + assertThat(firstRequestCache).as("First request should either write or read from cache").isPositive(); + + // Second request with same tools and system - should hit both caches + ChatResponse response2 = this.chatModel.call(new Prompt( + List.of(new SystemMessage(largeSystemPrompt), new UserMessage("What's the weather in Tokyo?")), + chatOptions)); + + // Verify second response is valid + assertThat(response2.getResults()).hasSize(1); + assertThat(response2.getResult().getOutput().getText()).isNotEmpty(); + + // Verify cache read tokens are present (both caches were used) + Integer cacheRead2 = response2.getMetadata().get("cacheReadInputTokens"); + Integer cacheWrite2 = response2.getMetadata().get("cacheWriteInputTokens"); + logger.info("Second request - cacheReadInputTokens: {}, cacheWriteInputTokens: {}", cacheRead2, cacheWrite2); + assertThat(cacheRead2).as("Second request should read from both caches").isNotNull().isPositive(); + + // Verify the second request uses the same cache as was established in first + // request + // The total cache should be: what was written in first + what was read in first + int expectedTotalCache = (cacheWrite1 != null ? cacheWrite1 : 0) + (cacheRead1 != null ? 
cacheRead1 : 0); + assertThat(cacheRead2).as("Second request should read the same total cache").isEqualTo(expectedTotalCache); + + // The combined cache should be substantial (tools + system > 3000 tokens) + assertThat(cacheRead2).as("Combined cache should be substantial").isGreaterThan(3000); + } + + @Test + void testConversationHistoryPromptCachingWithClaude() { + // NOTE: Conversation history caching is verified to work with Claude models + // Amazon Nova models theoretically support this but haven't been verified in + // tests + String model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"; + + // Create a large system prompt to contribute to total token count + // Need 1024+ tokens total for caching to activate + String systemPrompt = """ + You are a helpful AI assistant with expertise in career counseling and professional development. + You remember details from our conversation and use them to provide personalized responses. + Always acknowledge information shared by the user in previous messages when relevant to the current question. + Your advice should be specific, actionable, and tailored to the user's background, industry, and goals. + When providing career guidance, consider market trends, skill development, networking, and work-life balance. + """; + + // Repeat system prompt to ensure we have enough tokens (need 1024+ total) + // Claude tokenizes efficiently, so we need many repetitions + StringBuilder largeSystemPromptBuilder = new StringBuilder(); + for (int i = 0; i < 15; i++) { + largeSystemPromptBuilder.append(systemPrompt); + } + String largeSystemPrompt = largeSystemPromptBuilder.toString(); + + // Build conversation history with multiple turns to exceed token minimum + // Each turn adds context that should be cached + List conversationHistory = new ArrayList<>(); + conversationHistory.add(new SystemMessage(largeSystemPrompt)); + conversationHistory + .add(new UserMessage("My name is Alice and I work as a data scientist at TechCorp in San Francisco.")); + conversationHistory.add(new AssistantMessage("Nice to meet you, Alice! It's great to hear you work as a " + + "data scientist at TechCorp in San Francisco. Data science is such an exciting field. " + + "How long have you been working there?")); + conversationHistory.add(new UserMessage( + "I've been there for 3 years. I specialize in machine learning and natural language processing.")); + conversationHistory.add(new AssistantMessage("That's wonderful, Alice! Three years at TechCorp working on ML " + + "and NLP is impressive. Those are cutting-edge areas of data science. " + + "What kind of NLP projects do you typically work on?")); + conversationHistory.add(new UserMessage( + "Recently I've been building a recommendation system that analyzes user behavior and preferences.")); + conversationHistory + .add(new AssistantMessage("A recommendation system is a fantastic application of your ML and NLP skills! " + + "Analyzing user behavior and preferences can really enhance user experience. 
" + + "Are you using collaborative filtering, content-based methods, or hybrid approaches?")); + + // NOW add the current user question with CONVERSATION_HISTORY caching enabled + // This will cache all previous conversation turns + conversationHistory + .add(new UserMessage("Based on what I've told you about my work, what career advice would you give me?")); + + BedrockCacheOptions cacheOptions = BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.CONVERSATION_HISTORY) + .build(); + + BedrockChatOptions chatOptions = BedrockChatOptions.builder() + .model(model) + .cacheOptions(cacheOptions) + .maxTokens(500) + .build(); + + // First request - should create cache for conversation history + ChatResponse response1 = this.chatModel.call(new Prompt(conversationHistory, chatOptions)); + + // Verify first response is valid + assertThat(response1.getResults()).hasSize(1); + assertThat(response1.getResult().getOutput().getText()).isNotEmpty(); + + // Verify response references the context (Alice, data scientist, etc.) + String responseText1 = response1.getResult().getOutput().getText().toLowerCase(); + logger.info("First response: {}", responseText1); + + // Extract cache metrics from first request + Integer cacheWrite1 = response1.getMetadata().get("cacheWriteInputTokens"); + Integer cacheRead1 = response1.getMetadata().get("cacheReadInputTokens"); + logger.info("First request - cacheWriteInputTokens: {}, cacheReadInputTokens: {}", cacheWrite1, cacheRead1); + + // The first request may either: + // 1. Create a new cache (cacheWrite > 0, cacheRead = 0) if no prior cache + // exists + // 2. Use existing cache (cacheRead > 0) if previous test ran within 5min TTL + int firstRequestCache = (cacheWrite1 != null ? cacheWrite1 : 0) + (cacheRead1 != null ? cacheRead1 : 0); + assertThat(firstRequestCache).as("First request should either write or read from cache").isPositive(); + + // Second request: Continue the conversation with a follow-up question + // The conversation history should be read from cache + List extendedConversation = new ArrayList<>(conversationHistory); + extendedConversation.add(response1.getResult().getOutput()); // Add assistant's + // response + extendedConversation.add(new UserMessage("What skills should I focus on developing to advance in my career?")); + + ChatResponse response2 = this.chatModel.call(new Prompt(extendedConversation, chatOptions)); + + // Verify second response is valid + assertThat(response2.getResults()).hasSize(1); + assertThat(response2.getResult().getOutput().getText()).isNotEmpty(); + + // Verify cache read tokens are present + Integer cacheRead2 = response2.getMetadata().get("cacheReadInputTokens"); + logger.info("Second request - cacheReadInputTokens: {}", cacheRead2); + assertThat(cacheRead2).as("Second request should read conversation history from cache") + .isNotNull() + .isPositive(); + + // The cache should be substantial (conversation history > 500 tokens) + assertThat(cacheRead2).as("Conversation cache should be substantial").isGreaterThan(500); + } + + /** + * Helper method to create multiple tool callbacks to exceed 1K token minimum for + * caching. Creates 5 different weather-related tools with verbose descriptions to + * ensure sufficient token count for Claude models (which tokenize more efficiently + * than Nova models). 
+ */ + private List createLargeToolCallbacks() { + return List.of(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService()).description(""" + Get the current weather conditions for a specific location anywhere in the world. + This comprehensive weather service provides real-time meteorological data including: + - Current temperature in Celsius and Fahrenheit with feels-like temperature + - Humidity levels and dew point information + - Atmospheric pressure readings (both sea level and station pressure) + - Wind speed, direction, and gusts information + - Cloud coverage percentage and type (cumulus, stratus, cirrus, etc.) + - Visibility distance in kilometers and miles + - Current precipitation status (rain, snow, sleet, hail) + - UV index and solar radiation levels + - Air quality index (AQI) and pollutant concentrations + - Sunrise and sunset times for the location + The service uses data from multiple meteorological stations and satellites to ensure + accuracy and reliability. Data is updated every 15 minutes for most locations worldwide. + """).inputType(MockWeatherService.Request.class).build(), FunctionToolCallback + .builder("getWeatherForecast", new MockWeatherService()) + .description(""" + Get the weather forecast for the next 7 days for a specific location with detailed predictions. + This advanced forecasting service provides comprehensive weather predictions including: + - Daily high and low temperatures with hourly breakdowns + - Precipitation probability percentage for each day and hour + - Expected precipitation amounts (rain, snow) in millimeters and inches + - Wind forecasts including speed, direction, and gust predictions + - Cloud coverage predictions and sky conditions (sunny, partly cloudy, overcast) + - Humidity levels and heat index/wind chill calculations + - Severe weather warnings and advisories if applicable + - Sunrise and sunset times for each day + - Moon phase information for planning outdoor activities + - Detailed text descriptions of expected conditions for each day + The forecast uses advanced meteorological models combining numerical weather prediction, + machine learning algorithms, and historical climate data to provide highly accurate + predictions. Forecasts are updated four times daily with improving accuracy for near-term + predictions and reasonable accuracy extending to 7 days out. + """) + .inputType(MockWeatherService.Request.class) + .build(), FunctionToolCallback.builder("getHistoricalWeather", new MockWeatherService()).description(""" + Get historical weather data for a specific location and date range with comprehensive analysis. + This powerful historical weather service provides access to decades of weather records including: + - Temperature records: daily highs, lows, and averages for any date range + - Precipitation history: rainfall and snowfall amounts with accumulation totals + - Temperature trend analysis comparing to long-term averages and records + - Extreme weather events: heat waves, cold snaps, severe storms in the time period + - Climate comparisons showing how conditions compare to historical norms + - Monthly and seasonal summaries with statistical analysis + - Detailed day-by-day weather observations from official weather stations + - Notable weather events and their impacts during the requested time period + The historical data is sourced from official meteorological agencies and weather stations + with records extending back multiple decades. 
This tool is invaluable for understanding + climate trends, planning activities based on historical patterns, agricultural planning, + research purposes, and understanding how current weather compares to historical context. + Data quality indicators are provided to show the reliability of older records. + """).inputType(MockWeatherService.Request.class).build(), + FunctionToolCallback.builder("getWeatherAlerts", new MockWeatherService()) + .description( + """ + Get active weather alerts and warnings for a specific location with critical safety information. + This essential safety service provides real-time alerts from official meteorological services including: + - Severe thunderstorm warnings with timing and intensity information + - Tornado warnings and watches with affected areas and safety instructions + - Hurricane and tropical storm alerts with projected paths and wind speeds + - Flash flood warnings and flood watches with affected waterways + - Winter storm warnings including snow, ice, and blizzard conditions + - Heat advisories and excessive heat warnings with health recommendations + - Wind advisories and high wind warnings with expected peak gusts + - Dense fog advisories affecting visibility and travel + - Air quality alerts for unhealthy pollution levels + - Fire weather warnings for dangerous wildfire conditions + Each alert includes the official alert level (advisory, watch, warning), affected geographic + areas, start and end times, detailed descriptions of the hazard, recommended actions for + safety, and contact information for local emergency management. Alerts are issued by + official national weather services and are updated in real-time as conditions evolve. + This service is critical for public safety and emergency preparedness. + """) + .inputType(MockWeatherService.Request.class) + .build(), + FunctionToolCallback.builder("getClimateData", new MockWeatherService()).description(""" + Get long-term climate data and comprehensive statistics for a specific location. + This climate analysis service provides in-depth climatological information including: + - Long-term average temperatures: monthly and annual means over 30+ year periods + - Precipitation patterns: average rainfall and snowfall by month and season + - Seasonal trend analysis showing typical weather patterns throughout the year + - Climate classification according to Köppen-Geiger system + - Record high and low temperatures for each month with dates + - Average humidity levels, cloud coverage, and sunshine hours + - Wind patterns including prevailing wind directions and average speeds + - Growing season length and frost dates important for agriculture + - Climate change indicators showing temperature and precipitation trends + - Extreme weather frequency: how often severe events typically occur + - Comparison with global and regional climate averages + - Microclimate variations within the region based on elevation and geography + - Best and worst months for various outdoor activities based on climate + This comprehensive climate data is essential for long-term planning, understanding regional + climate characteristics, agricultural planning, construction projects, tourism planning, + and understanding local climate change impacts. Data is derived from decades of official + meteorological observations and is continuously updated as new climate normals are established. 
+ """).inputType(MockWeatherService.Request.class).build()); + } + record ActorsFilmsRecord(String actor, List movies) { } diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/bedrock-converse.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/bedrock-converse.adoc index 2be1b4ed86b..d281a4568f3 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/bedrock-converse.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/bedrock-converse.adoc @@ -133,6 +133,593 @@ String response = ChatClient.create(this.chatModel) .content(); ---- +== Prompt Caching + +AWS Bedrock's https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html[prompt caching feature] allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions. +When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed. + +[NOTE] +==== +*Supported Models* + +Prompt caching is supported on Claude 3.x, Claude 4.x, and Amazon Nova models available through AWS Bedrock. + +*Token Requirements* + +Different models have different minimum token thresholds for cache effectiveness: +- Claude Sonnet 4 and most models: 1024+ tokens +- Model-specific requirements may vary - consult AWS Bedrock documentation +==== + +=== Cache Strategies + +Spring AI provides strategic cache placement through the `BedrockCacheStrategy` enum: + +* `NONE`: Disables prompt caching completely (default) +* `SYSTEM_ONLY`: Caches only the system message content +* `TOOLS_ONLY`: Caches tool definitions only (Claude models only) +* `SYSTEM_AND_TOOLS`: Caches both system message and tool definitions (Claude models only) +* `CONVERSATION_HISTORY`: Caches entire conversation history in chat memory scenarios + +This strategic approach ensures optimal cache breakpoint placement while staying within AWS Bedrock's 4-breakpoint limit. + +[NOTE] +==== +*Amazon Nova Limitations* + +Amazon Nova models (Nova Micro, Lite, Pro, Premier) only support caching for `system` and `messages` content. +They do **not** support caching for `tools`. + +If you attempt to use `TOOLS_ONLY` or `SYSTEM_AND_TOOLS` strategies with Nova models, AWS will return a `ValidationException`. +Use `SYSTEM_ONLY` strategy for Amazon Nova models. +==== + +=== Enabling Prompt Caching + +Enable prompt caching by setting `cacheOptions` on `BedrockChatOptions` and choosing a `strategy`. 
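+Because the model falls back to its default options when a prompt does not set `cacheOptions` (see the runtime-options merge in `BedrockProxyChatModel`), you can also configure caching once on the model and let every call inherit it.
+The following is a minimal sketch; it assumes a `BedrockProxyChatModel` builder that accepts default options, so adapt it to however you construct the model:
+
+[source,java]
+----
+// A minimal sketch: set cache options as model defaults (assumed builder API)
+BedrockProxyChatModel chatModel = BedrockProxyChatModel.builder()
+    .defaultOptions(BedrockChatOptions.builder()
+        .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+        .cacheOptions(BedrockCacheOptions.builder()
+            .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+            .build())
+        .build())
+    .build();
+
+// Calls that do not set cacheOptions explicitly now use the SYSTEM_ONLY default.
+----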
+ +==== System-Only Caching + +The most common use case - cache system instructions across multiple requests: + +[source,java] +---- +// Cache system message content +ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage("You are a helpful AI assistant with extensive knowledge..."), + new UserMessage("What is machine learning?") + ), + BedrockChatOptions.builder() + .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0") + .cacheOptions(BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.SYSTEM_ONLY) + .build()) + .maxTokens(500) + .build() + ) +); +---- + +==== Tools-Only Caching + +Cache large tool definitions while keeping system prompts dynamic (Claude models only): + +[source,java] +---- +// Cache tool definitions only +ChatResponse response = chatModel.call( + new Prompt( + "What's the weather in San Francisco?", + BedrockChatOptions.builder() + .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0") + .cacheOptions(BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.TOOLS_ONLY) + .build()) + .toolCallbacks(weatherToolCallbacks) // Large tool definitions + .maxTokens(500) + .build() + ) +); +---- + +NOTE: This strategy is only supported on Claude models. +Amazon Nova models will return a `ValidationException`. + +==== System and Tools Caching + +Cache both system instructions and tool definitions for maximum reuse (Claude models only): + +[source,java] +---- +// Cache system message and tool definitions +ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage("You are a weather analysis assistant..."), + new UserMessage("What's the weather like in Tokyo?") + ), + BedrockChatOptions.builder() + .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0") + .cacheOptions(BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.SYSTEM_AND_TOOLS) + .build()) + .toolCallbacks(weatherToolCallbacks) + .maxTokens(500) + .build() + ) +); +---- + +NOTE: This strategy uses 2 cache breakpoints (one for tools, one for system). +Only supported on Claude models. 
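+If the target model is not known until runtime, one defensive option is to downgrade the strategy for Amazon Nova models rather than risk a `ValidationException`.
+The sketch below is illustrative, not a Spring AI API: the helper method and the model-id check are assumptions based on Bedrock's model-id naming (for example, `amazon.nova-pro-v1:0`):
+
+[source,java]
+----
+// Hypothetical helper: pick a tool-caching strategy only for models that support it
+static BedrockCacheStrategy toolAwareStrategy(String modelId) {
+    // Nova models support system/message caching only, so fall back to SYSTEM_ONLY
+    boolean isNova = modelId != null && modelId.contains("nova");
+    return isNova ? BedrockCacheStrategy.SYSTEM_ONLY : BedrockCacheStrategy.SYSTEM_AND_TOOLS;
+}
+
+BedrockChatOptions options = BedrockChatOptions.builder()
+    .model(modelId)
+    .cacheOptions(BedrockCacheOptions.builder()
+        .strategy(toolAwareStrategy(modelId))
+        .build())
+    .build();
+----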
+
+==== Conversation History Caching
+
+Cache the growing conversation history for multi-turn chatbots and assistants:
+
+[source,java]
+----
+// Cache conversation history with ChatClient and memory
+ChatClient chatClient = ChatClient.builder(chatModel)
+    .defaultSystem("You are a personalized career counselor...")
+    .defaultAdvisors(MessageChatMemoryAdvisor.builder(chatMemory)
+        .conversationId(conversationId)
+        .build())
+    .build();
+
+String response = chatClient.prompt()
+    .user("What career advice would you give me?")
+    .options(BedrockChatOptions.builder()
+        .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+        .cacheOptions(BedrockCacheOptions.builder()
+            .strategy(BedrockCacheStrategy.CONVERSATION_HISTORY)
+            .build())
+        .maxTokens(500)
+        .build())
+    .call()
+    .content();
+----
+
+==== Using the ChatClient Fluent API
+
+[source,java]
+----
+String response = ChatClient.create(chatModel)
+    .prompt()
+    .system("You are an expert document analyst...")
+    .user("Analyze this large document: " + document)
+    .options(BedrockChatOptions.builder()
+        .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+        .cacheOptions(BedrockCacheOptions.builder()
+            .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+            .build())
+        .build())
+    .call()
+    .content();
+----
+
+=== Usage Example
+
+Here's a complete example demonstrating prompt caching with cost tracking:
+
+[source,java]
+----
+// Create system content that will be reused multiple times
+String largeSystemPrompt = "You are an expert software architect specializing in distributed systems...";
+// (Ensure this is 1024+ tokens for cache effectiveness)
+
+// First request - creates cache
+ChatResponse firstResponse = chatModel.call(
+    new Prompt(
+        List.of(
+            new SystemMessage(largeSystemPrompt),
+            new UserMessage("What is microservices architecture?")
+        ),
+        BedrockChatOptions.builder()
+            .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+                .build())
+            .maxTokens(500)
+            .build()
+    )
+);
+
+// Access cache-related token usage from metadata
+Integer cacheWrite1 = (Integer) firstResponse.getMetadata()
+    .getMetadata()
+    .get("cacheWriteInputTokens");
+Integer cacheRead1 = (Integer) firstResponse.getMetadata()
+    .getMetadata()
+    .get("cacheReadInputTokens");
+
+System.out.println("Cache creation tokens: " + cacheWrite1);
+System.out.println("Cache read tokens: " + cacheRead1);
+
+// Second request with same system prompt - reads from cache
+ChatResponse secondResponse = chatModel.call(
+    new Prompt(
+        List.of(
+            new SystemMessage(largeSystemPrompt), // Same prompt - cache hit
+            new UserMessage("What are the benefits of event sourcing?")
+        ),
+        BedrockChatOptions.builder()
+            .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+                .build())
+            .maxTokens(500)
+            .build()
+    )
+);
+
+Integer cacheWrite2 = (Integer) secondResponse.getMetadata()
+    .getMetadata()
+    .get("cacheWriteInputTokens");
+Integer cacheRead2 = (Integer) secondResponse.getMetadata()
+    .getMetadata()
+    .get("cacheReadInputTokens");
+
+System.out.println("Cache creation tokens: " + cacheWrite2); // Should be 0
+System.out.println("Cache read tokens: " + cacheRead2); // Should be > 0
+----
+
+=== Token Usage Tracking
+
+AWS Bedrock provides cache-specific metrics through the response metadata.
+Cache metrics are accessible via the metadata Map:
+
+[source,java]
+----
+ChatResponse response = chatModel.call(/* ... */);
+
+// Access cache metrics from metadata Map
+Integer cacheWrite = (Integer) response.getMetadata()
+    .getMetadata()
+    .get("cacheWriteInputTokens");
+Integer cacheRead = (Integer) response.getMetadata()
+    .getMetadata()
+    .get("cacheReadInputTokens");
+----
+
+Cache-specific metrics include:
+
+* `cacheWriteInputTokens`: the number of tokens used when creating a cache entry
+* `cacheReadInputTokens`: the number of tokens read from an existing cache entry
+
+When you first send a cached prompt:
+
+- `cacheWriteInputTokens` will be greater than 0
+- `cacheReadInputTokens` will be 0
+
+When you send the same cached prompt again (within the 5-minute TTL):
+
+- `cacheWriteInputTokens` will be 0
+- `cacheReadInputTokens` will be greater than 0
+
+=== Real-World Use Cases
+
+==== Legal Document Analysis
+
+Analyze large legal contracts or compliance documents efficiently by caching the document content across multiple questions:
+
+[source,java]
+----
+// Load a legal contract (PDF or text)
+String legalContract = loadDocument("merger-agreement.pdf"); // ~3000 tokens
+
+// System prompt with legal expertise
+String legalSystemPrompt = "You are an expert legal analyst specializing in corporate law. " +
+    "Analyze the following contract and provide precise answers about terms, obligations, and risks: " +
+    legalContract;
+
+// First analysis - creates cache
+ChatResponse riskAnalysis = chatModel.call(
+    new Prompt(
+        List.of(
+            new SystemMessage(legalSystemPrompt),
+            new UserMessage("What are the key termination clauses and associated penalties?")
+        ),
+        BedrockChatOptions.builder()
+            .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+                .build())
+            .maxTokens(1000)
+            .build()
+    )
+);
+
+// Subsequent questions reuse the cached document (~90% cheaper input tokens)
+ChatResponse obligationAnalysis = chatModel.call(
+    new Prompt(
+        List.of(
+            new SystemMessage(legalSystemPrompt), // Same content - cache hit
+            new UserMessage("List all financial obligations and payment schedules.")
+        ),
+        BedrockChatOptions.builder()
+            .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+                .build())
+            .maxTokens(1000)
+            .build()
+    )
+);
+----
+
+==== Batch Code Review
+
+Process multiple code files with consistent review criteria while caching the review guidelines:
+
+[source,java]
+----
+// Define comprehensive code review guidelines
+String reviewGuidelines = """
+    You are a senior software engineer conducting code reviews. Apply these criteria:
+    - Security vulnerabilities and best practices
+    - Performance optimizations and memory usage
+    - Code maintainability and readability
+    - Testing coverage and edge cases
+    - Design patterns and architecture compliance
+    """;
+
+List<String> codeFiles = Arrays.asList(
+    "UserService.java", "PaymentController.java", "SecurityConfig.java"
+);
+
+List<String> reviews = new ArrayList<>();
+
+for (String filename : codeFiles) {
+    String sourceCode = loadSourceFile(filename);
+
+    ChatResponse review = chatModel.call(
+        new Prompt(
+            List.of(
+                new SystemMessage(reviewGuidelines), // Cached across all reviews
+                new UserMessage("Review this " + filename + " code:\n\n" + sourceCode)
+            ),
+            BedrockChatOptions.builder()
+                .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0")
+                .cacheOptions(BedrockCacheOptions.builder()
+                    .strategy(BedrockCacheStrategy.SYSTEM_ONLY)
+                    .build())
+                .maxTokens(800)
+                .build()
+        )
+    );
+
+    reviews.add(review.getResult().getOutput().getText());
+}
+
+// Guidelines are cached after the first request; subsequent reviews are faster and cheaper
+----
+
+==== Customer Support with Knowledge Base
+
+Create a customer support system that caches your product knowledge base for consistent, accurate responses:
+
+[source,java]
+----
+// Load comprehensive product knowledge
+String knowledgeBase = """
+    PRODUCT DOCUMENTATION:
+    - API endpoints and authentication methods
+    - Common troubleshooting procedures
+    - Billing and subscription details
+    - Integration guides and examples
+    - Known issues and workarounds
+    """ + loadProductDocs(); // ~2500 tokens
+
+@Service
+public class CustomerSupportService {
+
+    public String handleCustomerQuery(String customerQuery, String customerId) {
+        ChatResponse response = chatModel.call(
+            new Prompt(
+                List.of(
+                    new SystemMessage("You are a helpful customer support agent. " +
" + + "Use this knowledge base to provide accurate solutions: " + knowledgeBase), + new UserMessage("Customer " + customerId + " asks: " + customerQuery) + ), + BedrockChatOptions.builder() + .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0") + .cacheOptions(BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.SYSTEM_ONLY) + .build()) + .maxTokens(600) + .build() + ) + ); + + return response.getResult().getOutput().getText(); + } +} + +// Knowledge base is cached across all customer queries +// Multiple support agents can benefit from the same cached content +---- + +==== Multi-Tenant SaaS Application + +Cache shared tool definitions across different tenants while customizing system prompts per tenant: + +[source,java] +---- +// Shared tool definitions (cached once, used across all tenants) +List sharedTools = createLargeToolRegistry(); // ~2000 tokens + +// Tenant-specific configuration +@Service +public class MultiTenantAIService { + + public String processRequest(String tenantId, String userQuery) { + // Load tenant-specific system prompt (changes per tenant) + String tenantPrompt = loadTenantSystemPrompt(tenantId); + + ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage(tenantPrompt), // Tenant-specific, not cached + new UserMessage(userQuery) + ), + BedrockChatOptions.builder() + .model("us.anthropic.claude-3-7-sonnet-20250219-v1:0") + .cacheOptions(BedrockCacheOptions.builder() + .strategy(BedrockCacheStrategy.TOOLS_ONLY) + .build()) + .toolCallbacks(sharedTools) // Shared tools - cached + .maxTokens(500) + .build() + ) + ); + + return response.getResult().getOutput().getText(); + } +} + +// Tools cached once, each tenant gets customized system prompt +---- + +=== Best Practices + +1. **Choose the Right Strategy**: + - Use `SYSTEM_ONLY` for reusable system prompts and instructions (works with all models) + - Use `TOOLS_ONLY` when you have large stable tools but dynamic system prompts (Claude only) + - Use `SYSTEM_AND_TOOLS` when both system and tools are large and stable (Claude only) + - Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations + - Use `NONE` to explicitly disable caching + +2. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for most models). + +3. **Reuse Identical Content**: Caching works best with exact matches of prompt content. +Even small changes will require a new cache entry. + +4. **Monitor Token Usage**: Track cache effectiveness using the metadata metrics: + + Integer cacheWrite = (Integer) response.getMetadata().getMetadata().get("cacheWriteInputTokens"); + Integer cacheRead = (Integer) response.getMetadata().getMetadata().get("cacheReadInputTokens"); + if (cacheRead != null && cacheRead > 0) { + System.out.println("Cache hit: " + cacheRead + " tokens saved"); + } + +5. **Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with AWS Bedrock's 4-breakpoint limit. + +6. **Cache Lifetime**: AWS Bedrock caches have a fixed 5-minute TTL (Time To Live). +Each cache access resets the timer. + +7. **Model Compatibility**: Be aware of model-specific limitations: + - **Claude models**: Support all caching strategies + - **Amazon Nova models**: Only support `SYSTEM_ONLY` and `CONVERSATION_HISTORY` (tool caching not supported) + +8. 
+8. **Tool Stability**: When using the `TOOLS_ONLY`, `SYSTEM_AND_TOOLS`, or `CONVERSATION_HISTORY` strategy, ensure tools remain stable.
+Changing tool definitions will invalidate all downstream cache breakpoints due to cascade invalidation.
+
+=== Cache Invalidation and Cascade Behavior
+
+AWS Bedrock follows a hierarchical cache model with cascade invalidation:
+
+**Cache Hierarchy**: `Tools → System → Messages`
+
+A change at one level invalidates that level and all subsequent levels:
+
+[cols="1,1,1,1", stripes=even]
+|====
+| What Changes | Tools Cache | System Cache | Messages Cache
+
+| Tools | ❌ Invalid | ❌ Invalid | ❌ Invalid
+| System | ✅ Valid | ❌ Invalid | ❌ Invalid
+| Messages | ✅ Valid | ✅ Valid | ❌ Invalid
+|====
+
+**Example with the `SYSTEM_AND_TOOLS` strategy**:
+
+[source,java]
+----
+// Request 1: Cache both tools and system
+ChatResponse r1 = chatModel.call(
+    new Prompt(
+        List.of(new SystemMessage("System prompt"), new UserMessage("Question")),
+        BedrockChatOptions.builder()
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_AND_TOOLS)
+                .build())
+            .toolCallbacks(tools)
+            .build()
+    )
+);
+// Result: Both caches created
+
+// Request 2: Change only the system prompt (tools unchanged)
+ChatResponse r2 = chatModel.call(
+    new Prompt(
+        List.of(new SystemMessage("DIFFERENT system prompt"), new UserMessage("Question")),
+        BedrockChatOptions.builder()
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_AND_TOOLS)
+                .build())
+            .toolCallbacks(tools) // SAME tools
+            .build()
+    )
+);
+// Result: Tools cache HIT (reused), system cache MISS (recreated)
+
+// Request 3: Change the tools (system same as Request 2)
+ChatResponse r3 = chatModel.call(
+    new Prompt(
+        List.of(new SystemMessage("DIFFERENT system prompt"), new UserMessage("Question")),
+        BedrockChatOptions.builder()
+            .cacheOptions(BedrockCacheOptions.builder()
+                .strategy(BedrockCacheStrategy.SYSTEM_AND_TOOLS)
+                .build())
+            .toolCallbacks(newTools) // DIFFERENT tools
+            .build()
+    )
+);
+// Result: BOTH caches MISS (a tools change invalidates everything downstream)
+----
+
+=== Implementation Details
+
+The prompt caching implementation in Spring AI follows these key design principles:
+
+1. **Strategic Cache Placement**: Cache breakpoints are automatically placed at optimal locations based on the chosen strategy, ensuring compliance with AWS Bedrock's 4-breakpoint limit.
+
+2. **Provider Portability**: Cache configuration is done through `BedrockChatOptions` rather than individual messages, preserving compatibility when switching between different AI providers.
+
+3. **Thread Safety**: The cache breakpoint tracking is implemented with thread-safe mechanisms to handle concurrent requests correctly.
+
+4. **UNION Type Pattern**: The AWS SDK models cache points as UNION types, so a cache point is added as a separate content block rather than as a property of an existing block. This differs from APIs that attach cache markers to individual blocks, but it ensures type safety and API compliance.
+
+5. **Incremental Caching**: The `CONVERSATION_HISTORY` strategy places a cache breakpoint on the last user message, enabling incremental caching where each conversation turn builds on the previous cached prefix (see the sketch below).
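+
+To make the incremental pattern concrete, the following sketch reuses the memory-backed `chatClient` from the Conversation History example above and assumes the `CONVERSATION_HISTORY` cache options are applied as default options on the client. Each turn should report a cache read for the previously cached prefix plus a smaller cache write covering only the newly appended messages:
+
+[source,java]
+----
+for (String question : List.of(
+        "What career advice would you give me?",
+        "How should I prepare for a technical interview?",
+        "Which of those steps should I start with?")) {
+
+    ChatResponse turn = chatClient.prompt()
+        .user(question)
+        .call()
+        .chatResponse();
+
+    // The read count grows with the conversation; the write count covers
+    // only the messages appended since the last turn.
+    System.out.printf("write=%s read=%s%n",
+        turn.getMetadata().getMetadata().get("cacheWriteInputTokens"),
+        turn.getMetadata().getMetadata().get("cacheReadInputTokens"));
+}
+----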
+
+=== Cost Considerations
+
+AWS Bedrock pricing for prompt caching (approximate; varies by model):
+
+* **Cache writes**: ~25% more expensive than base input tokens
+* **Cache reads**: ~90% cheaper (only 10% of the base input token price)
+* **Break-even point**: after just one cache read, you've saved money
+
+**Example cost calculation**:
+
+[source,java]
+----
+// System prompt: 2000 tokens
+// User question: 50 tokens
+
+// Without caching (5 requests):
+// Cost: 5 × (2000 + 50) = 10,250 tokens at the base rate
+
+// With caching (5 requests):
+// Request 1: 2000 tokens × 1.25 (cache write) + 50 = 2,550 tokens
+// Requests 2-5: 4 × (2000 × 0.10 (cache read) + 50) = 4 × 250 = 1,000 tokens
+// Total: 2,550 + 1,000 = 3,550 token equivalents
+
+// Savings: (10,250 - 3,550) / 10,250 = 65% cost reduction
+----
+
 == Tool Calling
 
 The Bedrock Converse API supports tool calling capabilities, allowing models to use tools during conversations.