diff --git a/docs/USAGE.md b/docs/USAGE.md index db64d4ad5..ed00a450d 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -451,6 +451,19 @@ models: | `role_session_name` | string | Session name for assumed role | cagent-bedrock-session | | `external_id` | string | External ID for role assumption | (none) | | `endpoint_url` | string | Custom endpoint (VPC/testing) | (none) | +| `interleaved_thinking` | bool | Enable reasoning during tool calls (requires thinking_budget) | false | +| `disable_prompt_caching` | bool | Disable automatic prompt caching | false | + +#### Prompt Caching (Bedrock) + +Prompt caching is automatically enabled for models that support it (detected via models.dev) to reduce latency and costs. System prompts, tool definitions, and recent messages are cached with a 5-minute TTL. + +To disable: + +```yaml +provider_opts: + disable_prompt_caching: true +``` **Supported models (via Converse API):** diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index cd2ed9ff0..e8b11287a 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -20,13 +20,15 @@ import ( "github.com/docker/cagent/pkg/environment" "github.com/docker/cagent/pkg/model/provider/base" "github.com/docker/cagent/pkg/model/provider/options" + "github.com/docker/cagent/pkg/modelsdev" "github.com/docker/cagent/pkg/tools" ) // Client represents a Bedrock client wrapper implementing provider.Provider type Client struct { base.Config - bedrockClient *bedrockruntime.Client + bedrockClient *bedrockruntime.Client + cachingSupported bool // Cached at init time for efficiency } // bearerTokenTransport adds Authorization header with bearer token to requests @@ -40,7 +42,6 @@ func (t *bearerTokenTransport) RoundTrip(req *http.Request) (*http.Response, err return t.base.RoundTrip(req) } -// NewClient creates a new Bedrock client from the provided configuration func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) { if cfg == nil { slog.Error("Bedrock client creation failed", "error", "model configuration is required") @@ -109,7 +110,14 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro bedrockClient := bedrockruntime.NewFromConfig(awsCfg, clientOpts...) - slog.Debug("Bedrock client created successfully", "model", cfg.Model, "region", awsCfg.Region) + // Detect prompt caching capability at init time for efficiency. + // Uses models.dev cache pricing as proxy for capability detection. + cachingSupported := detectCachingSupport(ctx, cfg.Model) + + slog.Debug("Bedrock client created successfully", + "model", cfg.Model, + "region", awsCfg.Region, + "caching_supported", cachingSupported) return &Client{ Config: base.Config{ @@ -117,11 +125,32 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Pro ModelOptions: globalOptions, Env: env, }, - bedrockClient: bedrockClient, + bedrockClient: bedrockClient, + cachingSupported: cachingSupported, }, nil } -// buildAWSConfig creates AWS config with proper credentials using the default credential chain +// detectCachingSupport checks if a model supports prompt caching using models.dev data. +// Models with non-zero CacheRead/CacheWrite costs support prompt caching. +// Returns false on lookup failure (safe default for unsupported models). 
+func detectCachingSupport(ctx context.Context, model string) bool { + store, err := modelsdev.NewStore() + if err != nil { + slog.Debug("Bedrock models store unavailable, prompt caching disabled", "error", err) + return false + } + + modelID := "amazon-bedrock/" + model + m, err := store.GetModel(ctx, modelID) + if err != nil { + slog.Debug("Bedrock prompt caching disabled: model not found in models.dev", + "model_id", modelID, "error", err) + return false + } + + return m.Cost != nil && (m.Cost.CacheRead > 0 || m.Cost.CacheWrite > 0) +} + func buildAWSConfig(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider) (aws.Config, error) { var configOpts []func(*config.LoadOptions) error @@ -169,7 +198,6 @@ func buildAWSConfig(ctx context.Context, cfg *latest.ModelConfig, env environmen return awsCfg, nil } -// CreateChatCompletionStream creates a streaming chat completion request func (c *Client) CreateChatCompletionStream( ctx context.Context, messages []chat.Message, @@ -198,21 +226,22 @@ func (c *Client) CreateChatCompletionStream( return newStreamAdapter(output.GetStream(), c.ModelConfig.Model, trackUsage), nil } -// buildConverseStreamInput creates the ConverseStream input parameters func (c *Client) buildConverseStreamInput(messages []chat.Message, requestTools []tools.Tool) *bedrockruntime.ConverseStreamInput { input := &bedrockruntime.ConverseStreamInput{ ModelId: aws.String(c.ModelConfig.Model), } + enableCaching := c.promptCachingEnabled() + // Convert and set messages (excluding system) - input.Messages, input.System = convertMessages(messages) + input.Messages, input.System = convertMessages(messages, enableCaching) // Set inference configuration input.InferenceConfig = c.buildInferenceConfig() // Convert and set tools if len(requestTools) > 0 { - input.ToolConfig = convertToolConfig(requestTools) + input.ToolConfig = convertToolConfig(requestTools, enableCaching) } // Set extended thinking configuration for Claude models @@ -223,7 +252,6 @@ func (c *Client) buildConverseStreamInput(messages []chat.Message, requestTools return input } -// buildInferenceConfig creates the inference configuration func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { cfg := &types.InferenceConfiguration{} @@ -247,8 +275,8 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { return cfg } -// isThinkingEnabled checks if extended thinking will be enabled for this request. -// This mirrors the validation logic in buildAdditionalModelRequestFields. +// isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields +// to determine if thinking params will affect inference config (temp/topP constraints). func (c *Client) isThinkingEnabled() bool { if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { return false @@ -269,13 +297,18 @@ func (c *Client) isThinkingEnabled() bool { return true } -// interleavedThinkingEnabled returns true when provider_opts.interleaved_thinking is set. func (c *Client) interleavedThinkingEnabled() bool { return getProviderOpt[bool](c.ModelConfig.ProviderOpts, "interleaved_thinking") } -// buildAdditionalModelRequestFields creates model-specific parameters. -// Used for extended thinking (reasoning) configuration on Claude models. 
+func (c *Client) promptCachingEnabled() bool { + if getProviderOpt[bool](c.ModelConfig.ProviderOpts, "disable_prompt_caching") { + return false + } + return c.cachingSupported +} + +// buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode. func (c *Client) buildAdditionalModelRequestFields() document.Interface { if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 { return nil @@ -316,7 +349,6 @@ func (c *Client) buildAdditionalModelRequestFields() document.Interface { return document.NewLazyDocument(fields) } -// getProviderOpt extracts a typed value from provider_opts func getProviderOpt[T any](opts map[string]any, key string) T { var zero T if opts == nil { @@ -328,6 +360,11 @@ func getProviderOpt[T any](opts map[string]any, key string) T { } typed, ok := v.(T) if !ok { + slog.Warn("Bedrock provider_opts type mismatch", + "key", key, + "expected_type", fmt.Sprintf("%T", zero), + "actual_type", fmt.Sprintf("%T", v), + "value", v) return zero } return typed diff --git a/pkg/model/provider/bedrock/client_test.go b/pkg/model/provider/bedrock/client_test.go index fa4220653..57920eee8 100644 --- a/pkg/model/provider/bedrock/client_test.go +++ b/pkg/model/provider/bedrock/client_test.go @@ -26,7 +26,7 @@ func TestConvertMessages_UserText(t *testing.T) { Content: "Hello, world!", }} - bedrockMsgs, system := convertMessages(msgs) + bedrockMsgs, system := convertMessages(msgs, false) require.Len(t, bedrockMsgs, 1) assert.Empty(t, system) @@ -46,7 +46,7 @@ func TestConvertMessages_SystemExtraction(t *testing.T) { {Role: chat.MessageRoleUser, Content: "Hi"}, } - bedrockMsgs, system := convertMessages(msgs) + bedrockMsgs, system := convertMessages(msgs, false) require.Len(t, bedrockMsgs, 1) // Only user message require.Len(t, system, 1) // System extracted @@ -71,7 +71,7 @@ func TestConvertMessages_AssistantWithToolCalls(t *testing.T) { }}, }} - bedrockMsgs, _ := convertMessages(msgs) + bedrockMsgs, _ := convertMessages(msgs, false) require.Len(t, bedrockMsgs, 1) require.Len(t, bedrockMsgs[0].Content, 1) @@ -92,7 +92,7 @@ func TestConvertMessages_ToolResult(t *testing.T) { Content: "Weather is sunny", }} - bedrockMsgs, _ := convertMessages(msgs) + bedrockMsgs, _ := convertMessages(msgs, false) require.Len(t, bedrockMsgs, 1) assert.Equal(t, types.ConversationRoleUser, bedrockMsgs[0].Role) @@ -111,7 +111,7 @@ func TestConvertMessages_EmptyContent(t *testing.T) { {Role: chat.MessageRoleUser, Content: " "}, } - bedrockMsgs, _ := convertMessages(msgs) + bedrockMsgs, _ := convertMessages(msgs, false) assert.Empty(t, bedrockMsgs) } @@ -129,7 +129,7 @@ func TestConvertToolConfig(t *testing.T) { }, }} - config := convertToolConfig(requestTools) + config := convertToolConfig(requestTools, false) require.NotNil(t, config) require.Len(t, config.Tools, 1) @@ -143,10 +143,10 @@ func TestConvertToolConfig(t *testing.T) { func TestConvertToolConfig_Empty(t *testing.T) { t.Parallel() - config := convertToolConfig(nil) + config := convertToolConfig(nil, false) assert.Nil(t, config) - config = convertToolConfig([]tools.Tool{}) + config = convertToolConfig([]tools.Tool{}, false) assert.Nil(t, config) } @@ -176,7 +176,7 @@ func TestConvertMessages_MultiContent(t *testing.T) { }, }} - bedrockMsgs, _ := convertMessages(msgs) + bedrockMsgs, _ := convertMessages(msgs, false) require.Len(t, bedrockMsgs, 1) require.Len(t, bedrockMsgs[0].Content, 2) @@ -200,7 +200,7 @@ func TestConvertMessages_ConsecutiveToolResults(t *testing.T) { {Role: chat.MessageRoleUser, 
Content: "Continue"}, } - bedrockMsgs, _ := convertMessages(msgs) + bedrockMsgs, _ := convertMessages(msgs, false) // Expect: user, assistant, user (grouped tool results), user require.Len(t, bedrockMsgs, 4) @@ -1074,3 +1074,275 @@ func TestConvertAssistantContent_NoThinkingWhenBothEmpty(t *testing.T) { require.True(t, ok) assert.Equal(t, "Here's my answer", textBlock.Value) } + +// Prompt Caching Tests + +func TestPromptCachingEnabled_SupportedModel(t *testing.T) { + t.Parallel() + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", + }, + }, + cachingSupported: true, // Detected at init time + } + + assert.True(t, client.promptCachingEnabled()) +} + +func TestPromptCachingEnabled_UnsupportedModel(t *testing.T) { + t.Parallel() + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Model: "meta.llama3-8b-instruct-v1:0", + }, + }, + cachingSupported: false, // Model doesn't support caching + } + + assert.False(t, client.promptCachingEnabled()) +} + +func TestPromptCachingEnabled_Disabled(t *testing.T) { + t.Parallel() + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", + ProviderOpts: map[string]any{ + "disable_prompt_caching": true, + }, + }, + }, + cachingSupported: true, // Model supports it, but user disabled + } + + assert.False(t, client.promptCachingEnabled()) +} + +func TestPromptCachingEnabled_CachingNotSupported(t *testing.T) { + t.Parallel() + + // Simulates scenario where detectCachingSupport returned false at init + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", + }, + }, + cachingSupported: false, + } + + assert.False(t, client.promptCachingEnabled()) +} + +func TestConvertMessages_WithCaching(t *testing.T) { + t.Parallel() + + msgs := []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful"}, + {Role: chat.MessageRoleUser, Content: "Hello"}, + {Role: chat.MessageRoleAssistant, Content: "Hi there"}, + {Role: chat.MessageRoleUser, Content: "How are you?"}, + } + + bedrockMsgs, system := convertMessages(msgs, true) + + // System should have text block + cache point + require.Len(t, system, 2) + _, isCachePoint := system[1].(*types.SystemContentBlockMemberCachePoint) + assert.True(t, isCachePoint) + + // Last 2 messages should have cache points appended + require.Len(t, bedrockMsgs, 3) + + // Last message (user) should have cache point + lastMsg := bedrockMsgs[2] + require.Len(t, lastMsg.Content, 2) // text + cache point + _, isCachePoint = lastMsg.Content[1].(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint) + + // Second to last message (assistant) should have cache point + secondLastMsg := bedrockMsgs[1] + require.Len(t, secondLastMsg.Content, 2) // text + cache point + _, isCachePoint = secondLastMsg.Content[1].(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint) +} + +func TestConvertMessages_WithoutCaching(t *testing.T) { + t.Parallel() + + msgs := []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful"}, + {Role: chat.MessageRoleUser, Content: "Hello"}, + } + + bedrockMsgs, system := convertMessages(msgs, false) + + // System should only have text block, no cache point + require.Len(t, system, 1) + _, isText := system[0].(*types.SystemContentBlockMemberText) + assert.True(t, isText) + + // Message should not have cache point + 
require.Len(t, bedrockMsgs, 1) + require.Len(t, bedrockMsgs[0].Content, 1) // just text, no cache point +} + +func TestConvertToolConfig_WithCaching(t *testing.T) { + t.Parallel() + + requestTools := []tools.Tool{{ + Name: "test_tool", + Description: "A test tool", + }} + + config := convertToolConfig(requestTools, true) + + require.NotNil(t, config) + require.Len(t, config.Tools, 2) // tool spec + cache point + + // Last tool should be cache point + _, isCachePoint := config.Tools[1].(*types.ToolMemberCachePoint) + assert.True(t, isCachePoint) +} + +func TestConvertToolConfig_WithoutCaching(t *testing.T) { + t.Parallel() + + requestTools := []tools.Tool{{ + Name: "test_tool", + Description: "A test tool", + }} + + config := convertToolConfig(requestTools, false) + + require.NotNil(t, config) + require.Len(t, config.Tools, 1) // just tool spec, no cache point +} + +func TestPromptCachingEnabled_TypeMismatch(t *testing.T) { + t.Parallel() + + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Model: "anthropic.claude-3-5-sonnet-20241022-v2:0", + ProviderOpts: map[string]any{ + "disable_prompt_caching": "true", // string instead of bool + }, + }, + }, + cachingSupported: true, + } + + // Type mismatch returns zero value (false), so caching stays enabled + assert.True(t, client.promptCachingEnabled()) +} + +func TestDetectCachingSupport_SupportedModel(t *testing.T) { + t.Parallel() + + // Uses real models.dev lookup to verify Claude models support caching + supported := detectCachingSupport(t.Context(), "anthropic.claude-3-5-sonnet-20241022-v2:0") + assert.True(t, supported) +} + +func TestDetectCachingSupport_UnsupportedModel(t *testing.T) { + t.Parallel() + + // Llama doesn't have cache pricing in models.dev + supported := detectCachingSupport(t.Context(), "meta.llama3-8b-instruct-v1:0") + assert.False(t, supported) +} + +func TestDetectCachingSupport_UnknownModel(t *testing.T) { + t.Parallel() + + // Unknown model should gracefully return false, not panic + supported := detectCachingSupport(t.Context(), "nonexistent.model.that.does.not.exist:v1") + assert.False(t, supported) +} + +func TestConvertMessages_EmptyWithCaching(t *testing.T) { + t.Parallel() + + // Empty message list should not panic with caching enabled + bedrockMsgs, system := convertMessages([]chat.Message{}, true) + + assert.Empty(t, bedrockMsgs) + assert.Empty(t, system) +} + +func TestConvertMessages_SingleMessageWithCaching(t *testing.T) { + t.Parallel() + + msgs := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Hello"}, + } + + bedrockMsgs, _ := convertMessages(msgs, true) + + require.Len(t, bedrockMsgs, 1) + // Single message should get a cache point appended + require.Len(t, bedrockMsgs[0].Content, 2) // text + cache point + _, isCachePoint := bedrockMsgs[0].Content[1].(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint) +} + +func TestConvertMessages_MultiContentWithCaching(t *testing.T) { + t.Parallel() + + msgs := []chat.Message{{ + Role: chat.MessageRoleUser, + MultiContent: []chat.MessagePart{ + {Type: chat.MessagePartTypeText, Text: "First part"}, + {Type: chat.MessagePartTypeText, Text: "Second part"}, + }, + }} + + bedrockMsgs, _ := convertMessages(msgs, true) + + require.Len(t, bedrockMsgs, 1) + // 2 text blocks + cache point = 3 content blocks + require.Len(t, bedrockMsgs[0].Content, 3) + _, isCachePoint := bedrockMsgs[0].Content[2].(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint) +} + +func TestConvertMessages_ToolResultWithCaching(t 
*testing.T) { + t.Parallel() + + msgs := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Call a tool"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "tool-1", Function: tools.FunctionCall{Name: "test", Arguments: "{}"}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "Result"}, + } + + bedrockMsgs, _ := convertMessages(msgs, true) + + // Expect: user, assistant, user (tool result) + require.Len(t, bedrockMsgs, 3) + + // Last message (tool result as user) should have cache point + lastMsg := bedrockMsgs[len(bedrockMsgs)-1] + lastContent := lastMsg.Content[len(lastMsg.Content)-1] + _, isCachePoint := lastContent.(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint, "tool result message should have cache point") + + // Second to last (assistant with tool call) should also have cache point + secondLastMsg := bedrockMsgs[len(bedrockMsgs)-2] + secondLastContent := secondLastMsg.Content[len(secondLastMsg.Content)-1] + _, isCachePoint = secondLastContent.(*types.ContentBlockMemberCachePoint) + assert.True(t, isCachePoint, "assistant tool call message should have cache point") +} diff --git a/pkg/model/provider/bedrock/convert.go b/pkg/model/provider/bedrock/convert.go index e15a70fd7..77bebf175 100644 --- a/pkg/model/provider/bedrock/convert.go +++ b/pkg/model/provider/bedrock/convert.go @@ -3,6 +3,7 @@ package bedrock import ( "encoding/base64" "encoding/json" + "log/slog" "strings" "github.com/aws/aws-sdk-go-v2/aws" @@ -13,13 +14,10 @@ import ( "github.com/docker/cagent/pkg/tools" ) -// convertMessages converts chat.Messages to Bedrock Message format -// Returns (messages, system content blocks) -// -// Bedrock's Converse API requires that: -// 1. Tool results must immediately follow the assistant message with tool_use -// 2. Multiple consecutive tool results must be grouped into a single user message -func convertMessages(messages []chat.Message) ([]types.Message, []types.SystemContentBlock) { +// convertMessages handles Bedrock's Converse API constraints: +// - Tool results must immediately follow the assistant message with tool_use +// - Multiple consecutive tool results must be grouped into a single user message +func convertMessages(messages []chat.Message, enableCaching bool) ([]types.Message, []types.SystemContentBlock) { var bedrockMessages []types.Message var systemBlocks []types.SystemContentBlock @@ -93,10 +91,34 @@ func convertMessages(messages []chat.Message) ([]types.Message, []types.SystemCo } } + // Cache points after system and on last 2 messages optimize multi-turn conversations: + // the stable prefix (system + tools + older context) gets cached while recent messages change. 
+ if enableCaching { + if len(systemBlocks) > 0 { + systemBlocks = append(systemBlocks, &types.SystemContentBlockMemberCachePoint{ + Value: types.CachePointBlock{Type: types.CachePointTypeDefault}, + }) + } + applyCachePointsToMessages(bedrockMessages) + } + return bedrockMessages, systemBlocks } -// convertUserContent converts user message content to Bedrock ContentBlocks +func applyCachePointsToMessages(messages []types.Message) { + // Add cache points to the last 2 messages (or all if fewer exist) + start := max(0, len(messages)-2) + for i := len(messages) - 1; i >= start; i-- { + msg := &messages[i] + if len(msg.Content) == 0 { + continue + } + msg.Content = append(msg.Content, &types.ContentBlockMemberCachePoint{ + Value: types.CachePointBlock{Type: types.CachePointTypeDefault}, + }) + } +} + func convertUserContent(msg *chat.Message) []types.ContentBlock { var blocks []types.ContentBlock @@ -126,7 +148,6 @@ func convertUserContent(msg *chat.Message) []types.ContentBlock { return blocks } -// convertImageURL converts an image URL to Bedrock ImageBlock func convertImageURL(imageURL *chat.MessageImageURL) types.ContentBlock { if !strings.HasPrefix(imageURL.URL, "data:") { return nil @@ -168,7 +189,6 @@ func convertImageURL(imageURL *chat.MessageImageURL) types.ContentBlock { } } -// convertAssistantContent converts assistant message to Bedrock ContentBlocks func convertAssistantContent(msg *chat.Message) []types.ContentBlock { var blocks []types.ContentBlock @@ -223,23 +243,19 @@ func convertAssistantContent(msg *chat.Message) []types.ContentBlock { return blocks } -// mapToDocument converts a map to Bedrock document format func mapToDocument(m map[string]any) document.Interface { return document.NewLazyDocument(m) } -// convertToolConfig converts tools to Bedrock ToolConfiguration -func convertToolConfig(requestTools []tools.Tool) *types.ToolConfiguration { +func convertToolConfig(requestTools []tools.Tool, enableCaching bool) *types.ToolConfiguration { if len(requestTools) == 0 { return nil } - toolSpecs := make([]types.Tool, len(requestTools)) - for i, tool := range requestTools { - // Convert parameters to JSON schema format + toolSpecs := make([]types.Tool, 0, len(requestTools)+1) + for _, tool := range requestTools { schema := convertToolSchema(tool.Parameters) - - toolSpecs[i] = &types.ToolMemberToolSpec{ + toolSpecs = append(toolSpecs, &types.ToolMemberToolSpec{ Value: types.ToolSpecification{ Name: aws.String(tool.Name), Description: aws.String(tool.Description), @@ -247,22 +263,26 @@ func convertToolConfig(requestTools []tools.Tool) *types.ToolConfiguration { Value: schema, }, }, - } + }) + } + + // Cache point after tools: tool definitions remain stable across conversation turns + if enableCaching { + toolSpecs = append(toolSpecs, &types.ToolMemberCachePoint{ + Value: types.CachePointBlock{Type: types.CachePointTypeDefault}, + }) } return &types.ToolConfiguration{ - Tools: toolSpecs, - // Auto tool choice lets the model decide - ToolChoice: &types.ToolChoiceMemberAuto{ - Value: types.AutoToolChoice{}, - }, + Tools: toolSpecs, + ToolChoice: &types.ToolChoiceMemberAuto{Value: types.AutoToolChoice{}}, } } -// convertToolSchema converts tool parameters to Bedrock-compatible JSON schema func convertToolSchema(params any) document.Interface { schema, err := tools.SchemaToMap(params) if err != nil { + slog.Debug("Bedrock tool schema conversion failed, using empty schema", "error", err) schema = map[string]any{ "type": "object", "properties": map[string]any{},
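
For reference, the two Bedrock `provider_opts` documented above can be set together on a model. The snippet below is an illustrative sketch only, not part of the patch: the option names, types, and defaults come from the USAGE.md table added in this diff, and the nesting under `provider_opts` mirrors the disable example there; any surrounding model-definition keys are omitted.

```yaml
# Illustrative only — option keys taken from the USAGE.md table in this diff.
provider_opts:
  interleaved_thinking: true     # bool; requires thinking_budget to take effect
  disable_prompt_caching: true   # bool; opt out of automatic prompt caching
```

Both values are read through `getProviderOpt[bool]`, so a non-bool value (for example the string `"true"`) falls back to the zero value and, with this change, logs a type-mismatch warning — the behavior exercised by `TestPromptCachingEnabled_TypeMismatch`.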