Skip to content

Commit ba67460

Browse files
sobychacko authored and SenreySong committed
spring-projects GH-1403: Add Anthropic prompt caching via AnthropicChatOptions
- Add cacheControl field to AnthropicChatOptions with builder method
- Create AnthropicCacheType enum with EPHEMERAL type for type-safe cache creation
- Update AnthropicChatModel.createRequest() to apply cache control from options to user message ContentBlocks
- Extend ContentBlock record with cacheControl parameter and constructor for API compatibility
- Update Usage record to include cacheCreationInputTokens and cacheReadInputTokens fields
- Update StreamHelper to handle new Usage constructor with cache token parameters
- Add AnthropicApiIT.chatWithPromptCache() test for low-level API validation
- Add AnthropicChatModelIT.chatWithPromptCacheViaOptions() integration test
- Add comprehensive unit tests for AnthropicChatOptions cache control functionality
- Update documentation with cacheControl() method examples and usage patterns

Cache control is configured through AnthropicChatOptions rather than message classes to maintain provider portability. The cache control gets applied during request creation in AnthropicChatModel when building ContentBlocks for user messages.

Original implementation provided by @Claudio-code (Claudio Silva Junior)
See spring-projects@15e5026

Fixes spring-projects#1403

Signed-off-by: Soby Chacko <[email protected]>
1 parent 315c208 commit ba67460

File tree

9 files changed

+506
-17
lines changed

9 files changed

+506
-17
lines changed

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -483,12 +483,25 @@ private Map<String, String> mergeHttpHeaders(Map<String, String> runtimeHttpHead
483483

484484
ChatCompletionRequest createRequest(Prompt prompt, boolean stream) {
485485

486+
// Get cache control from options
487+
AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
488+
AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null)
489+
? requestOptions.getCacheControl() : null;
490+
486491
List<AnthropicMessage> userMessages = prompt.getInstructions()
487492
.stream()
488493
.filter(message -> message.getMessageType() != MessageType.SYSTEM)
489494
.map(message -> {
490495
if (message.getMessageType() == MessageType.USER) {
491-
List<ContentBlock> contents = new ArrayList<>(List.of(new ContentBlock(message.getText())));
496+
List<ContentBlock> contents = new ArrayList<>();
497+
498+
// Apply cache control if enabled for user messages
499+
if (cacheControl != null) {
500+
contents.add(new ContentBlock(message.getText(), cacheControl));
501+
}
502+
else {
503+
contents.add(new ContentBlock(message.getText()));
504+
}
492505
if (message instanceof UserMessage userMessage) {
493506
if (!CollectionUtils.isEmpty(userMessage.getMedia())) {
494507
List<ContentBlock> mediaContent = userMessage.getMedia().stream().map(media -> {
@@ -538,7 +551,6 @@ else if (message.getMessageType() == MessageType.TOOL) {
538551
ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages,
539552
systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream);
540553

541-
AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions();
542554
request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class);
543555

544556
// Add the tool definitions to the request's tools parameter.

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
* @author Thomas Vitale
4545
* @author Alexandros Pappas
4646
* @author Ilayaperumal Gopinathan
47+
* @author Soby Chacko
4748
* @since 1.0.0
4849
*/
4950
@JsonInclude(Include.NON_NULL)
@@ -59,6 +60,20 @@ public class AnthropicChatOptions implements ToolCallingChatOptions {
5960
private @JsonProperty("top_k") Integer topK;
6061
private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking;
6162

63+
/**
64+
* Cache control for user messages. When set, enables caching for user messages.
65+
* Uses the existing CacheControl record from AnthropicApi.ChatCompletionRequest.
66+
*/
67+
private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl;
68+
69+
public ChatCompletionRequest.CacheControl getCacheControl() {
70+
return this.cacheControl;
71+
}
72+
73+
public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) {
74+
this.cacheControl = cacheControl;
75+
}
76+
6277
/**
6378
* Collection of {@link ToolCallback}s to be used for tool calling in the chat
6479
* completion requests.
@@ -111,6 +126,7 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions)
111126
.internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled())
112127
.toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null)
113128
.httpHeaders(fromOptions.getHttpHeaders() != null ? new HashMap<>(fromOptions.getHttpHeaders()) : null)
129+
.cacheControl(fromOptions.getCacheControl())
114130
.build();
115131
}
116132

@@ -282,14 +298,15 @@ public boolean equals(Object o) {
282298
&& Objects.equals(this.toolNames, that.toolNames)
283299
&& Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled)
284300
&& Objects.equals(this.toolContext, that.toolContext)
285-
&& Objects.equals(this.httpHeaders, that.httpHeaders);
301+
&& Objects.equals(this.httpHeaders, that.httpHeaders)
302+
&& Objects.equals(this.cacheControl, that.cacheControl);
286303
}
287304

288305
@Override
289306
public int hashCode() {
290307
return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP,
291308
this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled,
292-
this.toolContext, this.httpHeaders);
309+
this.toolContext, this.httpHeaders, this.cacheControl);
293310
}
294311

295312
public static class Builder {
@@ -389,6 +406,14 @@ public Builder httpHeaders(Map<String, String> httpHeaders) {
389406
return this;
390407
}
391408

409+
/**
410+
* Set cache control for user messages
411+
*/
412+
public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) {
413+
this.options.cacheControl = cacheControl;
414+
return this;
415+
}
416+
392417
public AnthropicChatOptions build() {
393418
return this.options;
394419
}

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java

Lines changed: 28 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import reactor.core.publisher.Flux;
3636
import reactor.core.publisher.Mono;
3737

38+
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
3839
import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder;
3940
import org.springframework.ai.model.ApiKey;
4041
import org.springframework.ai.model.ChatModelDescription;
@@ -65,6 +66,7 @@
6566
* @author Jonghoon Park
6667
* @author Claudio Silva Junior
6768
* @author Filip Hrisafov
69+
* @author Soby Chacko
6870
* @since 1.0.0
6971
*/
7072
public final class AnthropicApi {
@@ -557,6 +559,14 @@ public record Metadata(@JsonProperty("user_id") String userId) {
557559

558560
}
559561

562+
/**
563+
* @param type is the cache type supported by anthropic. <a href=
564+
* "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Doc</a>
565+
*/
566+
@JsonInclude(Include.NON_NULL)
567+
public record CacheControl(String type) {
568+
}
569+
560570
/**
561571
* Configuration for the model's thinking mode.
562572
*
@@ -763,8 +773,11 @@ public record ContentBlock(
763773
@JsonProperty("thinking") String thinking,
764774

765775
// Redacted Thinking only
766-
@JsonProperty("data") String data
767-
) {
776+
@JsonProperty("data") String data,
777+
778+
// cache object
779+
@JsonProperty("cache_control") CacheControl cacheControl
780+
) {
768781
// @formatter:on
769782

770783
/**
@@ -782,23 +795,27 @@ public ContentBlock(String mediaType, String data) {
782795
* @param source The source of the content.
783796
*/
784797
public ContentBlock(Type type, Source source) {
785-
this(type, source, null, null, null, null, null, null, null, null, null, null);
798+
this(type, source, null, null, null, null, null, null, null, null, null, null, null);
786799
}
787800

788801
/**
789802
* Create content block
790803
* @param source The source of the content.
791804
*/
792805
public ContentBlock(Source source) {
793-
this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null);
806+
this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null);
794807
}
795808

796809
/**
797810
* Create content block
798811
* @param text The text of the content.
799812
*/
800813
public ContentBlock(String text) {
801-
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null);
814+
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null);
815+
}
816+
817+
public ContentBlock(String text, CacheControl cache) {
818+
this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache);
802819
}
803820

804821
// Tool result
@@ -809,7 +826,7 @@ public ContentBlock(String text) {
809826
* @param content The content of the tool result.
810827
*/
811828
public ContentBlock(Type type, String toolUseId, String content) {
812-
this(type, null, null, null, null, null, null, toolUseId, content, null, null, null);
829+
this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null);
813830
}
814831

815832
/**
@@ -820,7 +837,7 @@ public ContentBlock(Type type, String toolUseId, String content) {
820837
* @param index The index of the content block.
821838
*/
822839
public ContentBlock(Type type, Source source, String text, Integer index) {
823-
this(type, source, text, index, null, null, null, null, null, null, null, null);
840+
this(type, source, text, index, null, null, null, null, null, null, null, null, null);
824841
}
825842

826843
// Tool use input JSON delta streaming
@@ -832,7 +849,7 @@ public ContentBlock(Type type, Source source, String text, Integer index) {
832849
* @param input The input of the tool use.
833850
*/
834851
public ContentBlock(Type type, String id, String name, Map<String, Object> input) {
835-
this(type, null, null, null, id, name, input, null, null, null, null, null);
852+
this(type, null, null, null, id, name, input, null, null, null, null, null, null);
836853
}
837854

838855
/**
@@ -1026,7 +1043,9 @@ public record ChatCompletionResponse(
10261043
public record Usage(
10271044
// @formatter:off
10281045
@JsonProperty("input_tokens") Integer inputTokens,
1029-
@JsonProperty("output_tokens") Integer outputTokens) {
1046+
@JsonProperty("output_tokens") Integer outputTokens,
1047+
@JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens,
1048+
@JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) {
10301049
// @formatter:on
10311050
}
10321051

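models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change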
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright 2025-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.anthropic.api;
18+
19+
import java.util.function.Supplier;
20+
21+
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl;
22+
23+
/**
24+
* Cache types supported by Anthropic's prompt caching feature.
25+
*
26+
* <p>
27+
* Prompt caching allows reusing frequently used prompts to reduce costs and improve
28+
* response times for repeated interactions.
29+
*
30+
* @see <a href=
31+
* "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching">Anthropic Prompt
32+
* Caching</a>
33+
* @author Claudio Silva Junior
34+
* @author Soby Chacko
35+
*/
36+
public enum AnthropicCacheType {
37+
38+
/**
39+
* Ephemeral cache with 5-minute lifetime, refreshed on each use.
40+
*/
41+
EPHEMERAL(() -> new CacheControl("ephemeral"));
42+
43+
private final Supplier<CacheControl> value;
44+
45+
AnthropicCacheType(Supplier<CacheControl> value) {
46+
this.value = value;
47+
}
48+
49+
/**
50+
* Returns a new CacheControl instance for this cache type.
51+
* @return a CacheControl instance configured for this cache type
52+
*/
53+
public CacheControl cacheControl() {
54+
return this.value.get();
55+
}
56+
57+
}

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,8 @@
5555
* @author Christian Tzolov
5656
* @author Jihoon Kim
5757
* @author Alexandros Pappas
58+
* @author Claudio Silva Junior
59+
* @author Soby Chacko
5860
* @since 1.0.0
5961
*/
6062
public class StreamHelper {
@@ -159,7 +161,7 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_START)) {
159161
}
160162
else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
161163
ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
162-
null, null, null, null, null, thinkingBlock.thinking(), null);
164+
null, null, null, null, null, thinkingBlock.thinking(), null, null);
163165
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
164166
}
165167
else {
@@ -176,12 +178,12 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) {
176178
}
177179
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
178180
ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
179-
null, null, null, null, null, null, thinking.thinking(), null);
181+
null, null, null, null, null, null, thinking.thinking(), null, null);
180182
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
181183
}
182184
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
183185
ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
184-
null, null, null, null, null, sig.signature(), null, null);
186+
null, null, null, null, null, sig.signature(), null, null, null);
185187
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
186188
}
187189
else {
@@ -205,7 +207,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) {
205207

206208
if (messageDeltaEvent.usage() != null) {
207209
Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
208-
messageDeltaEvent.usage().outputTokens());
210+
messageDeltaEvent.usage().outputTokens(),
211+
contentBlockReference.get().usage.cacheCreationInputTokens(),
212+
contentBlockReference.get().usage.cacheReadInputTokens());
209213
contentBlockReference.get().withUsage(totalUsage);
210214
}
211215
}

models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
import reactor.core.publisher.Flux;
3333

3434
import org.springframework.ai.anthropic.api.AnthropicApi;
35+
import org.springframework.ai.anthropic.api.AnthropicCacheType;
3536
import org.springframework.ai.anthropic.api.tool.MockWeatherService;
3637
import org.springframework.ai.chat.client.ChatClient;
3738
import org.springframework.ai.chat.messages.AssistantMessage;
@@ -491,6 +492,59 @@ void testToolUseContentBlock() {
491492
}
492493
}
493494

495+
@Test
496+
void chatWithPromptCacheViaOptions() {
497+
String userMessageText = "It could be eitherr a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which "
498+
+ "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" "
499+
+ "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English "
500+
+ "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". "
501+
+ "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\"";
502+
503+
// Repeat content to meet minimum token requirements for caching (1024+ tokens)
504+
String largeContent = userMessageText.repeat(20);
505+
506+
// First request - should create cache
507+
ChatResponse firstResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)),
508+
AnthropicChatOptions.builder()
509+
.model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue())
510+
.cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
511+
.maxTokens(100)
512+
.temperature(0.8)
513+
.build()));
514+
515+
// Access native Anthropic usage data
516+
AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata().getUsage().getNativeUsage();
517+
518+
// Verify first request created cache
519+
assertThat(firstUsage.cacheCreationInputTokens()).isGreaterThan(0);
520+
assertThat(firstUsage.cacheReadInputTokens()).isEqualTo(0);
521+
522+
// Second request with identical content - should read from cache
523+
ChatResponse secondResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)),
524+
AnthropicChatOptions.builder()
525+
.model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue())
526+
.cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl())
527+
.maxTokens(100)
528+
.temperature(0.8)
529+
.build()));
530+
531+
// Access native Anthropic usage data
532+
AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata().getUsage().getNativeUsage();
533+
534+
// Verify second request used cache
535+
assertThat(secondUsage.cacheCreationInputTokens()).isEqualTo(0);
536+
assertThat(secondUsage.cacheReadInputTokens()).isGreaterThan(0);
537+
538+
// Both responses should be valid
539+
assertThat(firstResponse.getResult().getOutput().getText()).isNotBlank();
540+
assertThat(secondResponse.getResult().getOutput().getText()).isNotBlank();
541+
542+
logger.info("First request - Cache creation: {}, Cache read: {}", firstUsage.cacheCreationInputTokens(),
543+
firstUsage.cacheReadInputTokens());
544+
logger.info("Second request - Cache creation: {}, Cache read: {}", secondUsage.cacheCreationInputTokens(),
545+
secondUsage.cacheReadInputTokens());
546+
}
547+
494548
record ActorsFilmsRecord(String actor, List<String> movies) {
495549

496550
}

0 commit comments

Comments
 (0)