Skip to content

Commit f013a74

Browse files
committed
Fixes #4424 and #4399 - token metadata and cached content
Signed-off-by: ddobrin <[email protected]>
1 parent 07688d5 commit f013a74

16 files changed

+3151
-10
lines changed

models/spring-ai-google-genai/README.md

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,112 @@
1818

1919
### Environment variables
2020
```shell
21-
export GOOGLE_GENAI_USE_VERTEXAI=true
22-
export GOOGLE_CLOUD_PROJECT='your-project-id'
21+
export GOOGLE_GENAI_USE_VERTEXAI=true
22+
export GOOGLE_CLOUD_PROJECT='your-project-id'
2323
export GOOGLE_CLOUD_LOCATION='your-region'
24+
```
25+
26+
## Extended Usage Metadata
27+
28+
The Google GenAI module provides comprehensive usage metadata tracking through the `GoogleGenAiUsage` class, which extends the standard `Usage` interface with additional token tracking capabilities specific to Google GenAI models.
29+
30+
### Features
31+
32+
#### Thinking Tokens
33+
Track reasoning tokens for thinking-enabled models like Gemini 2.0 Flash Thinking:
34+
```java
35+
ChatResponse response = chatModel.call(prompt);
36+
GoogleGenAiUsage usage = (GoogleGenAiUsage) response.getMetadata().getUsage();
37+
Integer thoughtsTokens = usage.getThoughtsTokenCount(); // Reasoning tokens
38+
```
39+
40+
#### Cached Content Tokens
41+
Monitor tokens from cached context to optimize API costs:
42+
```java
43+
Integer cachedTokens = usage.getCachedContentTokenCount(); // Cached context tokens
44+
```
45+
46+
#### Tool-Use Tokens
47+
Track tokens consumed by function calling and tool use:
48+
```java
49+
Integer toolUseTokens = usage.getToolUsePromptTokenCount(); // Tool-use tokens
50+
```
51+
52+
#### Modality Breakdowns
53+
Get detailed token counts by modality (text, image, audio, video):
54+
```java
55+
List<GoogleGenAiModalityTokenCount> promptDetails = usage.getPromptTokensDetails();
56+
for (GoogleGenAiModalityTokenCount detail : promptDetails) {
57+
System.out.println(detail.getModality() + ": " + detail.getTokenCount());
58+
}
59+
```
60+
61+
#### Traffic Type
62+
Identify whether requests use Pay-As-You-Go or Provisioned Throughput:
63+
```java
64+
GoogleGenAiTrafficType trafficType = usage.getTrafficType();
65+
// Returns: ON_DEMAND, PROVISIONED_THROUGHPUT, or UNKNOWN
66+
```
67+
68+
### Configuration
69+
70+
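Control whether to include extended metadata (enabled by default). When it is disabled, the response carries a plain `DefaultUsage`, so check the type with `instanceof` before casting to `GoogleGenAiUsage`: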
71+
```java
72+
GoogleGenAiChatOptions options = GoogleGenAiChatOptions.builder()
73+
.model("gemini-2.0-flash")
74+
.includeExtendedUsageMetadata(true) // Enable extended metadata
75+
.build();
76+
```
77+
78+
### Complete Example
79+
80+
```java
81+
@Component
82+
public class ExtendedUsageExample {
83+
84+
private final GoogleGenAiChatModel chatModel;
85+
86+
public void demonstrateExtendedUsage() {
87+
Prompt prompt = new Prompt("Analyze this complex multi-modal request");
88+
ChatResponse response = chatModel.call(prompt);
89+
90+
// Cast to GoogleGenAiUsage for extended metadata
91+
GoogleGenAiUsage usage = (GoogleGenAiUsage) response.getMetadata().getUsage();
92+
93+
// Basic token counts (standard Usage interface)
94+
System.out.println("Prompt tokens: " + usage.getPromptTokens());
95+
System.out.println("Completion tokens: " + usage.getCompletionTokens());
96+
System.out.println("Total tokens: " + usage.getTotalTokens());
97+
98+
// Extended metadata (Google GenAI specific)
99+
System.out.println("Thinking tokens: " + usage.getThoughtsTokenCount());
100+
System.out.println("Cached tokens: " + usage.getCachedContentTokenCount());
101+
System.out.println("Tool-use tokens: " + usage.getToolUsePromptTokenCount());
102+
103+
// Modality breakdowns
104+
if (usage.getPromptTokensDetails() != null) {
105+
usage.getPromptTokensDetails().forEach(detail ->
106+
System.out.println(" " + detail.getModality() + ": " + detail.getTokenCount())
107+
);
108+
}
109+
110+
// Traffic type
111+
System.out.println("Traffic type: " + usage.getTrafficType());
112+
113+
// Access native SDK object for any additional metadata
114+
GenerateContentResponseUsageMetadata nativeUsage =
115+
(GenerateContentResponseUsageMetadata) usage.getNativeUsage();
116+
}
117+
}
118+
```
119+
120+
### Backward Compatibility
121+
122+
The extended usage metadata maintains full backward compatibility with the standard `Usage` interface. Code using the basic interface continues to work without modification:
123+
124+
```java
125+
// Works with any Spring AI model
126+
Usage usage = response.getMetadata().getUsage();
127+
Integer promptTokens = usage.getPromptTokens();
128+
Integer completionTokens = usage.getCompletionTokens();
24129
```

models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
5959
import org.springframework.ai.chat.metadata.ChatResponseMetadata;
6060
import org.springframework.ai.chat.metadata.DefaultUsage;
61-
import org.springframework.ai.chat.metadata.EmptyUsage;
6261
import org.springframework.ai.chat.metadata.Usage;
6362
import org.springframework.ai.chat.model.ChatModel;
6463
import org.springframework.ai.chat.model.ChatResponse;
@@ -71,8 +70,10 @@
7170
import org.springframework.ai.chat.prompt.ChatOptions;
7271
import org.springframework.ai.chat.prompt.Prompt;
7372
import org.springframework.ai.content.Media;
73+
import org.springframework.ai.google.genai.cache.GoogleGenAiCachedContentService;
7474
import org.springframework.ai.google.genai.common.GoogleGenAiConstants;
7575
import org.springframework.ai.google.genai.common.GoogleGenAiSafetySetting;
76+
import org.springframework.ai.google.genai.metadata.GoogleGenAiUsage;
7677
import org.springframework.ai.google.genai.schema.GoogleGenAiToolCallingManager;
7778
import org.springframework.ai.model.ChatModelDescription;
7879
import org.springframework.ai.model.ModelOptionsUtils;
@@ -157,6 +158,11 @@ public class GoogleGenAiChatModel implements ChatModel, DisposableBean {
157158
*/
158159
private final RetryTemplate retryTemplate;
159160

161+
/**
162+
* The cached content service for managing cached content.
163+
*/
164+
private final GoogleGenAiCachedContentService cachedContentService;
165+
160166
// GenerationConfig is now built dynamically per request
161167

162168
/**
@@ -225,6 +231,9 @@ public GoogleGenAiChatModel(Client genAiClient, GoogleGenAiChatOptions defaultOp
225231
this.retryTemplate = retryTemplate;
226232
this.observationRegistry = observationRegistry;
227233
this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate;
234+
// Initialize cached content service only if the client supports it
235+
this.cachedContentService = (genAiClient != null && genAiClient.caches != null && genAiClient.async != null
236+
&& genAiClient.async.caches != null) ? new GoogleGenAiCachedContentService(genAiClient) : null;
228237

229238
// Wrap the provided tool calling manager in a GoogleGenAiToolCallingManager to
230239
// ensure
@@ -414,8 +423,9 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
414423
.toList();
415424

416425
var usage = generateContentResponse.usageMetadata();
417-
Usage currentUsage = (usage.isPresent()) ? new DefaultUsage(usage.get().promptTokenCount().orElse(0),
418-
usage.get().candidatesTokenCount().orElse(0)) : new EmptyUsage();
426+
GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions();
427+
Usage currentUsage = (usage.isPresent()) ? getDefaultUsage(usage.get(), options)
428+
: getDefaultUsage(null, options);
419429
Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse);
420430
ChatResponse chatResponse = new ChatResponse(generations,
421431
toChatResponseMetadata(cumulativeUsage, generateContentResponse.modelVersion().get()));
@@ -533,7 +543,9 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
533543
.toList();
534544

535545
var usage = response.usageMetadata();
536-
Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get()) : new EmptyUsage();
546+
GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions();
547+
Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get(), options)
548+
: getDefaultUsage(null, options);
537549
Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse);
538550
ChatResponse chatResponse = new ChatResponse(generations,
539551
toChatResponseMetadata(cumulativeUsage, response.modelVersion().get()));
@@ -643,9 +655,26 @@ private ChatResponseMetadata toChatResponseMetadata(Usage usage, String modelVer
643655
return ChatResponseMetadata.builder().usage(usage).model(modelVersion).build();
644656
}
645657

646-
private DefaultUsage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata) {
647-
return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0),
648-
usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0));
658+
private Usage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata,
659+
GoogleGenAiChatOptions options) {
660+
// Check if extended metadata should be included (default to true if not
661+
// configured)
662+
boolean includeExtended = true;
663+
if (options != null && options.getIncludeExtendedUsageMetadata() != null) {
664+
includeExtended = options.getIncludeExtendedUsageMetadata();
665+
}
666+
else if (this.defaultOptions.getIncludeExtendedUsageMetadata() != null) {
667+
includeExtended = this.defaultOptions.getIncludeExtendedUsageMetadata();
668+
}
669+
670+
if (includeExtended) {
671+
return GoogleGenAiUsage.from(usageMetadata);
672+
}
673+
else {
674+
// Fall back to basic usage for backward compatibility
675+
return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0),
676+
usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0));
677+
}
649678
}
650679

651680
GeminiRequest createGeminiRequest(Prompt prompt) {
@@ -723,6 +752,14 @@ GeminiRequest createGeminiRequest(Prompt prompt) {
723752
configBuilder.tools(tools);
724753
}
725754

755+
// Handle cached content
756+
if (requestOptions.getUseCachedContent() != null && requestOptions.getUseCachedContent()
757+
&& requestOptions.getCachedContentName() != null) {
758+
// Set the cached content name in the config
759+
configBuilder.cachedContent(requestOptions.getCachedContentName());
760+
logger.debug("Using cached content: {}", requestOptions.getCachedContentName());
761+
}
762+
726763
// Handle system instruction
727764
List<Content> systemContents = toGeminiContent(
728765
prompt.getInstructions().stream().filter(m -> m.getMessageType() == MessageType.SYSTEM).toList());
@@ -832,6 +869,14 @@ public ChatOptions getDefaultOptions() {
832869
return GoogleGenAiChatOptions.fromOptions(this.defaultOptions);
833870
}
834871

872+
/**
873+
* Gets the cached content service for managing cached content.
874+
* @return the cached content service, or {@code null} if the client does not expose the caches API
875+
*/
876+
public GoogleGenAiCachedContentService getCachedContentService() {
877+
return this.cachedContentService;
878+
}
879+
835880
@Override
836881
public void destroy() throws Exception {
837882
// GenAI Client doesn't need explicit closing

models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,39 @@ public class GoogleGenAiChatOptions implements ToolCallingChatOptions {
113113
*/
114114
private @JsonProperty("thinkingBudget") Integer thinkingBudget;
115115

116+
/**
117+
* Optional. Whether to include extended usage metadata in responses.
118+
* When true, includes thinking tokens, cached content, tool-use tokens, and modality details.
119+
* Defaults to true for full metadata access.
120+
*/
121+
private @JsonProperty("includeExtendedUsageMetadata") Boolean includeExtendedUsageMetadata;
122+
123+
/**
124+
* Optional. The name of cached content to use for this request.
125+
* When set, the cached content will be used as context for the request.
126+
*/
127+
private @JsonProperty("cachedContentName") String cachedContentName;
128+
129+
/**
130+
* Optional. Whether to use cached content if available.
131+
* When true and cachedContentName is set, the system will use the cached content.
132+
*/
133+
private @JsonProperty("useCachedContent") Boolean useCachedContent;
134+
135+
/**
136+
* Optional. Automatically cache prompts that exceed this token threshold.
137+
* When set, prompts larger than this value will be automatically cached for reuse.
138+
* Set to null to disable auto-caching.
139+
*/
140+
private @JsonProperty("autoCacheThreshold") Integer autoCacheThreshold;
141+
142+
/**
143+
* Optional. Time-to-live for auto-cached content.
144+
* Used when auto-caching is enabled. Defaults to 1 hour if not specified.
145+
*/
146+
@JsonIgnore
147+
private java.time.Duration autoCacheTtl;
148+
116149
/**
117150
* Collection of {@link ToolCallback}s to be used for tool calling in the chat
118151
* completion requests.
@@ -174,6 +207,11 @@ public static GoogleGenAiChatOptions fromOptions(GoogleGenAiChatOptions fromOpti
174207
options.setToolContext(fromOptions.getToolContext());
175208
options.setThinkingBudget(fromOptions.getThinkingBudget());
176209
options.setLabels(fromOptions.getLabels());
210+
options.setIncludeExtendedUsageMetadata(fromOptions.getIncludeExtendedUsageMetadata());
211+
options.setCachedContentName(fromOptions.getCachedContentName());
212+
options.setUseCachedContent(fromOptions.getUseCachedContent());
213+
options.setAutoCacheThreshold(fromOptions.getAutoCacheThreshold());
214+
options.setAutoCacheTtl(fromOptions.getAutoCacheTtl());
177215
return options;
178216
}
179217

@@ -319,6 +357,46 @@ public void setThinkingBudget(Integer thinkingBudget) {
319357
this.thinkingBudget = thinkingBudget;
320358
}
321359

360+
public Boolean getIncludeExtendedUsageMetadata() {
361+
return this.includeExtendedUsageMetadata;
362+
}
363+
364+
public void setIncludeExtendedUsageMetadata(Boolean includeExtendedUsageMetadata) {
365+
this.includeExtendedUsageMetadata = includeExtendedUsageMetadata;
366+
}
367+
368+
public String getCachedContentName() {
369+
return this.cachedContentName;
370+
}
371+
372+
public void setCachedContentName(String cachedContentName) {
373+
this.cachedContentName = cachedContentName;
374+
}
375+
376+
public Boolean getUseCachedContent() {
377+
return this.useCachedContent;
378+
}
379+
380+
public void setUseCachedContent(Boolean useCachedContent) {
381+
this.useCachedContent = useCachedContent;
382+
}
383+
384+
public Integer getAutoCacheThreshold() {
385+
return this.autoCacheThreshold;
386+
}
387+
388+
public void setAutoCacheThreshold(Integer autoCacheThreshold) {
389+
this.autoCacheThreshold = autoCacheThreshold;
390+
}
391+
392+
public java.time.Duration getAutoCacheTtl() {
393+
return this.autoCacheTtl;
394+
}
395+
396+
public void setAutoCacheTtl(java.time.Duration autoCacheTtl) {
397+
this.autoCacheTtl = autoCacheTtl;
398+
}
399+
322400
public Boolean getGoogleSearchRetrieval() {
323401
return this.googleSearchRetrieval;
324402
}
@@ -524,12 +602,37 @@ public Builder thinkingBudget(Integer thinkingBudget) {
524602
return this;
525603
}
526604

605+
public Builder includeExtendedUsageMetadata(Boolean includeExtendedUsageMetadata) {
606+
this.options.setIncludeExtendedUsageMetadata(includeExtendedUsageMetadata);
607+
return this;
608+
}
609+
527610
public Builder labels(Map<String, String> labels) {
528611
Assert.notNull(labels, "labels must not be null");
529612
this.options.labels = labels;
530613
return this;
531614
}
532615

616+
public Builder cachedContentName(String cachedContentName) {
617+
this.options.setCachedContentName(cachedContentName);
618+
return this;
619+
}
620+
621+
public Builder useCachedContent(Boolean useCachedContent) {
622+
this.options.setUseCachedContent(useCachedContent);
623+
return this;
624+
}
625+
626+
public Builder autoCacheThreshold(Integer autoCacheThreshold) {
627+
this.options.setAutoCacheThreshold(autoCacheThreshold);
628+
return this;
629+
}
630+
631+
public Builder autoCacheTtl(java.time.Duration autoCacheTtl) {
632+
this.options.setAutoCacheTtl(autoCacheTtl);
633+
return this;
634+
}
635+
533636
public GoogleGenAiChatOptions build() {
534637
return this.options;
535638
}

0 commit comments

Comments
 (0)