spring-projects
diff --git a/‎models/spring-ai-google-genai/README.md‎
Lines changed: 107 additions & 2 deletions b/‎models/spring-ai-google-genai/README.md‎
Lines changed: 107 additions & 2 deletions
diff --git a/‎models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java‎
Lines changed: 52 additions & 7 deletions b/‎models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java‎
Lines changed: 52 additions & 7 deletions
diff --git a/‎models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java‎
Lines changed: 103 additions & 0 deletions b/‎models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java‎
Lines changed: 103 additions & 0 deletions
@@ -18,7 +18,112 @@
 
 ### Environment variables
 ```shell
-export GOOGLE_GENAI_USE_VERTEXAI=true  
-export GOOGLE_CLOUD_PROJECT='your-project-id'  
+export GOOGLE_GENAI_USE_VERTEXAI=true
+export GOOGLE_CLOUD_PROJECT='your-project-id'
 export GOOGLE_CLOUD_LOCATION='your-region'
+```
+
+## Extended Usage Metadata
+
+The Google GenAI module provides comprehensive usage metadata tracking through the `GoogleGenAiUsage` class, which extends the standard `Usage` interface with additional token tracking capabilities specific to Google GenAI models.
+
+### Features
+
+#### Thinking Tokens
+Track reasoning tokens for thinking-enabled models like Gemini 2.0 Flash Thinking:
+```java
+ChatResponse response = chatModel.call(prompt);
+GoogleGenAiUsage usage = (GoogleGenAiUsage) response.getMetadata().getUsage();
+Integer thoughtsTokens = usage.getThoughtsTokenCount(); // Reasoning tokens
+```
+
+#### Cached Content Tokens
+Monitor tokens from cached context to optimize API costs:
+```java
+Integer cachedTokens = usage.getCachedContentTokenCount(); // Cached context tokens
+```
+
+#### Tool-Use Tokens
+Track tokens consumed by function calling and tool use:
+```java
+Integer toolUseTokens = usage.getToolUsePromptTokenCount(); // Tool-use tokens
+```
+
+#### Modality Breakdowns
+Get detailed token counts by modality (text, image, audio, video):
+```java
+List<GoogleGenAiModalityTokenCount> promptDetails = usage.getPromptTokensDetails();
+for (GoogleGenAiModalityTokenCount detail : promptDetails) {
+    System.out.println(detail.getModality() + ": " + detail.getTokenCount());
+}
+```
+
+#### Traffic Type
+Identify whether requests use Pay-As-You-Go or Provisioned Throughput:
+```java
+GoogleGenAiTrafficType trafficType = usage.getTrafficType();
+// Returns: ON_DEMAND, PROVISIONED_THROUGHPUT, or UNKNOWN
+```
+
+### Configuration
+
+Control whether to include extended metadata (enabled by default):
+```java
+GoogleGenAiChatOptions options = GoogleGenAiChatOptions.builder()
+    .model("gemini-2.0-flash")
+    .includeExtendedUsageMetadata(true) // Enable extended metadata
+    .build();
+```
+
+### Complete Example
+
+```java
+@Component
+public class ExtendedUsageExample {
+
+    private final GoogleGenAiChatModel chatModel;
+
+    public void demonstrateExtendedUsage() {
+        Prompt prompt = new Prompt("Analyze this complex multi-modal request");
+        ChatResponse response = chatModel.call(prompt);
+
+        // Cast to GoogleGenAiUsage for extended metadata
+        GoogleGenAiUsage usage = (GoogleGenAiUsage) response.getMetadata().getUsage();
+
+        // Basic token counts (standard Usage interface)
+        System.out.println("Prompt tokens: " + usage.getPromptTokens());
+        System.out.println("Completion tokens: " + usage.getCompletionTokens());
+        System.out.println("Total tokens: " + usage.getTotalTokens());
+
+        // Extended metadata (Google GenAI specific)
+        System.out.println("Thinking tokens: " + usage.getThoughtsTokenCount());
+        System.out.println("Cached tokens: " + usage.getCachedContentTokenCount());
+        System.out.println("Tool-use tokens: " + usage.getToolUsePromptTokenCount());
+
+        // Modality breakdowns
+        if (usage.getPromptTokensDetails() != null) {
+            usage.getPromptTokensDetails().forEach(detail ->
+                System.out.println("  " + detail.getModality() + ": " + detail.getTokenCount())
+            );
+        }
+
+        // Traffic type
+        System.out.println("Traffic type: " + usage.getTrafficType());
+
+        // Access native SDK object for any additional metadata
+        GenerateContentResponseUsageMetadata nativeUsage =
+            (GenerateContentResponseUsageMetadata) usage.getNativeUsage();
+    }
+}
+```
+
+### Backward Compatibility
+
+The extended usage metadata maintains full backward compatibility with the standard `Usage` interface. Code using the basic interface continues to work without modification:
+
+```java
+// Works with any Spring AI model
+Usage usage = response.getMetadata().getUsage();
+Integer promptTokens = usage.getPromptTokens();
+Integer completionTokens = usage.getCompletionTokens();
 ```
@@ -58,7 +58,6 @@
 import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
 import org.springframework.ai.chat.metadata.ChatResponseMetadata;
 import org.springframework.ai.chat.metadata.DefaultUsage;
-import org.springframework.ai.chat.metadata.EmptyUsage;
 import org.springframework.ai.chat.metadata.Usage;
 import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.chat.model.ChatResponse;
@@ -71,8 +70,10 @@
 import org.springframework.ai.chat.prompt.ChatOptions;
 import org.springframework.ai.chat.prompt.Prompt;
 import org.springframework.ai.content.Media;
+import org.springframework.ai.google.genai.cache.GoogleGenAiCachedContentService;
 import org.springframework.ai.google.genai.common.GoogleGenAiConstants;
 import org.springframework.ai.google.genai.common.GoogleGenAiSafetySetting;
+import org.springframework.ai.google.genai.metadata.GoogleGenAiUsage;
 import org.springframework.ai.google.genai.schema.GoogleGenAiToolCallingManager;
 import org.springframework.ai.model.ChatModelDescription;
 import org.springframework.ai.model.ModelOptionsUtils;
@@ -157,6 +158,11 @@ public class GoogleGenAiChatModel implements ChatModel, DisposableBean {
 	 */
 	private final RetryTemplate retryTemplate;
 
+	/**
+	 * The cached content service for managing cached content.
+	 */
+	private final GoogleGenAiCachedContentService cachedContentService;
+
 	// GenerationConfig is now built dynamically per request
 
 	/**
@@ -225,6 +231,9 @@ public GoogleGenAiChatModel(Client genAiClient, GoogleGenAiChatOptions defaultOp
 		this.retryTemplate = retryTemplate;
 		this.observationRegistry = observationRegistry;
 		this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate;
+		// Initialize cached content service only if the client supports it
+		this.cachedContentService = (genAiClient != null && genAiClient.caches != null && genAiClient.async != null
+				&& genAiClient.async.caches != null) ? new GoogleGenAiCachedContentService(genAiClient) : null;
 
 		// Wrap the provided tool calling manager in a GoogleGenAiToolCallingManager to
 		// ensure
@@ -414,8 +423,9 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
 					.toList();
 
 				var usage = generateContentResponse.usageMetadata();
-				Usage currentUsage = (usage.isPresent()) ? new DefaultUsage(usage.get().promptTokenCount().orElse(0),
-						usage.get().candidatesTokenCount().orElse(0)) : new EmptyUsage();
+				GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions();
+				Usage currentUsage = (usage.isPresent()) ? getDefaultUsage(usage.get(), options)
+						: getDefaultUsage(null, options);
 				Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse);
 				ChatResponse chatResponse = new ChatResponse(generations,
 						toChatResponseMetadata(cumulativeUsage, generateContentResponse.modelVersion().get()));
@@ -533,7 +543,9 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 						.toList();
 
 					var usage = response.usageMetadata();
-					Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get()) : new EmptyUsage();
+					GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions();
+					Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get(), options)
+							: getDefaultUsage(null, options);
 					Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse);
 					ChatResponse chatResponse = new ChatResponse(generations,
 							toChatResponseMetadata(cumulativeUsage, response.modelVersion().get()));
@@ -643,9 +655,26 @@ private ChatResponseMetadata toChatResponseMetadata(Usage usage, String modelVer
 		return ChatResponseMetadata.builder().usage(usage).model(modelVersion).build();
 	}
 
-	private DefaultUsage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata) {
-		return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0),
-				usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0));
+	/**
+	 * Converts the SDK usage metadata of a response into a Spring AI {@link Usage}.
+	 * <p>
+	 * The extended-metadata flag is resolved from the request options first, then from
+	 * the model's default options, and is treated as enabled when neither sets it.
+	 * @param usageMetadata the usage metadata reported by the SDK; {@code null} when the
+	 * response carried none
+	 * @param options the request options, may be {@code null}
+	 * @return a {@link GoogleGenAiUsage} when extended metadata is enabled, otherwise a
+	 * basic {@link DefaultUsage} (all counts zero when no metadata is available)
+	 */
+	private Usage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata,
+			GoogleGenAiChatOptions options) {
+		// Request-level setting wins; fall back to the model defaults when it is unset.
+		Boolean includeExtended = (options != null) ? options.getIncludeExtendedUsageMetadata() : null;
+		if (includeExtended == null) {
+			includeExtended = this.defaultOptions.getIncludeExtendedUsageMetadata();
+		}
+
+		// Extended metadata is on unless explicitly disabled.
+		if (includeExtended == null || includeExtended) {
+			// NOTE(review): callers pass null when the response has no usage metadata;
+			// confirm GoogleGenAiUsage.from tolerates a null argument.
+			return GoogleGenAiUsage.from(usageMetadata);
+		}
+
+		// Fall back to basic usage for backward compatibility. Callers pass null when
+		// the response has no usage metadata, so guard before dereferencing it.
+		if (usageMetadata == null) {
+			return new DefaultUsage(0, 0, 0);
+		}
+		return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0),
+				usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0));
 	}
 
 	GeminiRequest createGeminiRequest(Prompt prompt) {
@@ -723,6 +752,14 @@ GeminiRequest createGeminiRequest(Prompt prompt) {
 			configBuilder.tools(tools);
 		}
 
+		// Handle cached content
+		if (requestOptions.getUseCachedContent() != null && requestOptions.getUseCachedContent()
+				&& requestOptions.getCachedContentName() != null) {
+			// Set the cached content name in the config
+			configBuilder.cachedContent(requestOptions.getCachedContentName());
+			logger.debug("Using cached content: {}", requestOptions.getCachedContentName());
+		}
+
 		// Handle system instruction
 		List<Content> systemContents = toGeminiContent(
 				prompt.getInstructions().stream().filter(m -> m.getMessageType() == MessageType.SYSTEM).toList());
@@ -832,6 +869,14 @@ public ChatOptions getDefaultOptions() {
 		return GoogleGenAiChatOptions.fromOptions(this.defaultOptions);
 	}
 
+	/**
+	 * Gets the cached content service for managing cached content.
+	 * @return the cached content service, or {@code null} when it was not created
+	 * because the GenAI client (or its synchronous/asynchronous {@code caches} API) was
+	 * not available at construction time
+	 */
+	public GoogleGenAiCachedContentService getCachedContentService() {
+		return this.cachedContentService;
+	}
+
 	@Override
 	public void destroy() throws Exception {
 		// GenAI Client doesn't need explicit closing
 
@@ -113,6 +113,39 @@ public class GoogleGenAiChatOptions implements ToolCallingChatOptions {
 	 */
 	private @JsonProperty("thinkingBudget") Integer thinkingBudget;
 
+	/**
+	 * Optional. Whether to include extended usage metadata in responses.
+	 * When true, includes thinking tokens, cached content, tool-use tokens, and modality details.
+	 * When left {@code null}, the chat model falls back to its default options and,
+	 * if those are unset as well, treats the flag as true.
+	 */
+	private @JsonProperty("includeExtendedUsageMetadata") Boolean includeExtendedUsageMetadata;
+
+	/**
+	 * Optional. The name of cached content to use for this request.
+	 * It is only applied to the request configuration when {@code useCachedContent}
+	 * is also true.
+	 */
+	private @JsonProperty("cachedContentName") String cachedContentName;
+
+	/**
+	 * Optional. Whether to use cached content if available.
+	 * When true and cachedContentName is set, the system will use the cached content.
+	 * A {@code null} value is treated the same as false.
+	 */
+	private @JsonProperty("useCachedContent") Boolean useCachedContent;
+
+	/**
+	 * Optional. Automatically cache prompts that exceed this token threshold.
+	 * When set, prompts larger than this value will be automatically cached for reuse.
+	 * Set to null to disable auto-caching.
+	 */
+	private @JsonProperty("autoCacheThreshold") Integer autoCacheThreshold;
+
+	/**
+	 * Optional. Time-to-live for auto-cached content.
+	 * Used when auto-caching is enabled. Defaults to 1 hour if not specified.
+	 * Marked {@code @JsonIgnore}, so it is not part of the JSON form of these options.
+	 */
+	@JsonIgnore
+	private java.time.Duration autoCacheTtl;
+
 	/**
 	 * Collection of {@link ToolCallback}s to be used for tool calling in the chat
 	 * completion requests.
@@ -174,6 +207,11 @@ public static GoogleGenAiChatOptions fromOptions(GoogleGenAiChatOptions fromOpti
 		options.setToolContext(fromOptions.getToolContext());
 		options.setThinkingBudget(fromOptions.getThinkingBudget());
 		options.setLabels(fromOptions.getLabels());
+		options.setIncludeExtendedUsageMetadata(fromOptions.getIncludeExtendedUsageMetadata());
+		options.setCachedContentName(fromOptions.getCachedContentName());
+		options.setUseCachedContent(fromOptions.getUseCachedContent());
+		options.setAutoCacheThreshold(fromOptions.getAutoCacheThreshold());
+		options.setAutoCacheTtl(fromOptions.getAutoCacheTtl());
 		return options;
 	}
 
@@ -319,6 +357,46 @@ public void setThinkingBudget(Integer thinkingBudget) {
 		this.thinkingBudget = thinkingBudget;
 	}
 
+	/**
+	 * Returns whether extended usage metadata should be included in responses.
+	 * @return the configured flag, or {@code null} when it has not been set
+	 */
+	public Boolean getIncludeExtendedUsageMetadata() {
+		return this.includeExtendedUsageMetadata;
+	}
+
+	/**
+	 * Sets whether extended usage metadata should be included in responses.
+	 * @param includeExtendedUsageMetadata the flag to store; {@code null} leaves the
+	 * option unset
+	 */
+	public void setIncludeExtendedUsageMetadata(Boolean includeExtendedUsageMetadata) {
+		this.includeExtendedUsageMetadata = includeExtendedUsageMetadata;
+	}
+
+	/**
+	 * Returns the name of the cached content to use for requests.
+	 * @return the cached content name, or {@code null} when none has been set
+	 */
+	public String getCachedContentName() {
+		return this.cachedContentName;
+	}
+
+	/**
+	 * Sets the name of the cached content to use for requests.
+	 * @param cachedContentName the cached content name; may be {@code null}
+	 */
+	public void setCachedContentName(String cachedContentName) {
+		this.cachedContentName = cachedContentName;
+	}
+
+	/**
+	 * Returns whether cached content should be used for requests.
+	 * @return the configured flag, or {@code null} when it has not been set
+	 */
+	public Boolean getUseCachedContent() {
+		return this.useCachedContent;
+	}
+
+	/**
+	 * Sets whether cached content should be used for requests.
+	 * @param useCachedContent the flag to store; {@code null} leaves the option unset
+	 */
+	public void setUseCachedContent(Boolean useCachedContent) {
+		this.useCachedContent = useCachedContent;
+	}
+
+	/**
+	 * Returns the token threshold configured for automatic prompt caching.
+	 * @return the threshold, or {@code null} when none has been set (documented on the
+	 * field as auto-caching disabled)
+	 */
+	public Integer getAutoCacheThreshold() {
+		return this.autoCacheThreshold;
+	}
+
+	/**
+	 * Sets the token threshold for automatic prompt caching.
+	 * @param autoCacheThreshold the threshold to store; {@code null} is documented on
+	 * the field as disabling auto-caching. The value is stored as given, without
+	 * validation.
+	 */
+	public void setAutoCacheThreshold(Integer autoCacheThreshold) {
+		this.autoCacheThreshold = autoCacheThreshold;
+	}
+
+	/**
+	 * Returns the time-to-live configured for auto-cached content.
+	 * @return the stored duration, or {@code null} when none has been set; this getter
+	 * does not substitute a default
+	 */
+	public java.time.Duration getAutoCacheTtl() {
+		return this.autoCacheTtl;
+	}
+
+	/**
+	 * Sets the time-to-live for auto-cached content.
+	 * @param autoCacheTtl the duration to store; may be {@code null}
+	 */
+	public void setAutoCacheTtl(java.time.Duration autoCacheTtl) {
+		this.autoCacheTtl = autoCacheTtl;
+	}
+
 	public Boolean getGoogleSearchRetrieval() {
 		return this.googleSearchRetrieval;
 	}
@@ -524,12 +602,37 @@ public Builder thinkingBudget(Integer thinkingBudget) {
 			return this;
 		}
 
+		/**
+		 * Sets whether extended usage metadata should be included in responses.
+		 * @param includeExtendedUsageMetadata the flag to store on the options being
+		 * built; may be {@code null}
+		 * @return this builder
+		 */
+		public Builder includeExtendedUsageMetadata(Boolean includeExtendedUsageMetadata) {
+			this.options.setIncludeExtendedUsageMetadata(includeExtendedUsageMetadata);
+			return this;
+		}
+
 		public Builder labels(Map<String, String> labels) {
 			Assert.notNull(labels, "labels must not be null");
 			this.options.labels = labels;
 			return this;
 		}
 
+		/**
+		 * Sets the name of the cached content to use for requests.
+		 * @param cachedContentName the cached content name; may be {@code null}
+		 * @return this builder
+		 */
+		public Builder cachedContentName(String cachedContentName) {
+			this.options.setCachedContentName(cachedContentName);
+			return this;
+		}
+
+		/**
+		 * Sets whether cached content should be used for requests.
+		 * @param useCachedContent the flag to store on the options being built; may be
+		 * {@code null}
+		 * @return this builder
+		 */
+		public Builder useCachedContent(Boolean useCachedContent) {
+			this.options.setUseCachedContent(useCachedContent);
+			return this;
+		}
+
+		/**
+		 * Sets the token threshold for automatic prompt caching.
+		 * @param autoCacheThreshold the threshold to store on the options being built;
+		 * may be {@code null}. The value is not validated here.
+		 * @return this builder
+		 */
+		public Builder autoCacheThreshold(Integer autoCacheThreshold) {
+			this.options.setAutoCacheThreshold(autoCacheThreshold);
+			return this;
+		}
+
+		/**
+		 * Sets the time-to-live for auto-cached content.
+		 * @param autoCacheTtl the duration to store on the options being built; may be
+		 * {@code null}
+		 * @return this builder
+		 */
+		public Builder autoCacheTtl(java.time.Duration autoCacheTtl) {
+			this.options.setAutoCacheTtl(autoCacheTtl);
+			return this;
+		}
+
 		public GoogleGenAiChatOptions build() {
 			return this.options;
 		}