|
58 | 58 | import org.springframework.ai.chat.metadata.ChatGenerationMetadata; |
59 | 59 | import org.springframework.ai.chat.metadata.ChatResponseMetadata; |
60 | 60 | import org.springframework.ai.chat.metadata.DefaultUsage; |
61 | | -import org.springframework.ai.chat.metadata.EmptyUsage; |
62 | 61 | import org.springframework.ai.chat.metadata.Usage; |
63 | 62 | import org.springframework.ai.chat.model.ChatModel; |
64 | 63 | import org.springframework.ai.chat.model.ChatResponse; |
|
71 | 70 | import org.springframework.ai.chat.prompt.ChatOptions; |
72 | 71 | import org.springframework.ai.chat.prompt.Prompt; |
73 | 72 | import org.springframework.ai.content.Media; |
| 73 | +import org.springframework.ai.google.genai.cache.GoogleGenAiCachedContentService; |
74 | 74 | import org.springframework.ai.google.genai.common.GoogleGenAiConstants; |
75 | 75 | import org.springframework.ai.google.genai.common.GoogleGenAiSafetySetting; |
| 76 | +import org.springframework.ai.google.genai.metadata.GoogleGenAiUsage; |
76 | 77 | import org.springframework.ai.google.genai.schema.GoogleGenAiToolCallingManager; |
77 | 78 | import org.springframework.ai.model.ChatModelDescription; |
78 | 79 | import org.springframework.ai.model.ModelOptionsUtils; |
@@ -157,6 +158,11 @@ public class GoogleGenAiChatModel implements ChatModel, DisposableBean { |
157 | 158 | */ |
158 | 159 | private final RetryTemplate retryTemplate; |
159 | 160 |
|
| 161 | + /** |
| 162 | + * The cached content service for managing cached content; {@code null} when the client given to the constructor is null or lacks a sync or async caches handle. |
| 163 | + */ |
| 164 | + private final GoogleGenAiCachedContentService cachedContentService; |
| 165 | + |
160 | 166 | // GenerationConfig is now built dynamically per request |
161 | 167 |
|
162 | 168 | /** |
@@ -225,6 +231,9 @@ public GoogleGenAiChatModel(Client genAiClient, GoogleGenAiChatOptions defaultOp |
225 | 231 | this.retryTemplate = retryTemplate; |
226 | 232 | this.observationRegistry = observationRegistry; |
227 | 233 | this.toolExecutionEligibilityPredicate = toolExecutionEligibilityPredicate; |
| 234 | + // Initialize cached content service only if the client supports it |
| 235 | + this.cachedContentService = (genAiClient != null && genAiClient.caches != null && genAiClient.async != null |
| 236 | + && genAiClient.async.caches != null) ? new GoogleGenAiCachedContentService(genAiClient) : null; |
228 | 237 |
|
229 | 238 | // Wrap the provided tool calling manager in a GoogleGenAiToolCallingManager to |
230 | 239 | // ensure |
@@ -414,8 +423,9 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon |
414 | 423 | .toList(); |
415 | 424 |
|
416 | 425 | var usage = generateContentResponse.usageMetadata(); |
417 | | - Usage currentUsage = (usage.isPresent()) ? new DefaultUsage(usage.get().promptTokenCount().orElse(0), |
418 | | - usage.get().candidatesTokenCount().orElse(0)) : new EmptyUsage(); |
| 426 | + GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions(); |
| 427 | + Usage currentUsage = (usage.isPresent()) ? getDefaultUsage(usage.get(), options) |
| 428 | + : getDefaultUsage(null, options); |
419 | 429 | Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse); |
420 | 430 | ChatResponse chatResponse = new ChatResponse(generations, |
421 | 431 | toChatResponseMetadata(cumulativeUsage, generateContentResponse.modelVersion().get())); |
@@ -533,7 +543,9 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha |
533 | 543 | .toList(); |
534 | 544 |
|
535 | 545 | var usage = response.usageMetadata(); |
536 | | - Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get()) : new EmptyUsage(); |
| 546 | + GoogleGenAiChatOptions options = (GoogleGenAiChatOptions) prompt.getOptions(); |
| 547 | + Usage currentUsage = usage.isPresent() ? getDefaultUsage(usage.get(), options) |
| 548 | + : getDefaultUsage(null, options); |
537 | 549 | Usage cumulativeUsage = UsageCalculator.getCumulativeUsage(currentUsage, previousChatResponse); |
538 | 550 | ChatResponse chatResponse = new ChatResponse(generations, |
539 | 551 | toChatResponseMetadata(cumulativeUsage, response.modelVersion().get())); |
@@ -643,9 +655,26 @@ private ChatResponseMetadata toChatResponseMetadata(Usage usage, String modelVer |
643 | 655 | return ChatResponseMetadata.builder().usage(usage).model(modelVersion).build(); |
644 | 656 | } |
645 | 657 |
|
646 | | - private DefaultUsage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata) { |
647 | | - return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0), |
648 | | - usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0)); |
| 658 | + /** |
| 659 | + * Builds the {@link Usage} for a response. {@code usageMetadata} is {@code null} when the |
| 660 | + * response carried no usage metadata; the basic (non-extended) form then reports zero tokens. |
| 661 | + */ |
| 662 | + private Usage getDefaultUsage(com.google.genai.types.GenerateContentResponseUsageMetadata usageMetadata, |
| 663 | +         GoogleGenAiChatOptions options) { |
| 664 | +     // Request options win over model defaults; extended metadata is on when neither is set |
| 665 | +     Boolean includeExtended = (options != null) ? options.getIncludeExtendedUsageMetadata() : null; |
| 666 | +     if (includeExtended == null) { |
| 667 | +         includeExtended = this.defaultOptions.getIncludeExtendedUsageMetadata(); |
| 668 | +     } |
| 669 | +     if (includeExtended == null || includeExtended) { |
| 670 | +         // NOTE(review): null is passed through unchanged here - confirm from() tolerates it |
| 671 | +         return GoogleGenAiUsage.from(usageMetadata); |
| 672 | +     } |
| 673 | +     if (usageMetadata == null) { |
| 674 | +         return new DefaultUsage(0, 0, 0); |
| 675 | +     } |
| 676 | +     return new DefaultUsage(usageMetadata.promptTokenCount().orElse(0), |
| 677 | +             usageMetadata.candidatesTokenCount().orElse(0), usageMetadata.totalTokenCount().orElse(0)); |
649 | 678 | } |
650 | 679 |
|
651 | 680 | GeminiRequest createGeminiRequest(Prompt prompt) { |
@@ -723,6 +752,14 @@ GeminiRequest createGeminiRequest(Prompt prompt) { |
723 | 752 | configBuilder.tools(tools); |
724 | 753 | } |
725 | 754 |
|
| 755 | + // Handle cached content |
| 756 | + if (requestOptions.getUseCachedContent() != null && requestOptions.getUseCachedContent() |
| 757 | + && requestOptions.getCachedContentName() != null) { |
| 758 | + // Set the cached content name in the config |
| 759 | + configBuilder.cachedContent(requestOptions.getCachedContentName()); |
| 760 | + logger.debug("Using cached content: {}", requestOptions.getCachedContentName()); |
| 761 | + } |
| 762 | + |
726 | 763 | // Handle system instruction |
727 | 764 | List<Content> systemContents = toGeminiContent( |
728 | 765 | prompt.getInstructions().stream().filter(m -> m.getMessageType() == MessageType.SYSTEM).toList()); |
@@ -832,6 +869,14 @@ public ChatOptions getDefaultOptions() { |
832 | 869 | return GoogleGenAiChatOptions.fromOptions(this.defaultOptions); |
833 | 870 | } |
834 | 871 |
|
| 872 | + /** |
| 873 | + * Returns the service used to manage cached content. |
| 874 | + * @return the cached content service, or {@code null} if the client given to the constructor was null or lacked a sync or async caches handle |
| 875 | + */ |
| 876 | + public GoogleGenAiCachedContentService getCachedContentService() { |
| 877 | + return this.cachedContentService; |
| 878 | + } |
| 879 | + |
835 | 880 | @Override |
836 | 881 | public void destroy() throws Exception { |
837 | 882 | // GenAI Client doesn't need explicit closing |
|
0 commit comments