4747import org .springframework .ai .chat .messages .UserMessage ;
4848import org .springframework .ai .chat .metadata .ChatGenerationMetadata ;
4949import org .springframework .ai .chat .metadata .ChatResponseMetadata ;
50+ import org .springframework .ai .chat .metadata .EmptyUsage ;
51+ import org .springframework .ai .chat .metadata .Usage ;
52+ import org .springframework .ai .chat .metadata .UsageUtils ;
5053import org .springframework .ai .chat .model .AbstractToolCallSupport ;
5154import org .springframework .ai .chat .model .ChatModel ;
5255import org .springframework .ai .chat .model .ChatResponse ;
@@ -210,6 +213,10 @@ public AnthropicChatModel(AnthropicApi anthropicApi, AnthropicChatOptions defaul
210213
211214 @ Override
212215 public ChatResponse call (Prompt prompt ) {
216+ return this .internalCall (prompt , null );
217+ }
218+
219+ public ChatResponse internalCall (Prompt prompt , ChatResponse previousChatResponse ) {
213220 ChatCompletionRequest request = createRequest (prompt , false );
214221
215222 ChatModelObservationContext observationContext = ChatModelObservationContext .builder ()
@@ -226,8 +233,14 @@ public ChatResponse call(Prompt prompt) {
226233 ResponseEntity <ChatCompletionResponse > completionEntity = this .retryTemplate
227234 .execute (ctx -> this .anthropicApi .chatCompletionEntity (request ));
228235
229- ChatResponse chatResponse = toChatResponse (completionEntity .getBody ());
236+ AnthropicApi .ChatCompletionResponse completionResponse = completionEntity .getBody ();
237+ AnthropicApi .Usage usage = completionResponse .usage ();
230238
239+ Usage currentChatResponseUsage = usage != null ? AnthropicUsage .from (completionResponse .usage ())
240+ : new EmptyUsage ();
241+ Usage accumulatedUsage = UsageUtils .getCumulativeUsage (currentChatResponseUsage , previousChatResponse );
242+
243+ ChatResponse chatResponse = toChatResponse (completionEntity .getBody (), accumulatedUsage );
231244 observationContext .setResponse (chatResponse );
232245
233246 return chatResponse ;
@@ -236,14 +249,18 @@ public ChatResponse call(Prompt prompt) {
236249 if (!isProxyToolCalls (prompt , this .defaultOptions ) && response != null
237250 && this .isToolCall (response , Set .of ("tool_use" ))) {
238251 var toolCallConversation = handleToolCalls (prompt , response );
239- return this .call (new Prompt (toolCallConversation , prompt .getOptions ()));
252+ return this .internalCall (new Prompt (toolCallConversation , prompt .getOptions ()), response );
240253 }
241254
242255 return response ;
243256 }
244257
245258 @ Override
246259 public Flux <ChatResponse > stream (Prompt prompt ) {
260+ return this .internalStream (prompt , null );
261+ }
262+
263+ public Flux <ChatResponse > internalStream (Prompt prompt , ChatResponse previousChatResponse ) {
247264 return Flux .deferContextual (contextView -> {
248265 ChatCompletionRequest request = createRequest (prompt , true );
249266
@@ -263,11 +280,14 @@ public Flux<ChatResponse> stream(Prompt prompt) {
263280
264281 // @formatter:off
265282 Flux <ChatResponse > chatResponseFlux = response .switchMap (chatCompletionResponse -> {
266- ChatResponse chatResponse = toChatResponse (chatCompletionResponse );
283+ AnthropicApi .Usage usage = chatCompletionResponse .usage ();
284+ Usage currentChatResponseUsage = usage != null ? AnthropicUsage .from (chatCompletionResponse .usage ()) : new EmptyUsage ();
285+ Usage accumulatedUsage = UsageUtils .getCumulativeUsage (currentChatResponseUsage , previousChatResponse );
286+ ChatResponse chatResponse = toChatResponse (chatCompletionResponse , accumulatedUsage );
267287
268288 if (!isProxyToolCalls (prompt , this .defaultOptions ) && this .isToolCall (chatResponse , Set .of ("tool_use" ))) {
269289 var toolCallConversation = handleToolCalls (prompt , chatResponse );
270- return this .stream (new Prompt (toolCallConversation , prompt .getOptions ()));
290+ return this .internalStream (new Prompt (toolCallConversation , prompt .getOptions ()), chatResponse );
271291 }
272292
273293 return Mono .just (chatResponse );
@@ -281,7 +301,7 @@ public Flux<ChatResponse> stream(Prompt prompt) {
281301 });
282302 }
283303
284- private ChatResponse toChatResponse (ChatCompletionResponse chatCompletion ) {
304+ private ChatResponse toChatResponse (ChatCompletionResponse chatCompletion , Usage usage ) {
285305
286306 if (chatCompletion == null ) {
287307 logger .warn ("Null chat completion returned" );
@@ -327,12 +347,15 @@ private ChatResponse toChatResponse(ChatCompletionResponse chatCompletion) {
327347 allGenerations .add (toolCallGeneration );
328348 }
329349
330- return new ChatResponse (allGenerations , this .from (chatCompletion ));
350+ return new ChatResponse (allGenerations , this .from (chatCompletion , usage ));
331351 }
332352
333353 private ChatResponseMetadata from (AnthropicApi .ChatCompletionResponse result ) {
354+ return from (result , AnthropicUsage .from (result .usage ()));
355+ }
356+
357+ private ChatResponseMetadata from (AnthropicApi .ChatCompletionResponse result , Usage usage ) {
334358 Assert .notNull (result , "Anthropic ChatCompletionResult must not be null" );
335- AnthropicUsage usage = AnthropicUsage .from (result .usage ());
336359 return ChatResponseMetadata .builder ()
337360 .withId (result .id ())
338361 .withModel (result .model ())
0 commit comments