diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java
index 75ee33eb984..2b6fba9ef7d 100644
--- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java
@@ -196,26 +196,26 @@ public ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespons
 				return new ChatResponse(List.of());
 			}
 
-			List<Generation> generations = choices.stream().map(choice -> { // @formatter:off
+			List<Generation> generations = choices.stream().map(choice -> {
 				Map<String, Object> metadata = Map.of(
 						"id", chatCompletion.id() != null ? chatCompletion.id() : "",
 						"role", choice.message().role() != null ? choice.message().role().name() : "",
 						"index", choice.index(),
 						"finishReason", choice.finishReason() != null ? choice.finishReason().name() : "",
 						"refusal", StringUtils.hasText(choice.message().refusal()) ? choice.message().refusal() : "");
-				// @formatter:on
 				return buildGeneration(choice, metadata, request);
 			}).toList();
+			// @formatter:on
 
 			RateLimit rateLimit = OpenAiResponseHeaderExtractor.extractAiResponseHeaders(completionEntity);
 
 			// Current usage
-			OpenAiApi.Usage usage = completionEntity.getBody().usage();
+			OpenAiApi.Usage usage = chatCompletion.usage();
 			Usage currentChatResponseUsage = usage != null ? getDefaultUsage(usage) : new EmptyUsage();
 			Usage accumulatedUsage = UsageUtils.getCumulativeUsage(currentChatResponseUsage, previousChatResponse);
 			ChatResponse chatResponse = new ChatResponse(generations,
-					from(completionEntity.getBody(), rateLimit, accumulatedUsage));
+					from(chatCompletion, rateLimit, accumulatedUsage));
 
 			observationContext.setResponse(chatResponse);