-
Notifications
You must be signed in to change notification settings - Fork 2k
Fix Bedrock Converse streaming and token handling #1763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -169,6 +169,10 @@ public BedrockProxyChatModel(BedrockRuntimeClient bedrockRuntimeClient, | |
| */ | ||
| @Override | ||
| public ChatResponse call(Prompt prompt) { | ||
| return this.internalCall(prompt, null); | ||
| } | ||
|
|
||
| private ChatResponse internalCall(Prompt prompt, ChatResponse perviousChatResponse) { | ||
|
|
||
| ConverseRequest converseRequest = this.createRequest(prompt); | ||
|
|
||
|
|
@@ -185,7 +189,7 @@ public ChatResponse call(Prompt prompt) { | |
|
|
||
| ConverseResponse converseResponse = this.bedrockRuntimeClient.converse(converseRequest); | ||
|
|
||
| var response = this.toChatResponse(converseResponse); | ||
| var response = this.toChatResponse(converseResponse, perviousChatResponse); | ||
|
|
||
| observationContext.setResponse(response); | ||
|
|
||
|
|
@@ -195,7 +199,7 @@ public ChatResponse call(Prompt prompt) { | |
| if (!this.isProxyToolCalls(prompt, this.defaultOptions) && chatResponse != null | ||
| && this.isToolCall(chatResponse, Set.of("tool_use"))) { | ||
| var toolCallConversation = this.handleToolCalls(prompt, chatResponse); | ||
| return this.call(new Prompt(toolCallConversation, prompt.getOptions())); | ||
| return this.internalCall(new Prompt(toolCallConversation, prompt.getOptions()), chatResponse); | ||
| } | ||
|
|
||
| return chatResponse; | ||
|
|
@@ -402,7 +406,7 @@ else if (mediaData instanceof URL url) { | |
| * @param response The Bedrock Converse response. | ||
| * @return The ChatResponse entity. | ||
| */ | ||
| private ChatResponse toChatResponse(ConverseResponse response) { | ||
| private ChatResponse toChatResponse(ConverseResponse response, ChatResponse perviousChatResponse) { | ||
|
|
||
| Assert.notNull(response, "'response' must not be null."); | ||
|
|
||
|
|
@@ -448,8 +452,19 @@ private ChatResponse toChatResponse(ConverseResponse response) { | |
| allGenerations.add(toolCallGeneration); | ||
| } | ||
|
|
||
| DefaultUsage usage = new DefaultUsage(response.usage().inputTokens().longValue(), | ||
| response.usage().outputTokens().longValue(), response.usage().totalTokens().longValue()); | ||
| Long promptTokens = response.usage().inputTokens().longValue(); | ||
| Long generationTokens = response.usage().outputTokens().longValue(); | ||
| Long totalTokens = response.usage().totalTokens().longValue(); | ||
|
|
||
| if (perviousChatResponse != null && perviousChatResponse.getMetadata() != null | ||
| && perviousChatResponse.getMetadata().getUsage() != null) { | ||
|
|
||
| promptTokens += perviousChatResponse.getMetadata().getUsage().getPromptTokens(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have manually verified this step. Is there an effective way we can add a test to validate this additive operation in our tests? |
||
| generationTokens += perviousChatResponse.getMetadata().getUsage().getGenerationTokens(); | ||
| totalTokens += perviousChatResponse.getMetadata().getUsage().getTotalTokens(); | ||
| } | ||
|
|
||
| DefaultUsage usage = new DefaultUsage(promptTokens, generationTokens, totalTokens); | ||
|
|
||
| Document modelResponseFields = response.additionalModelResponseFields(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't seem to use |
||
|
|
||
|
|
@@ -473,14 +488,16 @@ private ChatResponse toChatResponse(ConverseResponse response) { | |
| */ | ||
| @Override | ||
| public Flux<ChatResponse> stream(Prompt prompt) { | ||
| return this.internalStream(prompt, null); | ||
| } | ||
|
|
||
| private Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse perviousChatResponse) { | ||
| Assert.notNull(prompt, "'prompt' must not be null"); | ||
|
|
||
| return Flux.deferContextual(contextView -> { | ||
|
|
||
| ConverseRequest converseRequest = this.createRequest(prompt); | ||
|
|
||
| // System.out.println(">>>>> CONVERSE REQUEST: " + converseRequest); | ||
|
|
||
| ChatModelObservationContext observationContext = ChatModelObservationContext.builder() | ||
| .prompt(prompt) | ||
| .provider(AiProvider.BEDROCK_CONVERSE.value()) | ||
|
|
@@ -504,13 +521,14 @@ public Flux<ChatResponse> stream(Prompt prompt) { | |
| Flux<ConverseStreamOutput> response = converseStream(converseStreamRequest); | ||
|
|
||
| // @formatter:off | ||
| Flux<ChatResponse> chatResponses = ConverseApiUtils.toChatResponse(response); | ||
| Flux<ChatResponse> chatResponses = ConverseApiUtils.toChatResponse(response, perviousChatResponse); | ||
|
|
||
| Flux<ChatResponse> chatResponseFlux = chatResponses.switchMap(chatResponse -> { | ||
| if (!this.isProxyToolCalls(prompt, this.defaultOptions) && chatResponse != null | ||
| && this.isToolCall(chatResponse, Set.of("tool_use"))) { | ||
|
|
||
| var toolCallConversation = this.handleToolCalls(prompt, chatResponse); | ||
| return this.stream(new Prompt(toolCallConversation, prompt.getOptions())); | ||
| return this.internalStream(new Prompt(toolCallConversation, prompt.getOptions()), chatResponse); | ||
| } | ||
| return Mono.just(chatResponse); | ||
| }) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need test coverage to test this flow - especially to verify the final ChatResponse containing the preceding one.