Skip to content

Commit 5e4d0a7

Browse files
authored
Merge pull request #1876 from ejstuart/feature/intermediate-chat-event
Add intermediate chat event
2 parents 66bd02a + 7d25d3c commit 5e4d0a7

File tree

5 files changed

+89
-2
lines changed

5 files changed

+89
-2
lines changed

core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/ChatEvent.java

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* @see ContentFetchedEvent
2727
* @see AccumulatedResponseEvent
2828
* @see PartialThinkingEvent
29+
* @see IntermediateResponseEvent
2930
*/
3031
public class ChatEvent {
3132

@@ -60,7 +61,11 @@ public enum ChatEventType {
6061
/**
6162
* Indicates that a partial thinking/reasoning chunk has been received (experimental).
6263
*/
63-
PartialThinking
64+
PartialThinking,
65+
/**
66+
* Indicates that an intermediate response has been received during multi-turn interactions.
67+
*/
68+
IntermediateResponse
6469

6570
}
6671

@@ -301,4 +306,61 @@ public String getText() {
301306
}
302307
}
303308

309+
/**
310+
* Event emitted when an intermediate response is received during multi-turn interactions.
311+
* This event occurs when the AI responds with tool execution requests, indicating that
312+
* additional turns are needed to complete the conversation.
313+
*
314+
* <p>
315+
* This is particularly useful for:
316+
* <ul>
317+
* <li>Capturing AI reasoning (thinking) that led to tool execution decisions</li>
318+
* <li>Retrieving thinking signatures for validation/auditing</li>
319+
* <li>Supporting interleaved thinking (reasoning between tool calls)</li>
320+
* <li>Building complete audit trails of AI decision-making</li>
321+
* </ul>
322+
* </p>
323+
*
324+
* <p>
325+
* Example flow with extended thinking:
326+
*
327+
* <pre>
328+
* User: "What's the weather in Auckland?"
329+
* → IntermediateResponseEvent: thinking="I should check the time first", tools=[getCurrentTime]
330+
* → ToolExecutedEvent: getCurrentTime → "3:45 PM"
331+
* → IntermediateResponseEvent: thinking="Now I know it's afternoon, check weather", tools=[getWeather]
332+
* → ToolExecutedEvent: getWeather → "Sunny, 72°F"
333+
* → ChatCompletedEvent: text="It's 3:45 PM and sunny in Auckland"
334+
* </pre>
335+
* </p>
336+
*
337+
* <p>
338+
* <b>Note:</b> This event is essential for interleaved thinking to work, as it provides
339+
* access to thinking blocks that occur between tool calls.
340+
* </p>
341+
*/
342+
public static class IntermediateResponseEvent extends ChatEvent {
343+
private final ChatResponse chatResponse;
344+
345+
/**
346+
* Constructs a new IntermediateResponseEvent with the given chat response.
347+
*
348+
* @param chatResponse the intermediate chat response containing the AI's thinking,
349+
* tool execution requests, and metadata for this turn
350+
*/
351+
public IntermediateResponseEvent(ChatResponse chatResponse) {
352+
super(ChatEventType.IntermediateResponse);
353+
this.chatResponse = chatResponse;
354+
}
355+
356+
/**
357+
* Returns the intermediate chat response.
358+
*
359+
* @return the chat response containing thinking, tool requests, and attributes for this turn
360+
*/
361+
public ChatResponse getChatResponse() {
362+
return chatResponse;
363+
}
364+
}
365+
304366
}

core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/QuarkusAiServiceStreamingResponseHandler.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public class QuarkusAiServiceStreamingResponseHandler implements StreamingChatRe
5959
private final Consumer<PartialThinking> partialThinkingHandler;
6060
private final Consumer<Response<AiMessage>> completionHandler;
6161
private final Consumer<BeforeToolExecution> beforeToolExecutionHandler;
62+
private final Consumer<ChatResponse> intermediateResponseHandler;
6263
private final Consumer<ToolExecution> toolExecuteHandler;
6364
private final Consumer<ChatResponse> completeResponseHandler;
6465
private final Consumer<Throwable> errorHandler;
@@ -79,6 +80,7 @@ public class QuarkusAiServiceStreamingResponseHandler implements StreamingChatRe
7980
Consumer<String> partialResponseHandler,
8081
Consumer<PartialThinking> partialThinkingHandler,
8182
Consumer<BeforeToolExecution> beforeToolExecutionHandler,
83+
Consumer<ChatResponse> intermediateResponseHandler,
8284
Consumer<ToolExecution> toolExecuteHandler,
8385
Consumer<ChatResponse> completeResponseHandler,
8486
Consumer<Response<AiMessage>> completionHandler,
@@ -97,6 +99,7 @@ public class QuarkusAiServiceStreamingResponseHandler implements StreamingChatRe
9799
this.partialResponseHandler = ensureNotNull(partialResponseHandler, "partialResponseHandler");
98100
this.partialThinkingHandler = partialThinkingHandler;
99101
this.beforeToolExecutionHandler = beforeToolExecutionHandler;
102+
this.intermediateResponseHandler = intermediateResponseHandler;
100103
this.completeResponseHandler = completeResponseHandler;
101104
this.completionHandler = completionHandler;
102105
this.toolExecuteHandler = toolExecuteHandler;
@@ -125,6 +128,7 @@ public QuarkusAiServiceStreamingResponseHandler(AiServiceContext context, Invoca
125128
Consumer<String> partialResponseHandler,
126129
Consumer<PartialThinking> partialThinkingHandler,
127130
Consumer<BeforeToolExecution> beforeToolExecutionHandler,
131+
Consumer<ChatResponse> intermediateResponseHandler,
128132
Consumer<ToolExecution> toolExecuteHandler, Consumer<ChatResponse> completeResponseHandler,
129133
Consumer<Response<AiMessage>> completionHandler,
130134
Consumer<Throwable> errorHandler, List<ChatMessage> temporaryMemory, TokenUsage sum,
@@ -137,6 +141,7 @@ public QuarkusAiServiceStreamingResponseHandler(AiServiceContext context, Invoca
137141
this.partialResponseHandler = ensureNotNull(partialResponseHandler, "partialResponseHandler");
138142
this.partialThinkingHandler = partialThinkingHandler;
139143
this.beforeToolExecutionHandler = beforeToolExecutionHandler;
144+
this.intermediateResponseHandler = intermediateResponseHandler;
140145
this.toolExecuteHandler = toolExecuteHandler;
141146
this.completeResponseHandler = completeResponseHandler;
142147
this.completionHandler = completionHandler;
@@ -245,6 +250,9 @@ public void onCompleteResponse(ChatResponse completeResponse) {
245250
@Override
246251
public void run() {
247252
addToMemory(aiMessage);
253+
if (intermediateResponseHandler != null) {
254+
intermediateResponseHandler.accept(completeResponse);
255+
}
248256
for (ToolExecutionRequest toolExecutionRequest : aiMessage.toolExecutionRequests()) {
249257
// Call before tool execution handler
250258
if (beforeToolExecutionHandler != null) {
@@ -297,6 +305,7 @@ public void run() {
297305
partialResponseHandler,
298306
partialThinkingHandler,
299307
beforeToolExecutionHandler,
308+
intermediateResponseHandler,
300309
toolExecuteHandler,
301310
completeResponseHandler,
302311
completionHandler,

core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/QuarkusAiServiceTokenStream.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public class QuarkusAiServiceTokenStream implements TokenStream {
5353
private Consumer<Throwable> errorHandler;
5454
private Consumer<Response<AiMessage>> completionHandler;
5555
private Consumer<BeforeToolExecution> beforeToolExecutionHandler;
56+
private Consumer<ChatResponse> intermediateResponseHandler;
5657
private Consumer<ToolExecution> toolExecuteHandler;
5758
private Consumer<ChatResponse> completeResponseHandler;
5859

@@ -63,6 +64,7 @@ public class QuarkusAiServiceTokenStream implements TokenStream {
6364
private int onErrorInvoked;
6465
private int ignoreErrorsInvoked;
6566
private int beforeToolExecutionInvoked;
67+
private int onIntermediateResponseInvoked;
6668
private int toolExecuteInvoked;
6769

6870
public QuarkusAiServiceTokenStream(List<ChatMessage> messages,
@@ -120,6 +122,13 @@ public TokenStream onToolExecuted(Consumer<ToolExecution> toolExecuteHandler) {
120122
return this;
121123
}
122124

125+
@Override
126+
public TokenStream onIntermediateResponse(Consumer<ChatResponse> intermediateResponseHandler) {
127+
this.intermediateResponseHandler = intermediateResponseHandler;
128+
this.onIntermediateResponseInvoked++;
129+
return this;
130+
}
131+
123132
@Override
124133
public TokenStream onCompleteResponse(Consumer<ChatResponse> completionHandler) {
125134
this.completeResponseHandler = completionHandler;
@@ -156,6 +165,7 @@ public void start() {
156165
partialResponseHandler,
157166
partialThinkingHandler,
158167
beforeToolExecutionHandler,
168+
intermediateResponseHandler,
159169
toolExecuteHandler,
160170
completeResponseHandler,
161171
completionHandler,
@@ -209,6 +219,10 @@ private void validateConfiguration() {
209219
throw new IllegalConfigurationException("onPartialThinking can be invoked on TokenStream at most 1 time");
210220
}
211221

222+
if (onIntermediateResponseInvoked > 1) {
223+
throw new IllegalConfigurationException("onIntermediateResponse can be invoked on TokenStream at most 1 time");
224+
}
225+
212226
if (onErrorInvoked + ignoreErrorsInvoked != 1) {
213227
throw new IllegalConfigurationException("One of [onError, ignoreErrors] " +
214228
"must be invoked on TokenStream exactly 1 time");

core/runtime/src/main/java/io/quarkiverse/langchain4j/runtime/aiservice/TokenStreamMulti.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ private void createTokenStream(UnicastProcessor<ChatEvent> processor) {
7070
processor.onComplete();
7171
})
7272
.onRetrieved(content -> processor.onNext(new ChatEvent.ContentFetchedEvent(content)))
73+
.onIntermediateResponse(response -> processor.onNext(new ChatEvent.IntermediateResponseEvent(response)))
7374
.beforeToolExecution(
7475
beforeExecution -> processor.onNext(new ChatEvent.BeforeToolExecutionEvent(beforeExecution.request())))
7576
.onToolExecuted(execution -> processor.onNext(new ChatEvent.ToolExecutedEvent(execution)))

docs/modules/ROOT/pages/guide-streamed-responses.adoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ When using a `Multi<ChatEvent>` each emitted item is a subclass of `ChatEvent` w
6666
* `BeforeToolExecutionEvent` - Emitted before a tool is executed
6767
* `ToolExecutedEvent` - Tool execution metadata
6868
* `ContentFetchedEvent` - Content retrieved from RAG sources
69+
* `IntermediateResponseEvent` - An intermediate response during multi-turn interactions with tool execution
6970
* `ChatCompletedEvent` - The final LLM response
7071

71-
NOTE: When tools are used, the event flow is: `BeforeToolExecutionEvent` → (tool execution) → `ToolExecutedEvent`. This allows monitoring both before and after tool execution phases.
72+
NOTE: When tools are used, the event flow is: `IntermediateResponseEvent` → `BeforeToolExecutionEvent` → (tool execution) → `ToolExecutedEvent` → (recursive LLM call) → `ChatCompletedEvent`. The `IntermediateResponseEvent` captures the AI's thinking and tool execution requests before tools are executed.
7273

7374
[source,java]
7475
----

0 commit comments

Comments
 (0)