Skip to content

Commit 6e0d1bd

Browse files
authored
fix: merge completed input_audio transcripts into realtime history (#151)
1 parent 9f7ab01 commit 6e0d1bd

File tree

4 files changed

+67
-1
lines changed

4 files changed

+67
-1
lines changed

.changeset/salty-actors-grin.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@openai/agents-realtime': patch
3+
---
4+
5+
Fixes issue #106, where overlapping user inputs caused null transcripts in history.

packages/agents-realtime/src/realtimeSession.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import { RealtimeAgent } from './realtimeAgent';
3535
import { RealtimeSessionEventTypes } from './realtimeSessionEvents';
3636
import type { ApiKey, RealtimeTransportLayer } from './transportLayer';
3737
import type { TransportToolCallEvent } from './transportLayerEvents';
38+
import type { InputAudioTranscriptionCompletedEvent } from './transportLayerEvents';
3839
import {
3940
getLastTextFromAudioOutputMessage,
4041
hasWebRTCSupport,
@@ -508,6 +509,26 @@ export class RealtimeSession<
508509
#setEventListeners() {
509510
this.#transport.on('*', (event) => {
510511
this.emit('transport_event', event);
512+
// Handle completed user transcription events
513+
if (
514+
event.type === 'conversation.item.input_audio_transcription.completed'
515+
) {
516+
try {
517+
const completedEvent = event as InputAudioTranscriptionCompletedEvent;
518+
this.#history = updateRealtimeHistory(
519+
this.#history,
520+
completedEvent,
521+
this.#shouldIncludeAudioData,
522+
);
523+
this.#context.context.history = this.#history;
524+
this.emit('history_updated', this.#history);
525+
} catch (err) {
526+
this.emit('error', {
527+
type: 'error',
528+
error: err,
529+
});
530+
}
531+
}
511532
});
512533
this.#transport.on('audio', (event) => {
513534
this.emit('audio', event);

packages/agents-realtime/src/transportLayerEvents.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ export type TransportLayerAudio = {
3030
responseId: string;
3131
};
3232

33+
/**
34+
* Event representing the completion of user audio transcription.
35+
* Contains the finalized transcript string and the ID of the associated item.
36+
*/
37+
export type InputAudioTranscriptionCompletedEvent = {
38+
type: 'conversation.item.input_audio_transcription.completed';
39+
item_id: string;
40+
transcript: string;
41+
};
42+
3343
export type TransportLayerTranscriptDelta = {
3444
type: 'transcript_delta';
3545
itemId: string;
@@ -46,6 +56,7 @@ export type ConnectionStatus = 'connecting' | 'connected' | 'disconnected';
4656
export type TransportEvent =
4757
| TransportError
4858
| TransportToolCallEvent
59+
| InputAudioTranscriptionCompletedEvent
4960
| {
5061
type: string;
5162
[key: string]: any;

packages/agents-realtime/src/utils.ts

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { RealtimeItem, RealtimeMessageItem } from './items';
2+
import type { InputAudioTranscriptionCompletedEvent } from './transportLayerEvents';
23
import METADATA from './metadata';
34

45
/**
@@ -182,9 +183,37 @@ export function removeAudioFromContent(
182183
*/
183184
export function updateRealtimeHistory(
184185
history: RealtimeItem[],
185-
event: RealtimeItem,
186+
event: RealtimeItem | InputAudioTranscriptionCompletedEvent,
186187
shouldIncludeAudioData: boolean,
187188
): RealtimeItem[] {
189+
// Merge transcript into placeholder input_audio message
190+
if (event.type === 'conversation.item.input_audio_transcription.completed') {
191+
return history.map((item) => {
192+
if (
193+
item.itemId === event.item_id &&
194+
item.type === 'message' &&
195+
'role' in item &&
196+
item.role === 'user'
197+
) {
198+
const updatedContent = item.content.map((entry: any) => {
199+
if (entry.type === 'input_audio') {
200+
return {
201+
...entry,
202+
transcript: event.transcript,
203+
};
204+
}
205+
return entry;
206+
});
207+
208+
return {
209+
...item,
210+
content: updatedContent,
211+
status: 'completed',
212+
};
213+
}
214+
return item;
215+
});
216+
}
188217
const newEvent =
189218
!shouldIncludeAudioData && event.type === 'message'
190219
? removeAudioFromContent(event as any)

0 commit comments

Comments
 (0)