File tree Expand file tree Collapse file tree 4 files changed +67
-1
lines changed
packages/agents-realtime/src Expand file tree Collapse file tree 4 files changed +67
-1
lines changed Original file line number Diff line number Diff line change
1
+ ---
2
+ '@openai/agents-realtime': patch
3
+ ---
4
+
5
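+ Fixes issue #106, where overlapping user inputs caused null transcripts in history: completed input-audio transcription events are now merged into the matching user message in the session history.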
Original file line number Diff line number Diff line change @@ -35,6 +35,7 @@ import { RealtimeAgent } from './realtimeAgent';
35
35
import { RealtimeSessionEventTypes } from './realtimeSessionEvents' ;
36
36
import type { ApiKey , RealtimeTransportLayer } from './transportLayer' ;
37
37
import type { TransportToolCallEvent } from './transportLayerEvents' ;
38
+ import type { InputAudioTranscriptionCompletedEvent } from './transportLayerEvents' ;
38
39
import {
39
40
getLastTextFromAudioOutputMessage ,
40
41
hasWebRTCSupport ,
@@ -508,6 +509,26 @@ export class RealtimeSession<
508
509
#setEventListeners( ) {
509
510
this . #transport. on ( '*' , ( event ) => {
510
511
this . emit ( 'transport_event' , event ) ;
512
+ // Handle completed user transcription events
513
+ if (
514
+ event . type === 'conversation.item.input_audio_transcription.completed'
515
+ ) {
516
+ try {
517
+ const completedEvent = event as InputAudioTranscriptionCompletedEvent ;
518
+ this . #history = updateRealtimeHistory (
519
+ this . #history,
520
+ completedEvent ,
521
+ this . #shouldIncludeAudioData,
522
+ ) ;
523
+ this . #context. context . history = this . #history;
524
+ this . emit ( 'history_updated' , this . #history) ;
525
+ } catch ( err ) {
526
+ this . emit ( 'error' , {
527
+ type : 'error' ,
528
+ error : err ,
529
+ } ) ;
530
+ }
531
+ }
511
532
} ) ;
512
533
this . #transport. on ( 'audio' , ( event ) => {
513
534
this . emit ( 'audio' , event ) ;
Original file line number Diff line number Diff line change @@ -30,6 +30,16 @@ export type TransportLayerAudio = {
30
30
responseId : string ;
31
31
} ;
32
32
33
+ /**
34
+ * Event representing the completion of user audio transcription.
35
+ * Contains the finalized transcript string and the ID of the associated item.
36
+ */
37
+ export type InputAudioTranscriptionCompletedEvent = {
38
+ type : 'conversation.item.input_audio_transcription.completed' ;
39
+ item_id : string ;
40
+ transcript : string ;
41
+ } ;
42
+
33
43
export type TransportLayerTranscriptDelta = {
34
44
type : 'transcript_delta' ;
35
45
itemId : string ;
@@ -46,6 +56,7 @@ export type ConnectionStatus = 'connecting' | 'connected' | 'disconnected';
46
56
export type TransportEvent =
47
57
| TransportError
48
58
| TransportToolCallEvent
59
+ | InputAudioTranscriptionCompletedEvent
49
60
| {
50
61
type : string ;
51
62
[ key : string ] : any ;
Original file line number Diff line number Diff line change 1
1
import { RealtimeItem , RealtimeMessageItem } from './items' ;
2
+ import type { InputAudioTranscriptionCompletedEvent } from './transportLayerEvents' ;
2
3
import METADATA from './metadata' ;
3
4
4
5
/**
@@ -182,9 +183,37 @@ export function removeAudioFromContent(
182
183
*/
183
184
export function updateRealtimeHistory (
184
185
history : RealtimeItem [ ] ,
185
- event : RealtimeItem ,
186
+ event : RealtimeItem | InputAudioTranscriptionCompletedEvent ,
186
187
shouldIncludeAudioData : boolean ,
187
188
) : RealtimeItem [ ] {
189
+ // Merge transcript into placeholder input_audio message
190
+ if ( event . type === 'conversation.item.input_audio_transcription.completed' ) {
191
+ return history . map ( ( item ) => {
192
+ if (
193
+ item . itemId === event . item_id &&
194
+ item . type === 'message' &&
195
+ 'role' in item &&
196
+ item . role === 'user'
197
+ ) {
198
+ const updatedContent = item . content . map ( ( entry : any ) => {
199
+ if ( entry . type === 'input_audio' ) {
200
+ return {
201
+ ...entry ,
202
+ transcript : event . transcript ,
203
+ } ;
204
+ }
205
+ return entry ;
206
+ } ) ;
207
+
208
+ return {
209
+ ...item ,
210
+ content : updatedContent ,
211
+ status : 'completed' ,
212
+ } ;
213
+ }
214
+ return item ;
215
+ } ) ;
216
+ }
188
217
const newEvent =
189
218
! shouldIncludeAudioData && event . type === 'message'
190
219
? removeAudioFromContent ( event as any )
You can’t perform that action at this time.
0 commit comments