@@ -37,6 +37,7 @@ def __init__(self) -> None:
3737 self .analytics_client = AnalyticsClient ()
3838 self .traces_flow_manager : Optional [TracesFlowManager ] = None
3939 self .playground : bool = False
40+
4041 def set_session_id (self , session_id : str ):
4142 """Set the session ID for metrics tracking"""
4243 self .analytics_client .set_session_id (session_id )
@@ -98,44 +99,56 @@ async def _start_new_interaction(self) -> None:
9899 ** RealtimeMetricsCollector ._agent_info
99100 )
100101 self .turns .append (self .current_turn )
102+ self .last_user_activity_time = None
103+
104+ def mark_user_activity (self , timestamp : Optional [float ] = None ) -> None :
105+ """Mark the time of the last user activity (e.g. transcription received)"""
106+ self .last_user_activity_time = timestamp if timestamp is not None else time .perf_counter ()
101107
102108 async def set_user_speech_start (self ) -> None :
103- if self .current_turn :
109+ if self .current_turn and self .current_turn .agent_speech_start_time is not None and self .current_turn .agent_speech_end_time is None :
110+ await self .set_interrupted ()
111+ if self .current_turn and (self .current_turn .user_speech_start_time is not None ) and (self .current_turn .user_speech_end_time is not None ):
104112 self ._finalize_interaction_and_send ()
105113
106114 await self ._start_new_interaction ()
107115 if self .current_turn and self .current_turn .user_speech_start_time is None :
108116 self .current_turn .user_speech_start_time = time .perf_counter ()
109- await self .start_timeline_event ("user_speech" )
110-
111- async def set_user_speech_end (self ) -> None :
112- if self .current_turn and self .current_turn .user_speech_end_time is None :
113- self .current_turn .user_speech_end_time = time .perf_counter ()
114- await self .end_timeline_event ("user_speech" )
117+ await self .start_timeline_event ("user_speech" , self .current_turn .user_speech_start_time )
118+
119+ async def set_user_speech_end (self , timestamp : Optional [float ] = None ) -> None :
120+ if self .current_turn and (self .current_turn .user_speech_start_time is not None ) and (self .current_turn .user_speech_end_time is None ):
121+ if timestamp is not None :
122+ self .current_turn .user_speech_end_time = timestamp
123+ elif self .last_user_activity_time is not None :
124+ self .current_turn .user_speech_end_time = self .last_user_activity_time
125+ else :
126+ self .current_turn .user_speech_end_time = time .perf_counter ()
127+ await self .end_timeline_event ("user_speech" , self .current_turn .user_speech_end_time )
115128
116129 async def set_agent_speech_start (self ) -> None :
117130 if not self .current_turn :
118131 await self ._start_new_interaction ()
119- elif self .current_turn .user_speech_start_time is not None and self .current_turn .user_speech_end_time is None :
120- self .current_turn .user_speech_end_time = time .perf_counter ()
121-
122- await self .end_timeline_event ("user_speech" )
132+ elif (self .current_turn .user_speech_start_time is not None ) and (self .current_turn .user_speech_end_time is None ):
133+ await self .set_user_speech_end ()
123134
124135 if self .current_turn and self .current_turn .agent_speech_start_time is None :
125136 self .current_turn .agent_speech_start_time = time .perf_counter ()
126- await self .start_timeline_event ("agent_speech" )
137+ await self .start_timeline_event ("agent_speech" , self . current_turn . agent_speech_start_time )
127138 if self .agent_speech_end_timer :
128139 self .agent_speech_end_timer .cancel ()
129140
130141 async def set_agent_speech_end (self , timeout : float = 1.0 ) -> None :
131142 if self .current_turn :
143+ if self .current_turn .agent_speech_start_time is None :
144+ return
132145 self .current_turn .agent_speech_end_time = time .perf_counter ()
133146 if self .agent_speech_end_timer :
134147 self .agent_speech_end_timer .cancel ()
135148
136149 loop = asyncio .get_event_loop ()
137150 self .agent_speech_end_timer = loop .call_later (timeout , self ._finalize_interaction_and_send )
138- await self .end_timeline_event ("agent_speech" )
151+ await self .end_timeline_event ("agent_speech" , self . current_turn . agent_speech_end_time )
139152
140153 async def set_a2a_handoff (self ) -> None :
141154 """Set the A2A enabled and handoff occurred flags for the current turn in A2A scenarios."""
@@ -157,12 +170,15 @@ def _finalize_agent_speech(self) -> None:
157170 self .agent_speech_end_timer = None
158171
159172 def _finalize_interaction_and_send (self ) -> None :
173+ if self .agent_speech_end_timer :
174+ self .agent_speech_end_timer .cancel ()
175+ self .agent_speech_end_timer = None
160176 if not self .current_turn :
161177 return
162178
163179 self ._finalize_agent_speech ()
164180
165- if self .current_turn .user_speech_start_time and not self .current_turn .user_speech_end_time :
181+ if ( self .current_turn .user_speech_start_time is not None ) and ( self .current_turn .user_speech_end_time is None ) :
166182 self .current_turn .user_speech_end_time = time .perf_counter ()
167183
168184 current_time = time .perf_counter ()
@@ -216,19 +232,18 @@ async def add_timeline_event(self, event: TimelineEvent) -> None:
216232 if self .current_turn :
217233 self .current_turn .timeline .append (event )
218234
219- async def start_timeline_event (self , event_type : str ) -> None :
235+ async def start_timeline_event (self , event_type : str , start_time : float ) -> None :
220236 """Start a timeline event with a precise start time"""
221237 if self .current_turn :
222238 event = TimelineEvent (
223239 event_type = event_type ,
224- start_time = time . perf_counter ()
240+ start_time = start_time
225241 )
226242 self .current_turn .timeline .append (event )
227243
228- async def end_timeline_event (self , event_type : str ) -> None :
244+ async def end_timeline_event (self , event_type : str , end_time : float ) -> None :
229245 """End a timeline event and calculate duration"""
230246 if self .current_turn :
231- end_time = time .perf_counter ()
232247 for event in reversed (self .current_turn .timeline ):
233248 if event .event_type == event_type and event .end_time is None :
234249 event .end_time = end_time
@@ -253,10 +268,7 @@ async def set_user_transcript(self, text: str) -> None:
253268 if self .current_turn :
254269 if self .current_turn .user_speech_start_time is None :
255270 self .current_turn .user_speech_start_time = time .perf_counter ()
256- await self .start_timeline_event ("user_speech" )
257- if self .current_turn .user_speech_end_time is None :
258- self .current_turn .user_speech_end_time = time .perf_counter ()
259- await self .end_timeline_event ("user_speech" )
271+ await self .start_timeline_event ("user_speech" , self .current_turn .user_speech_start_time )
260272 logger .info (f"user input speech: { text } " )
261273 await self .update_timeline_event_text ("user_speech" , text )
262274
@@ -265,7 +277,7 @@ async def set_agent_response(self, text: str) -> None:
265277 if self .current_turn :
266278 if self .current_turn .agent_speech_start_time is None :
267279 self .current_turn .agent_speech_start_time = time .perf_counter ()
268- await self .start_timeline_event ("agent_speech" )
280+ await self .start_timeline_event ("agent_speech" , self . current_turn . agent_speech_start_time )
269281 logger .info (f"agent output speech: { text } " )
270282 await self .update_timeline_event_text ("agent_speech" , text )
271283
@@ -276,8 +288,19 @@ def set_realtime_model_error(self, error: Dict[str, Any]) -> None:
276288 self .current_turn .realtime_model_errors .append (error )
277289
278290 async def set_interrupted (self ) -> None :
291+ """
292+ Handle interruption by finalizing the current turn immediately.
293+ Only marks as interrupted if the agent was actually speaking.
294+ """
279295 if self .current_turn :
280- self .current_turn .interrupted = True
296+ if self .current_turn .agent_speech_start_time is not None :
297+ self .current_turn .interrupted = True
298+ if self .current_turn .agent_speech_end_time is None :
299+ self .current_turn .agent_speech_end_time = time .perf_counter ()
300+ await self .end_timeline_event ("agent_speech" , self .current_turn .agent_speech_end_time )
301+ self ._finalize_interaction_and_send ()
302+ else :
303+ logger .debug ("Interrupt signal received but agent hadn't started speaking - ignoring to preserve turn" )
281304
282305 def finalize_session (self ) -> None :
283306 asyncio .run_coroutine_threadsafe (self ._start_new_interaction (), asyncio .get_event_loop ())
0 commit comments