@@ -97,6 +97,11 @@ def __init__(self, name: str):
9797 self .total_recv_audio_duration = 0
9898 self .total_recv_audio_chunks_len = 0
9999
100+ # Tracks which request_id's audio is currently being sent
101+ # Set in send_tts_audio_start(), reset in send_tts_audio_end() and flush
102+ # Used by send_tts_audio_data() to attach correct metadata to audio frames
103+ self .current_audio_request_id = None
104+
100105 def _can_transition_to (self , request_id : str , new_state : RequestState ) -> bool :
101106 """Check if state transition is valid."""
102107 current_state = self .request_states .get (request_id )
@@ -298,8 +303,9 @@ async def _flush_input_items(self):
298303 self .request_states .clear ()
299304 self .metadatas .clear ()
300305
301- # Reset processing request ID
306+ # Reset processing request ID and current audio request ID
302307 self ._processing_request_id = None
308+ self .current_audio_request_id = None
303309
304310 self .ten_env .log_debug ("Cleared all request states, metadata, and pending messages after flush" )
305311
@@ -405,6 +411,7 @@ async def send_tts_audio_data(self, audio_data: bytes, timestamp: int = 0) -> No
405411 )
406412 f .alloc_buf (len (combined_data ))
407413 f .set_timestamp (timestamp )
414+ f .set_property_from_json ("metadata" , json .dumps (self .metadatas .get (self .current_audio_request_id , {})))
408415 buff = f .lock_buf ()
409416 buff [:] = combined_data
410417 f .unlock_buf (buff )
@@ -443,6 +450,9 @@ async def send_tts_ttfb_metrics(
443450 async def send_tts_audio_start (
444451 self , request_id : str , turn_id : int = - 1 , extra_metadata : dict | None = None
445452 ) -> None :
453+ # Set current_audio_request_id to track which request's audio is being sent
454+ self .current_audio_request_id = request_id
455+
446456 new_metadata = self .update_metadata (request_id , extra_metadata )
447457
448458 data = Data .create ("tts_audio_start" )
@@ -489,6 +499,10 @@ async def send_tts_audio_end(
489499 # Clean up metadata when audio_end is sent
490500 self .metadatas .pop (request_id , None )
491501
502+ # Reset current_audio_request_id (audio phase complete)
503+ if self .current_audio_request_id == request_id :
504+ self .current_audio_request_id = None
505+
492506 async def send_tts_error (
493507 self ,
494508 request_id : str | None ,
@@ -670,6 +684,10 @@ async def finish_request(
670684 # This is a defensive cleanup in case audio_end wasn't sent
671685 self .metadatas .pop (request_id , None )
672686
687+ # Defensive reset of current_audio_request_id for error paths
688+ if self .current_audio_request_id == request_id :
689+ self .current_audio_request_id = None
690+
673691 # Handle request completion and buffered messages release
674692 # Only process if this is the currently processing request
675693 if self ._processing_request_id == request_id :
0 commit comments