@@ -258,6 +258,22 @@ def on_agent_state_changed(event):
258258 """Handle agent state changes (initializing/listening/thinking/speaking)"""
259259 logger .info (f"π DEBUG: agent_state_changed: { event .old_state } β { event .new_state } " )
260260
261+ # Send TTS events based on state transitions
async def handle_state_change():
    """Send a TTS "started"/"finished" event for the current state transition.

    Takes no arguments: ``event`` and ``self`` are read from the enclosing
    scope. A transition into "speaking" sends "started"; a transition out of
    "speaking" sends "finished"; any other transition sends nothing.
    Exceptions raised while sending are logged with their traceback and are
    not re-raised.

    NOTE(review): the enclosing handler schedules this coroutine with a bare
    ``asyncio.create_task(...)`` and discards the result. The event loop only
    keeps a weak reference to tasks, so the caller should probably hold on to
    the returned task until it completes -- confirm.
    """
    try:
        if event.new_state == "speaking":
            logger.info("π DEBUG: Agent started speaking - sending TTS started event")
            phase = "started"
        elif event.old_state == "speaking":
            # new_state is already known not to be "speaking" here, so the
            # agent has just stopped talking.
            logger.info("π DEBUG: Agent stopped speaking - sending TTS finished event")
            phase = "finished"
        else:
            # Transition does not involve speech: nothing to announce.
            return

        # Emotion is read at send time, after the log line, as before.
        await self._send_tts_event(phase, "Agent Response", self.current_emotion, "session")
    except Exception as e:
        # logger.exception keeps the traceback; this runs as a background
        # task, so the log is the only place the failure is visible.
        logger.exception(f"Error handling agent state change TTS events: {e}")
274+
275+ asyncio .create_task (handle_state_change ())
276+
261277 if event .new_state == "speaking" :
262278 logger .info ("π DEBUG: Agent started speaking" )
263279 elif event .new_state == "listening" :
@@ -486,10 +502,10 @@ async def say_with_emotion(self, text: str, emotion: str = None):
486502 logger .info (f"π DEBUG: say_with_emotion emotion: { emotion } " )
487503
488504 if self .session :
489- # Send TTS_STARTED event
490- logger .info ("π DEBUG: About to send TTS_STARTED event" )
491- await self ._send_tts_event ("started" , text , emotion or self .current_emotion , "manual" )
492- logger .info ("π DEBUG: TTS_STARTED event sent successfully" )
505+ # Send TTS_STARTED event - COMMENTED OUT to prevent duplicates (using agent_state_changed instead)
506+ # logger.info("π DEBUG: About to send TTS_STARTED event")
507+ # await self._send_tts_event("started", text, emotion or self.current_emotion, "manual")
508+ # logger.info("π DEBUG: TTS_STARTED event sent successfully")
493509
494510 logger .info ("π DEBUG: Calling session.say() directly (bypasses llm_node)" )
495511 handle = await self .session .say (text )
@@ -504,10 +520,10 @@ async def say_with_emotion(self, text: str, emotion: str = None):
504520 logger .error (f"π DEBUG: handle.wait_for_playout() failed: { e } " )
505521 raise
506522
507- # Send TTS_FINISHED event
508- logger .info ("π DEBUG: About to send TTS_FINISHED event" )
509- await self ._send_tts_event ("finished" , text , emotion or self .current_emotion , "manual" )
510- logger .info ("π DEBUG: TTS_FINISHED event sent successfully" )
523+ # Send TTS_FINISHED event - COMMENTED OUT to prevent duplicates (using agent_state_changed instead)
524+ # logger.info("π DEBUG: About to send TTS_FINISHED event")
525+ # await self._send_tts_event("finished", text, emotion or self.current_emotion, "manual")
526+ # logger.info("π DEBUG: TTS_FINISHED event sent successfully")
511527
512528 if emotion :
513529 logger .info (f"π Speaking with emotion: { emotion } " )
0 commit comments