Skip to content

Commit 311edf8

Browse files
committed
Add TTS events
1 parent 92876d8 commit 311edf8

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

coffee_ws/src/coffee_voice_agent/coffee_voice_agent/voice_agent_bridge.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,21 @@ async def _handle_websocket_message(self, message: str):
194194
# Handle startup/ready events from voice agent
195195
self.get_logger().info(f"Voice agent startup: {data.get('message', 'Ready')} (version: {data.get('version', 'unknown')})")
196196

197+
elif message_type == 'TTS_EVENT':
198+
# Handle TTS started/finished events - parse nested data structure
199+
event_data = data.get('data', {})
200+
event = event_data.get('event', 'unknown')
201+
emotion = event_data.get('emotion', 'unknown')
202+
source = event_data.get('source', 'unknown')
203+
text = event_data.get('text', '')
204+
text_preview = text[:50] + "..." if len(text) > 50 else text
205+
206+
self.get_logger().info(f"TTS {event}: emotion={emotion}, source={source}, text='{text_preview}'")
207+
208+
# TODO: Publish to ROS2 topics when robot coordination is ready
209+
# self.tts_started_pub.publish(...) for event == "started"
210+
# self.tts_finished_pub.publish(...) for event == "finished"
211+
197212
else:
198213
self.get_logger().warn(f"Unknown message type from voice agent: {message_type}")
199214

coffee_ws/src/coffee_voice_agent/scripts/state/state_manager.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -524,8 +524,17 @@ async def say_with_emotion(self, text: str, emotion: str = None):
524524
logger.info(f"🔍 DEBUG: say_with_emotion emotion: {emotion}")
525525

526526
if self.session:
527+
# Send TTS_STARTED event
528+
await self._send_tts_event("started", text, emotion or self.current_emotion, "manual")
529+
527530
logger.info("🔍 DEBUG: Calling session.say() directly (bypasses llm_node)")
528-
await self.session.say(text)
531+
handle = await self.session.say(text)
532+
533+
# Wait for TTS completion
534+
await handle.wait_for_playout()
535+
536+
# Send TTS_FINISHED event
537+
await self._send_tts_event("finished", text, emotion or self.current_emotion, "manual")
529538

530539
if emotion:
531540
logger.info(f"🎭 Speaking with emotion: {emotion}")
@@ -537,4 +546,18 @@ def get_random_greeting(self) -> str:
537546
"""Get a random greeting from the greeting pool"""
538547
from utils.greeting_data import get_random_greeting
539548

540-
return get_random_greeting()
549+
return get_random_greeting()
550+
551+
async def _send_tts_event(self, event: str, text: str, emotion: str, source: str):
    """Forward a TTS lifecycle notification through the agent's WebSocket hook.

    Builds a payload dict and hands it to ``self.agent._send_websocket_event``
    under the message type ``"TTS_EVENT"``.

    Args:
        event: Lifecycle label stored in the payload (the visible caller
            passes "started" and "finished").
        text: The spoken text. Only the first 100 characters followed by
            "..." are sent when it is longer than 100 characters.
        emotion: Emotion label, forwarded unchanged.
        source: Origin label, forwarded unchanged (the visible caller
            passes "manual").

    If no agent is attached, or the agent does not expose
    ``_send_websocket_event``, nothing is sent and a debug line is logged.
    """
    agent = self.agent
    if not agent or not hasattr(agent, '_send_websocket_event'):
        logger.debug(f"Cannot send TTS {event} event - no agent WebSocket connection")
        return

    # Keep the payload small: long utterances are clipped to 100 characters.
    if len(text) > 100:
        clipped_text = text[:100] + "..."
    else:
        clipped_text = text

    payload = {
        "event": event,
        "text": clipped_text,
        "emotion": emotion,
        "source": source,
        "timestamp": datetime.now().isoformat(),
    }
    await agent._send_websocket_event("TTS_EVENT", payload)

0 commit comments

Comments
 (0)