Skip to content

Commit f9c7357

Browse files
committed
Add preview text and final spoken text by agent to the TTS_EVENT
1 parent db91446 commit f9c7357

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

coffee_ws/src/coffee_voice_agent/scripts/agents/coffee_barista_agent.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,14 @@ def __init__(self):
8686
async def tts_node(self, text, model_settings=None):
8787
"""Override TTS node to process delimiter-based responses (emotion:text) with minimal buffering"""
8888

89+
# Initialize text tracking for TTS events
90+
self.state_manager.current_speech_preview = ""
91+
self.state_manager.current_speech_full_text = ""
92+
preview_set = False
93+
8994
# Process text stream with minimal buffering for emotion extraction
9095
async def process_text_stream():
96+
nonlocal preview_set
9197
first_chunk_buffer = ""
9298
emotion_extracted = False
9399
emotion_check_limit = 50 # Only check first 50 characters for emotion delimiter
@@ -130,6 +136,11 @@ async def process_text_stream():
130136
# Immediately yield the text part (no more buffering)
131137
if text_after_delimiter.strip():
132138
logger.info(f"💬 TTS streaming text immediately: {text_after_delimiter[:30]}{'...' if len(text_after_delimiter) > 30 else ''}")
139+
# Accumulate text and set preview
140+
self.state_manager.current_speech_full_text += text_after_delimiter
141+
if not preview_set:
142+
self.state_manager.current_speech_preview = text_after_delimiter[:50] + "..." if len(text_after_delimiter) > 50 else text_after_delimiter
143+
preview_set = True
133144
yield text_after_delimiter
134145
else:
135146
logger.warning("🔍 DEBUG: text_after_delimiter is empty or whitespace - nothing to yield!")
@@ -149,6 +160,11 @@ async def process_text_stream():
149160

150161
# Yield the buffered content immediately
151162
logger.info(f"💬 TTS fallback streaming: {first_chunk_buffer[:30]}{'...' if len(first_chunk_buffer) > 30 else ''}")
163+
# Accumulate text and set preview
164+
self.state_manager.current_speech_full_text += first_chunk_buffer
165+
if not preview_set:
166+
self.state_manager.current_speech_preview = first_chunk_buffer[:50] + "..." if len(first_chunk_buffer) > 50 else first_chunk_buffer
167+
preview_set = True
152168
yield first_chunk_buffer
153169

154170
# If we haven't extracted emotion yet and haven't hit limit, continue buffering
@@ -157,6 +173,11 @@ async def process_text_stream():
157173
else:
158174
# Either emotion already extracted, or we're past the check limit
159175
# Stream everything immediately
176+
# Accumulate text and set preview if not set
177+
self.state_manager.current_speech_full_text += text_chunk
178+
if not preview_set:
179+
self.state_manager.current_speech_preview = text_chunk[:50] + "..." if len(text_chunk) > 50 else text_chunk
180+
preview_set = True
160181
yield text_chunk
161182

162183
# Process the text stream and pass clean text to default TTS

coffee_ws/src/coffee_voice_agent/scripts/state/state_manager.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,10 @@ def __init__(self, agent=None):
4242
self.current_emotion = "waiting" # Track current emotional state
4343
self.emotion_history = [] # Log emotional journey
4444
self.ending_conversation = False # Flag to prevent timer conflicts during goodbye
45+
46+
# Text tracking for TTS events
47+
self.current_speech_preview = "" # Preview text for "started" events
48+
self.current_speech_full_text = "" # Accumulated full text for "finished" events
4549
self.virtual_request_queue = [] # Queue for virtual coffee requests
4650
self.announcing_virtual_request = False # Flag to prevent conflicts during announcements
4751
self.recent_greetings = [] # Track recent greetings to avoid repetition
@@ -264,11 +268,15 @@ async def handle_state_change():
264268
if event.new_state == "speaking":
265269
logger.info("🔍 DEBUG: Agent started speaking - sending TTS started event")
266270
current_emotion = self.current_emotion
267-
await self._send_tts_event("started", "Agent Response", current_emotion, "session")
271+
# Use preview text for started event
272+
text_to_send = self.current_speech_preview or "Agent Response"
273+
await self._send_tts_event("started", text_to_send, current_emotion, "session")
268274
elif event.old_state == "speaking" and event.new_state != "speaking":
269275
logger.info("🔍 DEBUG: Agent stopped speaking - sending TTS finished event")
270276
current_emotion = self.current_emotion
271-
await self._send_tts_event("finished", "Agent Response", current_emotion, "session")
277+
# Use full accumulated text for finished event
278+
text_to_send = self.current_speech_full_text or "Agent Response"
279+
await self._send_tts_event("finished", text_to_send, current_emotion, "session")
272280
except Exception as e:
273281
logger.error(f"Error handling agent state change TTS events: {e}")
274282

@@ -502,6 +510,10 @@ async def say_with_emotion(self, text: str, emotion: str = None):
502510
logger.info(f"🔍 DEBUG: say_with_emotion emotion: {emotion}")
503511

504512
if self.session:
513+
# Store text for TTS events
514+
self.current_speech_preview = text[:50] + "..." if len(text) > 50 else text
515+
self.current_speech_full_text = text
516+
505517
# Send TTS_STARTED event - COMMENTED OUT to prevent duplicates (using agent_state_changed instead)
506518
# logger.info("🔍 DEBUG: About to send TTS_STARTED event")
507519
# await self._send_tts_event("started", text, emotion or self.current_emotion, "manual")

0 commit comments

Comments
 (0)