Skip to content

Commit e90e3b1

Browse files
committed
Publish unified agent status
1 parent eaf21fc commit e90e3b1

File tree

3 files changed

+164
-15
lines changed

3 files changed

+164
-15
lines changed

coffee_ws/src/coffee_voice_agent/coffee_voice_agent/voice_agent_bridge.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from rclpy.callback_groups import ReentrantCallbackGroup
2222
from std_msgs.msg import String, Bool
2323
from geometry_msgs.msg import Twist
24-
from coffee_voice_agent_msgs.msg import TtsEvent, AgentState, EmotionState, ConversationItem
24+
from coffee_voice_agent_msgs.msg import TtsEvent, AgentState, EmotionState, ConversationItem, AgentStatus, ToolEvent
2525

2626
try:
2727
import websockets
@@ -84,6 +84,20 @@ def __init__(self):
8484
callback_group=self.callback_group
8585
)
8686

87+
self.agent_status_pub = self.create_publisher(
88+
AgentStatus,
89+
'voice_agent/status',
90+
10,
91+
callback_group=self.callback_group
92+
)
93+
94+
self.tool_event_pub = self.create_publisher(
95+
ToolEvent,
96+
'voice_agent/tool_events',
97+
10,
98+
callback_group=self.callback_group
99+
)
100+
87101
self.connected_pub = self.create_publisher(
88102
Bool,
89103
'voice_agent/connected',
@@ -259,6 +273,63 @@ async def _handle_websocket_message(self, message: str):
259273
tts_msg.timestamp = self.get_clock().now().to_msg()
260274
self.tts_events_pub.publish(tts_msg)
261275

276+
elif message_type == 'AGENT_STATUS':
277+
# Handle unified agent status events
278+
status_data = data.get('data', {})
279+
280+
self.get_logger().info(f"Agent Status: mode={status_data.get('behavioral_mode', 'unknown')}, speech={status_data.get('speech_status', 'unknown')}, emotion={status_data.get('emotion', 'unknown')}")
281+
282+
# Publish unified agent status to ROS2 topic
283+
status_msg = AgentStatus()
284+
status_msg.behavioral_mode = status_data.get('behavioral_mode', 'unknown')
285+
status_msg.speech_status = status_data.get('speech_status', 'unknown')
286+
status_msg.emotion = status_data.get('emotion', 'unknown')
287+
status_msg.speech_text = status_data.get('speech_text', '')
288+
status_msg.previous_emotion = status_data.get('previous_emotion', 'unknown')
289+
status_msg.conversation_phase = status_data.get('conversation_phase', '')
290+
status_msg.last_tool_used = status_data.get('last_tool_used', '')
291+
292+
# Parse timestamp if provided, otherwise use current time
293+
timestamp_str = status_data.get('timestamp')
294+
if timestamp_str:
295+
try:
296+
dt = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
297+
status_msg.timestamp.sec = int(dt.timestamp())
298+
status_msg.timestamp.nanosec = int((dt.timestamp() % 1) * 1e9)
299+
except:
300+
status_msg.timestamp = self.get_clock().now().to_msg()
301+
else:
302+
status_msg.timestamp = self.get_clock().now().to_msg()
303+
304+
self.agent_status_pub.publish(status_msg)
305+
306+
elif message_type == 'TOOL_EVENT':
307+
# Handle function tool events
308+
tool_data = data.get('data', {})
309+
310+
self.get_logger().info(f"Tool Event: {tool_data.get('tool_name', 'unknown')} - {tool_data.get('status', 'unknown')}")
311+
312+
# Publish tool event to ROS2 topic
313+
tool_msg = ToolEvent()
314+
tool_msg.tool_name = tool_data.get('tool_name', 'unknown')
315+
tool_msg.parameters = tool_data.get('parameters', [])
316+
tool_msg.result = tool_data.get('result', '')
317+
tool_msg.status = tool_data.get('status', 'unknown')
318+
319+
# Parse timestamp if provided, otherwise use current time
320+
timestamp_str = tool_data.get('timestamp')
321+
if timestamp_str:
322+
try:
323+
dt = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
324+
tool_msg.timestamp.sec = int(dt.timestamp())
325+
tool_msg.timestamp.nanosec = int((dt.timestamp() % 1) * 1e9)
326+
except:
327+
tool_msg.timestamp = self.get_clock().now().to_msg()
328+
else:
329+
tool_msg.timestamp = self.get_clock().now().to_msg()
330+
331+
self.tool_event_pub.publish(tool_msg)
332+
262333
elif message_type == 'ACKNOWLEDGMENT':
263334
# Handle acknowledgment messages from voice agent
264335
status = data.get('status', 'unknown')

coffee_ws/src/coffee_voice_agent/scripts/agents/coffee_barista_agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from state.state_manager import StateManager, AgentState
2424
from tools.coffee_tools import (
2525
get_current_time_impl, get_current_date_impl, get_coffee_menu_impl,
26-
get_ordering_instructions_impl, recommend_drink_impl
26+
get_ordering_instructions_impl, recommend_drink_impl, set_agent_instance
2727
)
2828

2929
logger = logging.getLogger(__name__)
@@ -68,6 +68,9 @@ def __init__(self):
6868
# State management
6969
self.state_manager = StateManager(self)
7070

71+
# Set agent instance for tool event tracking
72+
set_agent_instance(self)
73+
7174
# Wake word detection setup
7275
self.porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
7376
self.porcupine = None

coffee_ws/src/coffee_voice_agent/scripts/state/state_manager.py

Lines changed: 88 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,16 @@ def __init__(self, agent=None):
4040
self.ctx = None
4141
self.agent = agent
4242
self.current_emotion = "waiting" # Track current emotional state
43+
self.previous_emotion = "" # Track previous emotion for smooth transitions
4344
self.emotion_history = [] # Log emotional journey
4445
self.ending_conversation = False # Flag to prevent timer conflicts during goodbye
4546

4647
# Text tracking for TTS events
4748
self.current_speech_preview = "" # Preview text for "started" events
4849
self.current_speech_full_text = "" # Accumulated full text for "finished" events
50+
51+
# Tool tracking
52+
self.last_tool_used = "" # Last function tool called
4953
self.virtual_request_queue = [] # Queue for virtual coffee requests
5054
self.announcing_virtual_request = False # Flag to prevent conflicts during announcements
5155
self.recent_greetings = [] # Track recent greetings to avoid repetition
@@ -67,6 +71,23 @@ async def transition_to_state(self, new_state: AgentState):
6771
await self._exit_current_state()
6872
self.current_state = new_state
6973
await self._enter_new_state()
74+
75+
# Send agent status update for behavioral mode changes
76+
behavioral_mode_map = {
77+
AgentState.DORMANT: "dormant",
78+
AgentState.CONNECTING: "connecting",
79+
AgentState.ACTIVE: "active",
80+
AgentState.DISCONNECTING: "disconnecting"
81+
}
82+
83+
behavioral_mode = behavioral_mode_map.get(new_state, "dormant")
84+
85+
# Determine conversation phase for greetings
86+
conversation_phase = ""
87+
if new_state == AgentState.ACTIVE:
88+
conversation_phase = "greeting"
89+
90+
await self._send_agent_status(behavioral_mode, "idle", conversation_phase)
7091

7192
async def _exit_current_state(self):
7293
"""Clean up current state"""
@@ -262,23 +283,26 @@ def on_agent_state_changed(event):
262283
"""Handle agent state changes (initializing/listening/thinking/speaking)"""
263284
logger.info(f"🔍 DEBUG: agent_state_changed: {event.old_state}{event.new_state}")
264285

265-
# Send TTS events based on state transitions
286+
# Send unified agent status based on state transitions
266287
async def handle_state_change():
267288
try:
289+
# Map LiveKit agent states to our behavioral modes
290+
current_behavioral_mode = "dormant" # Default
291+
if self.current_state == AgentState.ACTIVE:
292+
current_behavioral_mode = "active"
293+
elif self.current_state == AgentState.CONNECTING:
294+
current_behavioral_mode = "connecting"
295+
elif self.current_state == AgentState.DISCONNECTING:
296+
current_behavioral_mode = "disconnecting"
297+
268298
if event.new_state == "speaking":
269-
logger.info("🔍 DEBUG: Agent started speaking - sending TTS started event")
270-
current_emotion = self.current_emotion
271-
# Use preview text for started event
272-
text_to_send = self.current_speech_preview or "Agent Response"
273-
await self._send_tts_event("started", text_to_send, current_emotion, "session")
299+
logger.info("🔍 DEBUG: Agent started speaking - sending agent status")
300+
await self._send_agent_status(current_behavioral_mode, "speaking")
274301
elif event.old_state == "speaking" and event.new_state != "speaking":
275-
logger.info("🔍 DEBUG: Agent stopped speaking - sending TTS finished event")
276-
current_emotion = self.current_emotion
277-
# Use full accumulated text for finished event
278-
text_to_send = self.current_speech_full_text or "Agent Response"
279-
await self._send_tts_event("finished", text_to_send, current_emotion, "session")
302+
logger.info("🔍 DEBUG: Agent stopped speaking - sending agent status")
303+
await self._send_agent_status(current_behavioral_mode, "idle")
280304
except Exception as e:
281-
logger.error(f"Error handling agent state change TTS events: {e}")
305+
logger.error(f"Error handling agent state change status events: {e}")
282306

283307
asyncio.create_task(handle_state_change())
284308

@@ -482,6 +506,7 @@ def process_emotional_response(self, llm_response: str) -> tuple[str, str]:
482506
if emotion != self.current_emotion:
483507
logger.info(f"🎭 Emotion transition: {self.current_emotion}{emotion}")
484508
self.log_animated_eyes(emotion)
509+
self.previous_emotion = self.current_emotion # Store previous before updating
485510
self.current_emotion = emotion
486511

487512
# Store in emotion history
@@ -561,4 +586,54 @@ async def _send_tts_event(self, event: str, text: str, emotion: str, source: str
561586
}
562587
await self.agent._send_websocket_event("TTS_EVENT", event_data)
563588
else:
564-
logger.debug(f"Cannot send TTS {event} event - no agent WebSocket connection")
589+
logger.debug(f"Cannot send TTS {event} event - no agent WebSocket connection")
590+
591+
async def _send_agent_status(self, behavioral_mode: str, speech_status: str, conversation_phase: str = ""):
592+
"""Send unified agent status through agent's WebSocket connection"""
593+
if self.agent and hasattr(self.agent, '_send_websocket_event'):
594+
# Determine conversation phase if not provided
595+
if not conversation_phase:
596+
if self.announcing_virtual_request:
597+
conversation_phase = "announcement"
598+
elif behavioral_mode == "active":
599+
conversation_phase = "discussion"
600+
# else conversation_phase remains empty for dormant
601+
602+
# Get current speech text based on speech status
603+
speech_text = ""
604+
if speech_status == "speaking":
605+
speech_text = self.current_speech_preview or ""
606+
elif speech_status == "idle" and self.current_speech_full_text:
607+
speech_text = self.current_speech_full_text
608+
609+
status_data = {
610+
"behavioral_mode": behavioral_mode,
611+
"speech_status": speech_status,
612+
"emotion": self.current_emotion,
613+
"speech_text": speech_text,
614+
"previous_emotion": getattr(self, 'previous_emotion', ''),
615+
"conversation_phase": conversation_phase,
616+
"last_tool_used": self.last_tool_used,
617+
"timestamp": datetime.now().isoformat()
618+
}
619+
await self.agent._send_websocket_event("AGENT_STATUS", status_data)
620+
else:
621+
logger.debug(f"Cannot send agent status - no agent WebSocket connection")
622+
623+
async def _send_tool_event(self, tool_name: str, status: str, parameters: list = None, result: str = ""):
624+
"""Send tool event through agent's WebSocket connection"""
625+
if self.agent and hasattr(self.agent, '_send_websocket_event'):
626+
# Update last tool used when tool starts
627+
if status == "started":
628+
self.last_tool_used = tool_name
629+
630+
tool_data = {
631+
"tool_name": tool_name,
632+
"status": status,
633+
"parameters": parameters or [],
634+
"result": result,
635+
"timestamp": datetime.now().isoformat()
636+
}
637+
await self.agent._send_websocket_event("TOOL_EVENT", tool_data)
638+
else:
639+
logger.debug(f"Cannot send tool event - no agent WebSocket connection")

0 commit comments

Comments
 (0)