Publish unified agent status

kPatch · kPatch · commit e90e3b1bf04d · 2025-07-28T21:45:19.000-04:00
diff --git a/coffee_ws/src/coffee_voice_agent/coffee_voice_agent/voice_agent_bridge.py b/coffee_ws/src/coffee_voice_agent/coffee_voice_agent/voice_agent_bridge.py
@@ -21,7 +21,7 @@
 from rclpy.callback_groups import ReentrantCallbackGroup
 from std_msgs.msg import String, Bool
 from geometry_msgs.msg import Twist
-from coffee_voice_agent_msgs.msg import TtsEvent, AgentState, EmotionState, ConversationItem
+from coffee_voice_agent_msgs.msg import TtsEvent, AgentState, EmotionState, ConversationItem, AgentStatus, ToolEvent
 
 try:
     import websockets
@@ -84,6 +84,20 @@ def __init__(self):
             callback_group=self.callback_group
         )
         
+        self.agent_status_pub = self.create_publisher(
+            AgentStatus,
+            'voice_agent/status',
+            10,
+            callback_group=self.callback_group
+        )
+        
+        self.tool_event_pub = self.create_publisher(
+            ToolEvent,
+            'voice_agent/tool_events',
+            10,
+            callback_group=self.callback_group
+        )
+        
         self.connected_pub = self.create_publisher(
             Bool, 
             'voice_agent/connected', 
@@ -259,6 +273,63 @@ async def _handle_websocket_message(self, message: str):
                     tts_msg.timestamp = self.get_clock().now().to_msg()
                 self.tts_events_pub.publish(tts_msg)
                 
+            elif message_type == 'AGENT_STATUS':
+                # Handle unified agent status events
+                status_data = data.get('data', {})
+                
+                self.get_logger().info(f"Agent Status: mode={status_data.get('behavioral_mode', 'unknown')}, speech={status_data.get('speech_status', 'unknown')}, emotion={status_data.get('emotion', 'unknown')}")
+                
+                # Publish unified agent status to ROS2 topic
+                status_msg = AgentStatus()
+                status_msg.behavioral_mode = status_data.get('behavioral_mode', 'unknown')
+                status_msg.speech_status = status_data.get('speech_status', 'unknown') 
+                status_msg.emotion = status_data.get('emotion', 'unknown')
+                status_msg.speech_text = status_data.get('speech_text', '')
+                status_msg.previous_emotion = status_data.get('previous_emotion', 'unknown')
+                status_msg.conversation_phase = status_data.get('conversation_phase', '')
+                status_msg.last_tool_used = status_data.get('last_tool_used', '')
+                
+                # Parse timestamp if provided, otherwise use current time
+                timestamp_str = status_data.get('timestamp')
+                if timestamp_str:
+                    try:
+                        dt = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
+                        status_msg.timestamp.sec = int(dt.timestamp())
+                        status_msg.timestamp.nanosec = int((dt.timestamp() % 1) * 1e9)
+                    except:
+                        status_msg.timestamp = self.get_clock().now().to_msg()
+                else:
+                    status_msg.timestamp = self.get_clock().now().to_msg()
+                    
+                self.agent_status_pub.publish(status_msg)
+                
+            elif message_type == 'TOOL_EVENT':
+                # Handle function tool events
+                tool_data = data.get('data', {})
+                
+                self.get_logger().info(f"Tool Event: {tool_data.get('tool_name', 'unknown')} - {tool_data.get('status', 'unknown')}")
+                
+                # Publish tool event to ROS2 topic
+                tool_msg = ToolEvent()
+                tool_msg.tool_name = tool_data.get('tool_name', 'unknown')
+                tool_msg.parameters = tool_data.get('parameters', [])
+                tool_msg.result = tool_data.get('result', '')
+                tool_msg.status = tool_data.get('status', 'unknown')
+                
+                # Parse timestamp if provided, otherwise use current time
+                timestamp_str = tool_data.get('timestamp')
+                if timestamp_str:
+                    try:
+                        dt = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
+                        tool_msg.timestamp.sec = int(dt.timestamp())
+                        tool_msg.timestamp.nanosec = int((dt.timestamp() % 1) * 1e9)
+                    except:
+                        tool_msg.timestamp = self.get_clock().now().to_msg()
+                else:
+                    tool_msg.timestamp = self.get_clock().now().to_msg()
+                    
+                self.tool_event_pub.publish(tool_msg)
+                
             elif message_type == 'ACKNOWLEDGMENT':
                 # Handle acknowledgment messages from voice agent
                 status = data.get('status', 'unknown')
diff --git a/coffee_ws/src/coffee_voice_agent/scripts/agents/coffee_barista_agent.py b/coffee_ws/src/coffee_voice_agent/scripts/agents/coffee_barista_agent.py
@@ -23,7 +23,7 @@
 from state.state_manager import StateManager, AgentState
 from tools.coffee_tools import (
     get_current_time_impl, get_current_date_impl, get_coffee_menu_impl,
-    get_ordering_instructions_impl, recommend_drink_impl
+    get_ordering_instructions_impl, recommend_drink_impl, set_agent_instance
 )
 
 logger = logging.getLogger(__name__)
@@ -68,6 +68,9 @@ def __init__(self):
         # State management
         self.state_manager = StateManager(self)
         
+        # Set agent instance for tool event tracking
+        set_agent_instance(self)
+        
         # Wake word detection setup
         self.porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
         self.porcupine = None
diff --git a/coffee_ws/src/coffee_voice_agent/scripts/state/state_manager.py b/coffee_ws/src/coffee_voice_agent/scripts/state/state_manager.py
@@ -40,12 +40,16 @@ def __init__(self, agent=None):
         self.ctx = None
         self.agent = agent
         self.current_emotion = "waiting"  # Track current emotional state
+        self.previous_emotion = ""  # Track previous emotion for smooth transitions
         self.emotion_history = []  # Log emotional journey
         self.ending_conversation = False  # Flag to prevent timer conflicts during goodbye
         
         # Text tracking for TTS events
         self.current_speech_preview = ""  # Preview text for "started" events
         self.current_speech_full_text = ""  # Accumulated full text for "finished" events
+        
+        # Tool tracking
+        self.last_tool_used = ""  # Last function tool called
         self.virtual_request_queue = []  # Queue for virtual coffee requests
         self.announcing_virtual_request = False  # Flag to prevent conflicts during announcements
         self.recent_greetings = []  # Track recent greetings to avoid repetition
@@ -67,6 +71,23 @@ async def transition_to_state(self, new_state: AgentState):
             await self._exit_current_state()
             self.current_state = new_state
             await self._enter_new_state()
+            
+            # Send agent status update for behavioral mode changes
+            behavioral_mode_map = {
+                AgentState.DORMANT: "dormant",
+                AgentState.CONNECTING: "connecting", 
+                AgentState.ACTIVE: "active",
+                AgentState.DISCONNECTING: "disconnecting"
+            }
+            
+            behavioral_mode = behavioral_mode_map.get(new_state, "dormant")
+            
+            # Determine conversation phase for greetings
+            conversation_phase = ""
+            if new_state == AgentState.ACTIVE:
+                conversation_phase = "greeting"
+            
+            await self._send_agent_status(behavioral_mode, "idle", conversation_phase)
 
     async def _exit_current_state(self):
         """Clean up current state"""
@@ -262,23 +283,26 @@ def on_agent_state_changed(event):
                 """Handle agent state changes (initializing/listening/thinking/speaking)"""
                 logger.info(f"🔍 DEBUG: agent_state_changed: {event.old_state} → {event.new_state}")
                 
-                # Send TTS events based on state transitions
+                # Send unified agent status based on state transitions
                 async def handle_state_change():
                     try:
+                        # Map LiveKit agent states to our behavioral modes
+                        current_behavioral_mode = "dormant"  # Default
+                        if self.current_state == AgentState.ACTIVE:
+                            current_behavioral_mode = "active"
+                        elif self.current_state == AgentState.CONNECTING:
+                            current_behavioral_mode = "connecting"
+                        elif self.current_state == AgentState.DISCONNECTING:
+                            current_behavioral_mode = "disconnecting"
+                        
                         if event.new_state == "speaking":
-                            logger.info("🔍 DEBUG: Agent started speaking - sending TTS started event")
-                            current_emotion = self.current_emotion
-                            # Use preview text for started event
-                            text_to_send = self.current_speech_preview or "Agent Response"
-                            await self._send_tts_event("started", text_to_send, current_emotion, "session")
+                            logger.info("🔍 DEBUG: Agent started speaking - sending agent status")
+                            await self._send_agent_status(current_behavioral_mode, "speaking")
                         elif event.old_state == "speaking" and event.new_state != "speaking":
-                            logger.info("🔍 DEBUG: Agent stopped speaking - sending TTS finished event")
-                            current_emotion = self.current_emotion
-                            # Use full accumulated text for finished event
-                            text_to_send = self.current_speech_full_text or "Agent Response"
-                            await self._send_tts_event("finished", text_to_send, current_emotion, "session")
+                            logger.info("🔍 DEBUG: Agent stopped speaking - sending agent status")
+                            await self._send_agent_status(current_behavioral_mode, "idle")
                     except Exception as e:
-                        logger.error(f"Error handling agent state change TTS events: {e}")
+                        logger.error(f"Error handling agent state change status events: {e}")
                 
                 asyncio.create_task(handle_state_change())
                 
@@ -482,6 +506,7 @@ def process_emotional_response(self, llm_response: str) -> tuple[str, str]:
             if emotion != self.current_emotion:
                 logger.info(f"🎭 Emotion transition: {self.current_emotion} → {emotion}")
                 self.log_animated_eyes(emotion)
+                self.previous_emotion = self.current_emotion  # Store previous before updating
                 self.current_emotion = emotion
                 
                 # Store in emotion history
@@ -561,4 +586,54 @@ async def _send_tts_event(self, event: str, text: str, emotion: str, source: str
             }
             await self.agent._send_websocket_event("TTS_EVENT", event_data)
         else:
-            logger.debug(f"Cannot send TTS {event} event - no agent WebSocket connection") 
+            logger.debug(f"Cannot send TTS {event} event - no agent WebSocket connection")
+
+    async def _send_agent_status(self, behavioral_mode: str, speech_status: str, conversation_phase: str = ""):
+        """Send a unified AGENT_STATUS event through the agent's WebSocket connection.
+
+        An empty conversation_phase is filled in from the announcement flag or an "active" behavioral_mode.
+        """
+        if not (self.agent and hasattr(self.agent, '_send_websocket_event')):
+            logger.debug("Cannot send agent status - no agent WebSocket connection")
+            return
+        if not conversation_phase:
+            if self.announcing_virtual_request:
+                conversation_phase = "announcement"
+            elif behavioral_mode == "active":
+                conversation_phase = "discussion"  # any other mode keeps the phase empty
+
+        speech_text = ""
+        if speech_status == "speaking":
+            speech_text = self.current_speech_preview or ""  # preview only while speaking
+        elif speech_status == "idle" and self.current_speech_full_text:
+            speech_text = self.current_speech_full_text  # accumulated full text once idle
+
+        status_data = {
+            "behavioral_mode": behavioral_mode,
+            "speech_status": speech_status,
+            "emotion": self.current_emotion,
+            "speech_text": speech_text,
+            "previous_emotion": self.previous_emotion,
+            "conversation_phase": conversation_phase,
+            "last_tool_used": self.last_tool_used,
+            "timestamp": datetime.now().isoformat()  # NOTE(review): naive local time; confirm receiver shares the timezone
+        }
+        await self.agent._send_websocket_event("AGENT_STATUS", status_data)
+
+    async def _send_tool_event(self, tool_name: str, status: str, parameters: list = None, result: str = ""):
+        """Send a TOOL_EVENT through the agent's WebSocket connection.
+
+        A "started" status also records tool_name as last_tool_used; parameters=None is sent as [].
+        """
+        if not (self.agent and hasattr(self.agent, '_send_websocket_event')):
+            logger.debug("Cannot send tool event - no agent WebSocket connection")
+            return
+        if status == "started":
+            self.last_tool_used = tool_name  # reported by later AGENT_STATUS events
+        await self.agent._send_websocket_event("TOOL_EVENT", {
+            "tool_name": tool_name,
+            "status": status,
+            "parameters": parameters or [],
+            "result": result,
+            "timestamp": datetime.now().isoformat()
+        })