ATR-Lab
diff --git a/‎coffee_ws/src/coffee_voice_agent/scripts/agents/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎coffee_ws/src/coffee_voice_agent/scripts/agents/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎coffee_ws/src/coffee_voice_agent/scripts/agents/simple_coffee_agent.py‎
Lines changed: 136 additions & 0 deletions b/‎coffee_ws/src/coffee_voice_agent/scripts/agents/simple_coffee_agent.py‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎coffee_ws/src/coffee_voice_agent/scripts/coffee_barista_v2.py‎
Lines changed: 218 additions & 0 deletions b/‎coffee_ws/src/coffee_voice_agent/scripts/coffee_barista_v2.py‎
Lines changed: 218 additions & 0 deletions
diff --git a/‎coffee_ws/src/coffee_voice_agent/scripts/services/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎coffee_ws/src/coffee_voice_agent/scripts/services/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+"""Agents for Coffee Voice Agent""" 
@@ -0,0 +1,136 @@
+"""Simple coffee agent with emotion-aware TTS processing"""
+
+import logging
+from livekit.agents import Agent, function_tool
+from services.emotion_service import EmotionStateManager
+
+from config.instructions import BARISTA_INSTRUCTIONS
+from tools.coffee_tools import (
+    get_current_time_impl, get_current_date_impl, get_coffee_menu_impl,
+    get_ordering_instructions_impl, recommend_drink_impl
+)
+
+# Module-level logger named after this module (via __name__).
+logger = logging.getLogger(__name__)
+
+
+class SimpleCoffeeAgent(Agent):
+    """Coffee barista agent with emotion-aware TTS processing
+
+    This agent follows the proven pattern from the original implementation:
+    - Standard LiveKit Agent with instructions and function tools (programmatic registration)
+    - Override tts_node to handle emotion:text format processing
+    - Clean separation of concerns with emotion service
+    """
+
+    def __init__(self, emotion_manager=None):
+        # Use provided emotion manager or create a new one
+        self.emotion_manager = emotion_manager or EmotionStateManager()
+        
+        # Initialize with instructions and programmatically registered tools
+        super().__init__(
+            instructions=BARISTA_INSTRUCTIONS,
+            tools=[
+                function_tool(
+                    get_current_time_impl,
+                    name="get_current_time",
+                    description="Get the current time."
+                ),
+                function_tool(
+                    get_current_date_impl,
+                    name="get_current_date",
+                    description="Get today's date."
+                ),
+                function_tool(
+                    get_coffee_menu_impl,
+                    name="get_coffee_menu",
+                    description="Get the Sui Hub coffee menu."
+                ),
+                function_tool(
+                    get_ordering_instructions_impl,
+                    name="get_ordering_instructions",
+                    description="Get instructions on how to order coffee through the Slush wallet and Coffee Hub website."
+                ),
+                function_tool(
+                    recommend_drink_impl,
+                    name="recommend_drink",
+                    description="Recommend a drink based on user preference."
+                ),
+            ]
+        )
+        
+        logger.info("SimpleCoffeeAgent initialized with emotion-aware TTS processing and 5 programmatically registered tools")
+
+    async def tts_node(self, text, model_settings=None):
+        """Override TTS node to process emotion:text format (same pattern as original)"""
+        
+        # Process text stream with minimal buffering for emotion extraction
+        async def process_text_stream():
+            first_chunk_buffer = ""
+            emotion_extracted = False
+            emotion_check_limit = 50  # Only check first 50 characters for emotion delimiter
+            chunks_processed = 0
+            
+            async for text_chunk in text:
+                if not text_chunk:
+                    continue
+
+                chunks_processed += 1
+                
+                # Only buffer and check for emotion in the very first chunk(s)
+                if not emotion_extracted and len(first_chunk_buffer) < emotion_check_limit:
+                    first_chunk_buffer += text_chunk
+                    
+                    # Check if we have delimiter in the buffered portion
+                    if ":" in first_chunk_buffer:
+                        logger.info("🔍 Found delimiter in first chunk(s)! Extracting emotion...")
+                        
+                        # Process emotion using our emotion service
+                        emotion, text_after_delimiter = self.emotion_manager.process_emotional_response(first_chunk_buffer)
+                        
+                        logger.info(f"🎭 Agent speaking with emotion: {emotion}")
+                        
+                        # Mark emotion as extracted
+                        emotion_extracted = True
+                        
+                        # Immediately yield the text part (no more buffering)
+                        if text_after_delimiter.strip():
+                            logger.info(f"💬 TTS streaming text immediately: {text_after_delimiter[:30]}{'...' if len(text_after_delimiter) > 30 else ''}")
+                            yield text_after_delimiter
+                        
+                    elif len(first_chunk_buffer) >= emotion_check_limit:
+                        # Reached limit without finding delimiter - give up and stream everything
+                        logger.info("🔍 No delimiter found within limit, streaming everything with default emotion")
+                        
+                        # Process with default emotion
+                        emotion, processed_text = self.emotion_manager.process_emotional_response(first_chunk_buffer)
+                        
+                        emotion_extracted = True
+                        
+                        # Yield the processed content immediately
+                        logger.info(f"💬 TTS fallback streaming: {processed_text[:30]}{'...' if len(processed_text) > 30 else ''}")
+                        yield processed_text
+                    
+                    # If we haven't extracted emotion yet and haven't hit limit, continue buffering
+                    # (don't yield anything yet)
+                    
+                else:
+                    # Either emotion already extracted, or we're past the check limit
+                    # Stream everything immediately
+                    yield text_chunk
+        
+        # Process the text stream and pass clean text to default TTS
+        processed_text = process_text_stream()
+        
+        # Use default TTS implementation with processed text
+        async for audio_frame in Agent.default.tts_node(self, processed_text, model_settings):
+            yield audio_frame
+
+    # Function tools are automatically discovered by LiveKit from the imported functions
+    # No need to manually register them - the @function_tool decorators handle this
+
+    def get_emotion_manager(self):
+        """Get the emotion manager for external access"""
+        return self.emotion_manager
+
+    def __repr__(self):
+        return f"SimpleCoffeeAgent(tools=5, emotion={self.emotion_manager.get_current_emotion()}, registration=programmatic)" 
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+"""Coffee Barista Voice Agent v2 - Clean modular implementation
+
+This is a refactored version of the original livekit_voice_agent.py that:
+- Uses extracted services for wake word detection and order notifications  
+- Uses a simple agent with function tools
+- Uses emotion-aware TTS processing
+- Maintains the same functionality with cleaner architecture
+"""
+
+import asyncio
+import logging
+import os
+from datetime import datetime
+
+from livekit import agents
+from livekit.agents import AgentSession, JobContext, WorkerOptions
+from livekit.plugins import openai, silero
+
+# Import our extracted components
+from config.settings import REQUIRED_ENV_VARS, WEBSOCKET_HOST, WEBSOCKET_PORT
+from config.instructions import BARISTA_INSTRUCTIONS
+from agents.simple_coffee_agent import SimpleCoffeeAgent
+from services.emotion_service import EmotionStateManager
+from services.wake_word_service import WakeWordService
+from services.order_service import OrderNotificationService
+from utils.greeting_data import get_random_greeting
+from utils.announcement_data import format_virtual_request_announcement
+
+# Configure logging: root logger at INFO level, plus a module-level logger
+# named after this module.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class CoffeeBaristaV2:
+    """Clean coffee barista implementation using extracted services
+    
+    This version composes the extracted services to provide the same functionality
+    as the original but with much cleaner architecture and separation of concerns.
+    """
+    
+    def __init__(self):
+        # Core components - emotion manager is handled by the agent now
+        self.emotion_manager = EmotionStateManager()
+        self.agent = SimpleCoffeeAgent(emotion_manager=self.emotion_manager)
+        
+        # I/O Services
+        self.wake_word_service = WakeWordService(on_wake_word_detected=self.on_wake_word_detected)
+        self.order_service = OrderNotificationService(on_order_received=self.on_order_received)
+        
+        # Session management
+        self.current_session = None
+        self.room = None
+        
+        logger.info("CoffeeBaristaV2 initialized with modular architecture")
+    
+    async def start(self, ctx: JobContext):
+        """Start the coffee barista with all services"""
+        # Connect to the room
+        await ctx.connect()
+        self.room = ctx.room
+        logger.info(f"Connected to room: {ctx.room.name}")
+        
+        # Start I/O services
+        await self.order_service.start()
+        wake_word_started = await self.wake_word_service.start(ctx.room)
+        
+        # Handle wake word vs always-on mode
+        if not wake_word_started:
+            logger.info("🔍 Starting in always-on mode (no wake word detection)")
+            await self.start_conversation()
+        else:
+            logger.info("Started in wake word mode - say 'hey barista' to activate")
+    
+    async def on_wake_word_detected(self, room):
+        """Handle wake word detection - start a conversation"""
+        logger.info("🔍 Wake word detected - starting conversation")
+        await self.start_conversation()
+    
+    async def start_conversation(self):
+        """Start a conversation session with emotion-aware TTS"""
+        try:
+            # Create session with standard TTS (emotion processing happens in agent.tts_node)
+            self.current_session = AgentSession(
+                stt=openai.STT(model="whisper-1"),
+                llm=openai.LLM(
+                    model="gpt-4o-mini",
+                    temperature=float(os.getenv("VOICE_AGENT_TEMPERATURE", "0.7"))
+                ),
+                tts=openai.TTS(
+                    model="tts-1",
+                    voice=os.getenv("VOICE_AGENT_VOICE", "nova")
+                ),
+                vad=silero.VAD.load(),
+            )
+            
+            # Start the session with our simple agent
+            await self.current_session.start(
+                room=self.room,
+                agent=self.agent
+            )
+            
+            # Pause wake word detection during conversation
+            if self.wake_word_service.is_active():
+                self.wake_word_service.pause()
+            
+            # Start with a random greeting
+            greeting = get_random_greeting()
+            emotion, text = self.emotion_manager.process_emotional_response(greeting)
+            
+            # Use the session to say the greeting
+            await self.current_session.say(text)
+            
+            logger.info("🎉 Conversation started successfully")
+            
+        except Exception as e:
+            logger.error(f"Error starting conversation: {e}")
+            await self.end_conversation()
+    
+    async def on_order_received(self, order_info):
+        """Handle order notifications from WebSocket"""
+        try:
+            # Format the order announcement using our utility
+            announcement = format_virtual_request_announcement({
+                "type": order_info["type"],
+                "content": order_info["content"]
+            })
+            
+            # Process emotion and announce if we have an active session
+            if self.current_session:
+                emotion, text = self.emotion_manager.process_emotional_response(announcement)
+                await self.current_session.say(text)
+                logger.info(f"📢 Announced order: {order_info['coffee_type']}")
+            else:
+                logger.info(f"📋 Order received but no active session: {order_info['coffee_type']}")
+                
+        except Exception as e:
+            logger.error(f"Error processing order notification: {e}")
+    
+    async def end_conversation(self):
+        """End the current conversation and return to dormant state"""
+        if self.current_session:
+            try:
+                await self.current_session.aclose()
+            except Exception as e:
+                logger.error(f"Error closing session: {e}")
+            finally:
+                self.current_session = None
+        
+        # Resume wake word detection
+        if self.wake_word_service.is_active():
+            self.wake_word_service.resume()
+        
+        # Reset emotion state for next conversation
+        self.emotion_manager.reset_emotion_state()
+        
+        logger.info("🔍 Conversation ended - returned to dormant state")
+    
+    def stop(self):
+        """Stop all services and clean up resources"""
+        logger.info("🛑 Stopping Coffee Barista v2...")
+        
+        # Stop I/O services
+        self.wake_word_service.stop()
+        self.order_service.stop()
+        
+        # Close any active session
+        if self.current_session:
+            # Note: In a real implementation, this would need proper async cleanup
+            logger.info("Closing active session...")
+        
+        logger.info("✅ Coffee Barista v2 stopped")
+    
+    def __repr__(self):
+        return f"CoffeeBaristaV2(session_active={self.current_session is not None}, emotion={self.emotion_manager.get_current_emotion()})"
+
+
+async def entrypoint(ctx: JobContext):
+    """Main entrypoint for the coffee barista agent v2"""
+    barista = CoffeeBaristaV2()
+    await barista.start(ctx)
+
+
+def main():
+    """Main function with environment validation and startup"""
+    # Validate required environment variables
+    missing_vars = [var for var in REQUIRED_ENV_VARS if not os.getenv(var)]
+    
+    if missing_vars:
+        logger.error(f"Missing required environment variables: {missing_vars}")
+        logger.error("Please check your .env file and ensure OPENAI_API_KEY is set.")
+        exit(1)
+    
+    # Log configuration
+    logger.info("☕ Starting Coffee Barista Voice Agent v2...")
+    logger.info(f"Wake Word Detection: {'✅ Enabled' if os.getenv('PORCUPINE_ACCESS_KEY') else '❌ Disabled (always-on mode)'}")
+    logger.info(f"WebSocket Server: ✅ Enabled on {WEBSOCKET_HOST}:{WEBSOCKET_PORT}")
+    logger.info(f"OpenAI Model: gpt-4o-mini")
+    logger.info(f"Voice: {os.getenv('VOICE_AGENT_VOICE', 'nova')}")
+    logger.info(f"Temperature: {os.getenv('VOICE_AGENT_TEMPERATURE', '0.7')}")
+    logger.info(f"Architecture: 🏗️ Modular (extracted services)")
+    
+    logger.info("\n📋 Available CLI modes:")
+    logger.info("  python coffee_barista_v2.py console  - Terminal mode (local testing)")
+    logger.info("  python coffee_barista_v2.py dev      - Development mode (connect to LiveKit)")
+    logger.info("  python coffee_barista_v2.py start    - Production mode")
+    
+    # Run the agent
+    agents.cli.run_app(
+        WorkerOptions(
+            entrypoint_fnc=entrypoint,
+            agent_name="coffee-barista-v2"
+        )
+    )
+
+
+# Run main() only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main() 
@@ -0,0 +1 @@
+"""Services for Coffee Voice Agent"""