1+ #!/usr/bin/env python3
2+ """Coffee Barista Voice Agent v2 - Clean modular implementation
3+
4+ This is a refactored version of the original livekit_voice_agent.py that:
5+ - Uses extracted services for wake word detection and order notifications
6+ - Uses a simple agent with function tools
7+ - Uses emotion-aware TTS processing
8+ - Maintains the same functionality with cleaner architecture
9+ """
10+
11+ import asyncio
12+ import logging
13+ import os
14+ from datetime import datetime
15+
16+ from livekit import agents
17+ from livekit .agents import AgentSession , JobContext , WorkerOptions
18+ from livekit .plugins import openai , silero
19+
20+ # Import our extracted components
21+ from config .settings import REQUIRED_ENV_VARS , WEBSOCKET_HOST , WEBSOCKET_PORT
22+ from config .instructions import BARISTA_INSTRUCTIONS
23+ from agents .simple_coffee_agent import SimpleCoffeeAgent
24+ from services .emotion_service import EmotionStateManager
25+ from services .wake_word_service import WakeWordService
26+ from services .order_service import OrderNotificationService
27+ from utils .greeting_data import get_random_greeting
28+ from utils .announcement_data import format_virtual_request_announcement
29+
30+ # Configure logging
31+ logging .basicConfig (level = logging .INFO )
32+ logger = logging .getLogger (__name__ )
33+
34+
35+ class CoffeeBaristaV2 :
36+ """Clean coffee barista implementation using extracted services
37+
38+ This version composes the extracted services to provide the same functionality
39+ as the original but with much cleaner architecture and separation of concerns.
40+ """
41+
42+ def __init__ (self ):
43+ # Core components - emotion manager is handled by the agent now
44+ self .emotion_manager = EmotionStateManager ()
45+ self .agent = SimpleCoffeeAgent (emotion_manager = self .emotion_manager )
46+
47+ # I/O Services
48+ self .wake_word_service = WakeWordService (on_wake_word_detected = self .on_wake_word_detected )
49+ self .order_service = OrderNotificationService (on_order_received = self .on_order_received )
50+
51+ # Session management
52+ self .current_session = None
53+ self .room = None
54+
55+ logger .info ("CoffeeBaristaV2 initialized with modular architecture" )
56+
57+ async def start (self , ctx : JobContext ):
58+ """Start the coffee barista with all services"""
59+ # Connect to the room
60+ await ctx .connect ()
61+ self .room = ctx .room
62+ logger .info (f"Connected to room: { ctx .room .name } " )
63+
64+ # Start I/O services
65+ await self .order_service .start ()
66+ wake_word_started = await self .wake_word_service .start (ctx .room )
67+
68+ # Handle wake word vs always-on mode
69+ if not wake_word_started :
70+ logger .info ("🔍 Starting in always-on mode (no wake word detection)" )
71+ await self .start_conversation ()
72+ else :
73+ logger .info ("Started in wake word mode - say 'hey barista' to activate" )
74+
75+ async def on_wake_word_detected (self , room ):
76+ """Handle wake word detection - start a conversation"""
77+ logger .info ("🔍 Wake word detected - starting conversation" )
78+ await self .start_conversation ()
79+
80+ async def start_conversation (self ):
81+ """Start a conversation session with emotion-aware TTS"""
82+ try :
83+ # Create session with standard TTS (emotion processing happens in agent.tts_node)
84+ self .current_session = AgentSession (
85+ stt = openai .STT (model = "whisper-1" ),
86+ llm = openai .LLM (
87+ model = "gpt-4o-mini" ,
88+ temperature = float (os .getenv ("VOICE_AGENT_TEMPERATURE" , "0.7" ))
89+ ),
90+ tts = openai .TTS (
91+ model = "tts-1" ,
92+ voice = os .getenv ("VOICE_AGENT_VOICE" , "nova" )
93+ ),
94+ vad = silero .VAD .load (),
95+ )
96+
97+ # Start the session with our simple agent
98+ await self .current_session .start (
99+ room = self .room ,
100+ agent = self .agent
101+ )
102+
103+ # Pause wake word detection during conversation
104+ if self .wake_word_service .is_active ():
105+ self .wake_word_service .pause ()
106+
107+ # Start with a random greeting
108+ greeting = get_random_greeting ()
109+ emotion , text = self .emotion_manager .process_emotional_response (greeting )
110+
111+ # Use the session to say the greeting
112+ await self .current_session .say (text )
113+
114+ logger .info ("🎉 Conversation started successfully" )
115+
116+ except Exception as e :
117+ logger .error (f"Error starting conversation: { e } " )
118+ await self .end_conversation ()
119+
120+ async def on_order_received (self , order_info ):
121+ """Handle order notifications from WebSocket"""
122+ try :
123+ # Format the order announcement using our utility
124+ announcement = format_virtual_request_announcement ({
125+ "type" : order_info ["type" ],
126+ "content" : order_info ["content" ]
127+ })
128+
129+ # Process emotion and announce if we have an active session
130+ if self .current_session :
131+ emotion , text = self .emotion_manager .process_emotional_response (announcement )
132+ await self .current_session .say (text )
133+ logger .info (f"📢 Announced order: { order_info ['coffee_type' ]} " )
134+ else :
135+ logger .info (f"📋 Order received but no active session: { order_info ['coffee_type' ]} " )
136+
137+ except Exception as e :
138+ logger .error (f"Error processing order notification: { e } " )
139+
140+ async def end_conversation (self ):
141+ """End the current conversation and return to dormant state"""
142+ if self .current_session :
143+ try :
144+ await self .current_session .aclose ()
145+ except Exception as e :
146+ logger .error (f"Error closing session: { e } " )
147+ finally :
148+ self .current_session = None
149+
150+ # Resume wake word detection
151+ if self .wake_word_service .is_active ():
152+ self .wake_word_service .resume ()
153+
154+ # Reset emotion state for next conversation
155+ self .emotion_manager .reset_emotion_state ()
156+
157+ logger .info ("🔍 Conversation ended - returned to dormant state" )
158+
159+ def stop (self ):
160+ """Stop all services and clean up resources"""
161+ logger .info ("🛑 Stopping Coffee Barista v2..." )
162+
163+ # Stop I/O services
164+ self .wake_word_service .stop ()
165+ self .order_service .stop ()
166+
167+ # Close any active session
168+ if self .current_session :
169+ # Note: In a real implementation, this would need proper async cleanup
170+ logger .info ("Closing active session..." )
171+
172+ logger .info ("✅ Coffee Barista v2 stopped" )
173+
174+ def __repr__ (self ):
175+ return f"CoffeeBaristaV2(session_active={ self .current_session is not None } , emotion={ self .emotion_manager .get_current_emotion ()} )"
176+
177+
178+ async def entrypoint (ctx : JobContext ):
179+ """Main entrypoint for the coffee barista agent v2"""
180+ barista = CoffeeBaristaV2 ()
181+ await barista .start (ctx )
182+
183+
184+ def main ():
185+ """Main function with environment validation and startup"""
186+ # Validate required environment variables
187+ missing_vars = [var for var in REQUIRED_ENV_VARS if not os .getenv (var )]
188+
189+ if missing_vars :
190+ logger .error (f"Missing required environment variables: { missing_vars } " )
191+ logger .error ("Please check your .env file and ensure OPENAI_API_KEY is set." )
192+ exit (1 )
193+
194+ # Log configuration
195+ logger .info ("☕ Starting Coffee Barista Voice Agent v2..." )
196+ logger .info (f"Wake Word Detection: { '✅ Enabled' if os .getenv ('PORCUPINE_ACCESS_KEY' ) else '❌ Disabled (always-on mode)' } " )
197+ logger .info (f"WebSocket Server: ✅ Enabled on { WEBSOCKET_HOST } :{ WEBSOCKET_PORT } " )
198+ logger .info (f"OpenAI Model: gpt-4o-mini" )
199+ logger .info (f"Voice: { os .getenv ('VOICE_AGENT_VOICE' , 'nova' )} " )
200+ logger .info (f"Temperature: { os .getenv ('VOICE_AGENT_TEMPERATURE' , '0.7' )} " )
201+ logger .info (f"Architecture: 🏗️ Modular (extracted services)" )
202+
203+ logger .info ("\n 📋 Available CLI modes:" )
204+ logger .info (" python coffee_barista_v2.py console - Terminal mode (local testing)" )
205+ logger .info (" python coffee_barista_v2.py dev - Development mode (connect to LiveKit)" )
206+ logger .info (" python coffee_barista_v2.py start - Production mode" )
207+
208+ # Run the agent
209+ agents .cli .run_app (
210+ WorkerOptions (
211+ entrypoint_fnc = entrypoint ,
212+ agent_name = "coffee-barista-v2"
213+ )
214+ )
215+
216+
217+ if __name__ == "__main__" :
218+ main ()
0 commit comments