@@ -23,7 +23,7 @@
 class Assistant(Agent):
     def __init__(self) -> None:
         super().__init__(
-            instructions="""You are a helpful voice AI assistant.
+            instructions="""You are a helpful voice AI assistant. The user is interacting with you via voice, even if you perceive the conversation as text.
             You eagerly assist users with their questions by providing information from your extensive knowledge.
             Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols.
             You are curious, friendly, and have a sense of humor.""",
@@ -62,13 +62,13 @@ async def entrypoint(ctx: JobContext):
     session = AgentSession(
         # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
         # See all available models at https://docs.livekit.io/agents/models/stt/
-        stt="assemblyai/universal-streaming",
+        stt="assemblyai/universal-streaming:en",
         # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
         # See all available models at https://docs.livekit.io/agents/models/llm/
-        llm="azure/gpt-4o-mini",
+        llm="openai/gpt-4.1-mini",
         # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
         # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
-        tts="cartesia/sonic-2:f786b574-daa5-4673-aa0c-cbe3e8534c02",
+        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
         # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
         # See more at https://docs.livekit.io/agents/build/turns
         turn_detection=MultilingualModel(),
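For orientation, here is a minimal sketch of the agent file after this change, assuming the LiveKit Agents Python SDK with the AssemblyAI, OpenAI, Cartesia, and turn-detector plugins installed. The instructions, model strings, and MultilingualModel come straight from the diff above; the entrypoint body and CLI bootstrap follow the usual starter layout and may differ from this repo's exact file.

```python
# A sketch, not this commit's exact file: the instructions and model strings
# are taken from the diff above; the rest mirrors the common starter layout.
# VAD, noise cancellation, and other starter options are omitted for brevity.
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
from livekit.plugins.turn_detector.multilingual import MultilingualModel


class Assistant(Agent):
    def __init__(self) -> None:
        super().__init__(
            instructions="""You are a helpful voice AI assistant. The user is interacting with you via voice, even if you perceive the conversation as text.
            You eagerly assist users with their questions by providing information from your extensive knowledge.
            Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols.
            You are curious, friendly, and have a sense of humor.""",
        )


async def entrypoint(ctx: JobContext):
    # String descriptors let the framework resolve the provider plugin for each
    # stage; the matching plugin packages and API keys must be configured.
    session = AgentSession(
        stt="assemblyai/universal-streaming:en",
        llm="openai/gpt-4.1-mini",
        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
        turn_detection=MultilingualModel(),
    )
    await session.start(room=ctx.room, agent=Assistant())
    await ctx.connect()


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
```

Reading the new model strings: the `:en` suffix presumably pins the AssemblyAI streaming model to English, and the UUID after `cartesia/sonic-2` selects a specific voice, per the voice-selection note in the TTS comment above.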