1313 WorkerOptions ,
1414 cli ,
1515 metrics ,
16+ inference ,
1617)
1718from livekit .plugins import noise_cancellation , silero
18- from livekit .plugins .turn_detector .multilingual import MultilingualModel
19+ from livekit .plugins .turn_detector .english import EnglishModel
1920
2021logger = logging .getLogger ("agent" )
2122
@@ -50,8 +51,7 @@ def __init__(self) -> None:
5051
5152
def prewarm(proc: JobProcess) -> None:
    """Load the Silero VAD and stash it on the job process.

    The object returned by ``silero.VAD.load()`` is stored under the
    ``"vad"`` key of ``proc.userdata``. ``entrypoint`` reads that same key
    (``ctx.proc.userdata["vad"]``) when it builds its ``AgentSession``, so
    this key must be populated before ``entrypoint`` runs.

    Args:
        proc: Job process whose ``userdata`` mapping receives the VAD.
    """
    # NOTE(review): presumably registered as the worker's prewarm hook so the
    # load happens ahead of the first job -- confirm where it is wired up.
    proc.userdata["vad"] = silero.VAD.load()
5555
5656
5757async def entrypoint (ctx : JobContext ):
@@ -65,17 +65,17 @@ async def entrypoint(ctx: JobContext):
6565 session = AgentSession (
6666 # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
6767 # See all available models at https://docs.livekit.io/agents/models/llm/
68- llm = "openai /gpt-4o-mini" ,
68+ llm = "azure /gpt-4o-mini" ,
6969 # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
7070 # See all available models at https://docs.livekit.io/agents/models/stt/
71- stt = "deepgram/nova-3" ,
71+ stt = inference . STT ( language = "en" ) ,
7272 # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
7373 # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
7474 tts = "cartesia/sonic-2:6f84f4b8-58a2-430c-8c79-688dad597532" ,
7575 # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
7676 # See more at https://docs.livekit.io/agents/build/turns
77- # turn_detection=MultilingualModel (),
78- # vad=ctx.proc.userdata["vad"],
77+ turn_detection = EnglishModel (),
78+ vad = ctx .proc .userdata ["vad" ],
7979 # allow the LLM to generate a response while waiting for the end of turn
8080 # See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
8181 preemptive_generation = True ,
0 commit comments