Skip to content

Commit 6e91e7a

Browse files
bcherry authored and rektdeckard committed
working
1 parent 5da0276 commit 6e91e7a

File tree

2 files changed

+8
-11
lines changed

2 files changed

+8
-11
lines changed

pyproject.toml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,11 @@ description = "Simple voice AI assistant built with LiveKit Agents for Python"
99
requires-python = ">=3.9"
1010

1111
dependencies = [
12-
"livekit-agents[turn-detector,silero]~=1.2",
12+
"livekit-agents[turn-detector,silero]~=1.2.11",
1313
"livekit-plugins-noise-cancellation~=0.2",
1414
"python-dotenv",
1515
]
1616

17-
[tool.uv.sources]
18-
livekit-agents = { git = "https://github.com/livekit/agents.git", branch = "longc/cloud-inference-draft", subdirectory = "livekit-agents"}
19-
2017
[dependency-groups]
2118
dev = [
2219
"pytest",

src/agent.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
WorkerOptions,
1414
cli,
1515
metrics,
16+
inference,
1617
)
1718
from livekit.plugins import noise_cancellation, silero
18-
from livekit.plugins.turn_detector.multilingual import MultilingualModel
19+
from livekit.plugins.turn_detector.english import EnglishModel
1920

2021
logger = logging.getLogger("agent")
2122

@@ -50,8 +51,7 @@ def __init__(self) -> None:
5051

5152

5253
def prewarm(proc: JobProcess):
53-
pass
54-
# proc.userdata["vad"] = silero.VAD.load()
54+
proc.userdata["vad"] = silero.VAD.load()
5555

5656

5757
async def entrypoint(ctx: JobContext):
@@ -65,17 +65,17 @@ async def entrypoint(ctx: JobContext):
6565
session = AgentSession(
6666
# A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
6767
# See all available models at https://docs.livekit.io/agents/models/llm/
68-
llm="openai/gpt-4o-mini",
68+
llm="azure/gpt-4o-mini",
6969
# Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
7070
# See all available models at https://docs.livekit.io/agents/models/stt/
71-
stt="deepgram/nova-3",
71+
stt=inference.STT(language="en"),
7272
# Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
7373
# See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
7474
tts="cartesia/sonic-2:6f84f4b8-58a2-430c-8c79-688dad597532",
7575
# VAD and turn detection are used to determine when the user is speaking and when the agent should respond
7676
# See more at https://docs.livekit.io/agents/build/turns
77-
# turn_detection=MultilingualModel(),
78-
# vad=ctx.proc.userdata["vad"],
77+
turn_detection=EnglishModel(),
78+
vad=ctx.proc.userdata["vad"],
7979
# allow the LLM to generate a response while waiting for the end of turn
8080
# See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
8181
preemptive_generation=True,

0 commit comments

Comments (0)