Skip to content

Commit 20facf4

Browse files
Comprehensive example and bug fixes with google plugins (#646)
Co-authored-by: Shubhra <shubhrakanti@berkeley.edu>
1 parent ab83b45 commit 20facf4

File tree

7 files changed

+235
-8
lines changed

7 files changed

+235
-8
lines changed

.changeset/cyan-baths-wait.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
'@livekit/agents-plugin-elevenlabs': patch
3+
'@livekit/agents-plugin-neuphonic': patch
4+
'@livekit/agents-plugin-cartesia': patch
5+
'@livekit/agents-plugin-deepgram': patch
6+
'@livekit/agents-plugin-resemble': patch
7+
'@livekit/agents-plugin-livekit': patch
8+
'@livekit/agents-plugin-google': patch
9+
'@livekit/agents-plugin-openai': patch
10+
'@livekit/agents-plugin-silero': patch
11+
'@livekit/agents-plugins-test': patch
12+
'@livekit/agents': patch
13+
---
14+
15+
Fix Google LLM and Gemini realtime

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ Currently, only the following plugins are supported:
6767
| [@livekit/agents-plugin-openai](https://www.npmjs.com/package/@livekit/agents-plugin-openai) | LLM, TTS, STT |
6868
| [@livekit/agents-plugin-google](https://www.npmjs.com/package/@livekit/agents-plugin-google) | LLM, TTS |
6969
| [@livekit/agents-plugin-deepgram](https://www.npmjs.com/package/@livekit/agents-plugin-deepgram) | STT |
70-
| [@livekit/agents-plugin-assemblyai](https://www.npmjs.com/package/@livekit/agents-plugin-assemblyai) | STT |
7170
| [@livekit/agents-plugin-elevenlabs](https://www.npmjs.com/package/@livekit/agents-plugin-elevenlabs) | TTS |
7271
| [@livekit/agents-plugin-cartesia](https://www.npmjs.com/package/@livekit/agents-plugin-cartesia) | TTS |
7372
| [@livekit/agents-plugin-neuphonic](https://www.npmjs.com/package/@livekit/agents-plugin-neuphonic) | TTS |

examples/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@
1919
},
2020
"dependencies": {
2121
"@livekit/agents": "workspace:*",
22+
"@livekit/agents-plugin-resemble": "workspace:*",
2223
"@livekit/agents-plugin-deepgram": "workspace:*",
2324
"@livekit/agents-plugin-elevenlabs": "workspace:*",
2425
"@livekit/agents-plugin-google": "workspace:*",
2526
"@livekit/agents-plugin-livekit": "workspace:*",
2627
"@livekit/agents-plugin-openai": "workspace:*",
2728
"@livekit/agents-plugin-silero": "workspace:*",
2829
"@livekit/agents-plugin-cartesia": "workspace:*",
30+
"@livekit/agents-plugin-neuphonic": "workspace:*",
2931
"@livekit/noise-cancellation-node": "^0.1.9",
3032
"@livekit/rtc-node": "^0.13.11",
3133
"livekit-server-sdk": "^2.9.2",

examples/src/comprehensive_test.ts

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
import {
5+
type JobContext,
6+
type JobProcess,
7+
WorkerOptions,
8+
cli,
9+
defineAgent,
10+
llm,
11+
metrics,
12+
voice,
13+
} from '@livekit/agents';
14+
import * as cartesia from '@livekit/agents-plugin-cartesia';
15+
import * as deepgram from '@livekit/agents-plugin-deepgram';
16+
import * as elevenlabs from '@livekit/agents-plugin-elevenlabs';
17+
import * as google from '@livekit/agents-plugin-google';
18+
import * as livekit from '@livekit/agents-plugin-livekit';
19+
import * as neuphonic from '@livekit/agents-plugin-neuphonic';
20+
import * as openai from '@livekit/agents-plugin-openai';
21+
import * as resemble from '@livekit/agents-plugin-resemble';
22+
import * as silero from '@livekit/agents-plugin-silero';
23+
import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
24+
import { fileURLToPath } from 'node:url';
25+
import { z } from 'zod';
26+
27+
/**
 * Speech-to-text factories, keyed by provider name.
 * Each entry builds a fresh plugin instance with default options when invoked.
 */
const sttOptions = {
  deepgram: () => {
    return new deepgram.STT();
  },
};

/** Text-to-speech factories, keyed by provider name. */
const ttsOptions = {
  cartesia: () => {
    return new cartesia.TTS();
  },
  elevenlabs: () => {
    return new elevenlabs.TTS();
  },
  openai: () => {
    return new openai.TTS();
  },
  gemini: () => {
    return new google.beta.TTS();
  },
  neuphonic: () => {
    return new neuphonic.TTS();
  },
  resemble: () => {
    return new resemble.TTS();
  },
};

/** End-of-utterance (turn detector) model factories, keyed by variant name. */
const eouOptions = {
  english: () => {
    return new livekit.turnDetector.EnglishModel();
  },
  multilingual: () => {
    return new livekit.turnDetector.MultilingualModel();
  },
};

/** Text LLM factories, keyed by provider name. */
const llmOptions = {
  openai: () => {
    return new openai.LLM();
  },
  gemini: () => {
    return new google.LLM();
  },
};

/** Realtime model factories, keyed by provider name. */
const realtimeLlmOptions = {
  openai: () => {
    return new openai.realtime.RealtimeModel();
  },
  gemini: () => {
    return new google.beta.realtime.RealtimeModel();
  },
};
54+
55+
/**
 * Per-session bookkeeping: one set of already-tested choice names per plugin
 * category (STT, TTS, EOU, LLM, realtime LLM).
 */
type UserData = Record<
  | 'testedSttChoices'
  | 'testedTtsChoices'
  | 'testedEouChoices'
  | 'testedLlmChoices'
  | 'testedRealtimeLlmChoices',
  Set<string>
>;
62+
63+
/**
 * Voice agent that exercises one combination of STT, TTS, end-of-utterance
 * (turn detection) and LLM plugins so a human tester can verify it by talking
 * to it.
 *
 * When `realtimeLlmChoice` is given, the realtime model is used as the agent's
 * LLM; otherwise the text LLM selected by `llmChoice` is used.
 */
class TestAgent extends voice.Agent<UserData> {
  private readonly sttChoice: keyof typeof sttOptions;
  private readonly ttsChoice: keyof typeof ttsOptions;
  private readonly eouChoice: keyof typeof eouOptions;
  /** Label of the model actually driving the agent, e.g. `gemini` or `openai realtime`. */
  private readonly modelName: string;

  /**
   * @param sttChoice - key into `sttOptions`
   * @param ttsChoice - key into `ttsOptions`
   * @param eouChoice - key into `eouOptions`
   * @param llmChoice - key into `llmOptions`; used only when no realtime model is chosen
   * @param realtimeLlmChoice - optional key into `realtimeLlmOptions`; takes precedence over `llmChoice`
   */
  constructor({
    sttChoice,
    ttsChoice,
    eouChoice,
    llmChoice,
    realtimeLlmChoice,
  }: {
    sttChoice: keyof typeof sttOptions;
    ttsChoice: keyof typeof ttsOptions;
    eouChoice: keyof typeof eouOptions;
    llmChoice: keyof typeof llmOptions;
    realtimeLlmChoice?: keyof typeof realtimeLlmOptions;
  }) {
    const stt = sttOptions[sttChoice]();
    const tts = ttsOptions[ttsChoice]();
    const eou = eouOptions[eouChoice]();

    // Build only the model that will actually be used: constructing the text
    // LLM as well when a realtime model is selected would create a client
    // that is immediately discarded.
    const activeModel = realtimeLlmChoice
      ? realtimeLlmOptions[realtimeLlmChoice]()
      : llmOptions[llmChoice]();

    const modelName = realtimeLlmChoice ? `${realtimeLlmChoice} realtime` : llmChoice;

    super({
      instructions: `You are a test voice-based agent, you can hear the user's message and respond to it. User is testing your hearing & speaking abilities.
You are using ${sttChoice} STT, ${ttsChoice} TTS, ${eouChoice} EOU, ${modelName} LLM.
You can use the following tools to test your abilities:
- testTool: Testing agent's tool calling ability
- nextAgent: Called when user confirm current agent is working and want to proceed to next agent`,
      stt: stt,
      tts: tts,
      llm: activeModel,
      turnDetection: eou,
      tools: {
        // Echoes its input back so the tester can confirm tool calls round-trip.
        testTool: llm.tool({
          description: "Testing agent's tool calling ability",
          parameters: z
            .object({
              randomString: z.string().describe('A random string'),
            })
            .describe('Test parameter'),
          execute: async (input) => {
            return {
              result: 'Tool been called with input: ' + JSON.stringify(input),
            };
          },
        }),
        // NOTE(review): the handoff target is built with exactly the same
        // choices as the current agent, so "next agent" restarts the same
        // configuration — confirm whether it should advance to an untested
        // combination instead.
        nextAgent: llm.tool({
          description:
            'Called when user confirm current agent is working and want to proceed to next agent',
          execute: async () => {
            return llm.handoff({
              agent: new TestAgent({
                sttChoice: sttChoice,
                ttsChoice: ttsChoice,
                eouChoice: eouChoice,
                llmChoice: llmChoice,
                realtimeLlmChoice: realtimeLlmChoice,
              }),
              returns: 'Transfer to next agent',
            });
          },
        }),
      },
    });

    this.sttChoice = sttChoice;
    this.ttsChoice = ttsChoice;
    this.eouChoice = eouChoice;
    this.modelName = modelName;
  }

  /**
   * Introduces the agent when it becomes active. With a realtime model the
   * introduction is requested through `generateReply`; otherwise a fixed
   * greeting is spoken with `say`.
   */
  async onEnter(): Promise<void> {
    // `modelName` (not the raw `llmChoice`) is announced so the introduction
    // names the realtime model when one is in use, matching the instructions.
    if (this.llm instanceof llm.RealtimeModel) {
      this.session.generateReply({
        userInput: `Tell user that you are voice agent with ${this.sttChoice} STT, ${this.ttsChoice} TTS, ${this.eouChoice} EOU, ${this.modelName} LLM`,
      });
    } else {
      this.session.say(
        `Hi, I'm a voice agent with ${this.sttChoice} STT, ${this.ttsChoice} TTS, ${this.eouChoice} EOU, ${this.modelName} LLM. I'm ready to test your hearing & speaking abilities.`,
      );
    }
  }
}
153+
154+
/**
 * Worker entry points: `prewarm` loads the Silero VAD into the job process,
 * and `entry` connects to the room and starts a voice session with a
 * `TestAgent`.
 */
export default defineAgent({
  prewarm: async (proc: JobProcess) => {
    // Stored on the process so `entry` can pick it up from `ctx.proc.userData`.
    proc.userData.vad = await silero.VAD.load();
  },
  entry: async (ctx: JobContext) => {
    await ctx.connect();

    // Set by `prewarm` above.
    const prewarmedVad = ctx.proc.userData.vad! as silero.VAD;

    // Every "tested" set starts out empty.
    const freshUserData = {
      testedSttChoices: new Set(),
      testedTtsChoices: new Set(),
      testedEouChoices: new Set(),
      testedLlmChoices: new Set(),
      testedRealtimeLlmChoices: new Set(),
    };

    const session = new voice.AgentSession({
      vad: prewarmedVad,
      userData: freshUserData,
    });

    // Log each metrics event and fold it into the usage collector.
    const usage = new metrics.UsageCollector();
    session.on(voice.AgentSessionEventTypes.MetricsCollected, ({ metrics: collected }) => {
      metrics.logMetrics(collected);
      usage.collect(collected);
    });

    const firstAgent = new TestAgent({
      sttChoice: 'deepgram',
      ttsChoice: 'cartesia',
      eouChoice: 'multilingual',
      llmChoice: 'gemini',
    });

    await session.start({
      agent: firstAgent,
      room: ctx.room,
      inputOptions: {
        noiseCancellation: BackgroundVoiceCancellation(),
      },
    });
  },
});
194+
195+
// Start the worker CLI, passing this module's own file path as the agent.
const workerOptions = new WorkerOptions({ agent: fileURLToPath(import.meta.url) });
cli.runApp(workerOptions);

plugins/google/src/llm.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,10 @@ export class LLMStream extends llm.LLMStream {
323323
}));
324324

325325
const functionDeclarations = this.toolCtx ? toFunctionDeclarations(this.toolCtx) : undefined;
326-
const tools = functionDeclarations ? [{ functionDeclarations }] : undefined;
326+
const tools =
327+
functionDeclarations && functionDeclarations.length > 0
328+
? [{ functionDeclarations }]
329+
: undefined;
327330

328331
let systemInstruction: types.Content | undefined = undefined;
329332
if (extraData.systemMessages && extraData.systemMessages.length > 0) {

plugins/openai/src/realtime/realtime_model.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -371,15 +371,15 @@ export class RealtimeSession extends llm.RealtimeSession {
371371
private pushedDurationMs: number = 0;
372372

373373
#logger = log();
374-
#task: Promise<void>;
374+
#task: Task<void>;
375375
#closed = false;
376376

377377
constructor(realtimeModel: RealtimeModel) {
378378
super(realtimeModel);
379379

380380
this.oaiRealtimeModel = realtimeModel;
381381

382-
this.#task = this.#mainTask();
382+
this.#task = Task.from(({ signal }) => this.#mainTask(signal));
383383

384384
this.sendEvent(this.createSessionUpdateEvent());
385385
}
@@ -727,7 +727,7 @@ export class RealtimeSession extends llm.RealtimeSession {
727727
});
728728
}
729729

730-
async #mainTask(): Promise<void> {
730+
async #mainTask(signal: AbortSignal): Promise<void> {
731731
let reconnecting = false;
732732
let numRetries = 0;
733733
let wsConn: WebSocket | null = null;
@@ -780,16 +780,20 @@ export class RealtimeSession extends llm.RealtimeSession {
780780
};
781781

782782
reconnecting = false;
783-
while (!this.#closed) {
783+
while (!this.#closed && !signal.aborted) {
784784
this.#logger.debug('Creating WebSocket connection to OpenAI Realtime API');
785785
wsConn = await this.createWsConn();
786+
if (signal.aborted) break;
786787

787788
try {
788789
if (reconnecting) {
789790
await reconnect();
791+
if (signal.aborted) break;
790792
numRetries = 0;
791793
}
794+
792795
await this.runWs(wsConn);
796+
if (signal.aborted) break;
793797
} catch (error) {
794798
if (!isAPIError(error)) {
795799
this.emitError({ error: error as Error, recoverable: false });
@@ -836,10 +840,13 @@ export class RealtimeSession extends llm.RealtimeSession {
836840

837841
private async runWs(wsConn: WebSocket): Promise<void> {
838842
const forwardEvents = async (signal: AbortSignal): Promise<void> => {
843+
const abortFuture = new Future<void>();
844+
signal.addEventListener('abort', () => abortFuture.resolve());
845+
839846
while (!this.#closed && wsConn.readyState === WebSocket.OPEN && !signal.aborted) {
840847
try {
841-
const event = await this.messageChannel.get();
842-
if (signal.aborted) {
848+
const event = await Promise.race([this.messageChannel.get(), abortFuture.await]);
849+
if (signal.aborted || abortFuture.done || event === undefined) {
843850
break;
844851
}
845852

pnpm-lock.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)