Skip to content

Commit df523ab

Browse files
authored
feat(agents): add initial test framework for agent testing (#965)
1 parent 92699c9 commit df523ab

File tree

10 files changed

+1089
-54
lines changed

10 files changed

+1089
-54
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@livekit/agents': patch
3+
---
4+
5+
Supports an initial set of testing utilities in the agent framework

agents/src/voice/agent_activity.ts

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,18 +1350,24 @@ export class AgentActivity implements RecognitionHooks {
13501350
);
13511351
tasks.push(llmTask);
13521352

1353-
const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
1354-
13551353
let ttsTask: Task<void> | null = null;
13561354
let ttsStream: ReadableStream<AudioFrame> | null = null;
1355+
let llmOutput: ReadableStream<string>;
1356+
13571357
if (audioOutput) {
1358+
// Only tee the stream when we need TTS
1359+
const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
1360+
llmOutput = textOutput;
13581361
[ttsTask, ttsStream] = performTTSInference(
13591362
(...args) => this.agent.ttsNode(...args),
13601363
ttsTextInput,
13611364
modelSettings,
13621365
replyAbortController,
13631366
);
13641367
tasks.push(ttsTask);
1368+
} else {
1369+
// No TTS needed, use the stream directly
1370+
llmOutput = llmGenData.textStream;
13651371
}
13661372

13671373
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
14211427
//TODO(AJS-272): before executing tools, make sure we generated all the text
14221428
// (this ensure everything is kept ordered)
14231429

1424-
const onToolExecutionStarted = (_: FunctionCall) => {
1425-
// TODO(brian): handle speech_handle item_added
1430+
const onToolExecutionStarted = (f: FunctionCall) => {
1431+
speechHandle._itemAdded([f]);
1432+
this.agent._chatCtx.items.push(f);
1433+
this.agentSession._toolItemsAdded([f]);
14261434
};
14271435

1428-
const onToolExecutionCompleted = (_: ToolExecutionOutput) => {
1429-
// TODO(brian): handle speech_handle item_added
1436+
const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
1437+
if (out.toolCallOutput) {
1438+
speechHandle._itemAdded([out.toolCallOutput]);
1439+
}
14301440
};
14311441

14321442
const [executeToolsTask, toolOutput] = performToolExecutions({
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
15011511
});
15021512
chatCtx.insert(message);
15031513
this.agent._chatCtx.insert(message);
1514+
speechHandle._itemAdded([message]);
15041515
this.agentSession._conversationItemAdded(message);
15051516
}
15061517

@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
15281539
});
15291540
chatCtx.insert(message);
15301541
this.agent._chatCtx.insert(message);
1542+
speechHandle._itemAdded([message]);
15311543
this.agentSession._conversationItemAdded(message);
15321544
this.logger.info(
15331545
{ speech_id: speechHandle.id, message: textOut.text },
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
16121624
if (shouldGenerateToolReply) {
16131625
chatCtx.insert(toolMessages);
16141626

1615-
const handle = SpeechHandle.create({
1616-
allowInterruptions: speechHandle.allowInterruptions,
1617-
stepIndex: speechHandle._stepIndex + 1,
1618-
parent: speechHandle,
1619-
});
1620-
this.agentSession.emit(
1621-
AgentSessionEventTypes.SpeechCreated,
1622-
createSpeechCreatedEvent({
1623-
userInitiated: false,
1624-
source: 'tool_response',
1625-
speechHandle: handle,
1626-
}),
1627-
);
1627+
// Increment step count on SAME handle (parity with Python agent_activity.py L2081)
1628+
speechHandle._numSteps += 1;
16281629

16291630
// Avoid setting tool_choice to "required" or a specific function when
16301631
// passing tool response back to the LLM
16311632
const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
16321633

1634+
// Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
16331635
const toolResponseTask = this.createSpeechTask({
16341636
task: Task.from(() =>
16351637
this.pipelineReplyTask(
1636-
handle,
1638+
speechHandle,
16371639
chatCtx,
16381640
toolCtx,
16391641
{ toolChoice: respondToolChoice },
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
16431645
toolMessages,
16441646
),
16451647
),
1646-
ownedSpeechHandle: handle,
1648+
ownedSpeechHandle: speechHandle,
16471649
name: 'AgentActivity.pipelineReply',
16481650
});
16491651

16501652
toolResponseTask.finally(() => this.onPipelineReplyDone());
16511653

1652-
this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1654+
this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
16531655
} else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
16541656
for (const msg of toolMessages) {
16551657
msg.createdAt = replyStartedAt;

agents/src/voice/agent_session.ts

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ import { RecorderIO } from './recorder_io/index.js';
6161
import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
6262
import type { UnknownUserData } from './run_context.js';
6363
import type { SpeechHandle } from './speech_handle.js';
64+
import { RunResult } from './testing/run_result.js';
6465

6566
export interface VoiceOptions {
6667
allowInterruptions: boolean;
@@ -167,6 +168,9 @@ export class AgentSession<
167168
/** @internal - Timestamp when the session started (milliseconds) */
168169
_startedAt?: number;
169170

171+
/** @internal - Current run state for testing */
172+
_globalRunState?: RunResult;
173+
170174
constructor(opts: AgentSessionOptions<UserData>) {
171175
super();
172176

@@ -272,7 +276,7 @@ export class AgentSession<
272276
span,
273277
}: {
274278
agent: Agent;
275-
room: Room;
279+
room?: Room;
276280
inputOptions?: Partial<RoomInputOptions>;
277281
outputOptions?: Partial<RoomOutputOptions>;
278282
span: Span;
@@ -283,41 +287,45 @@ export class AgentSession<
283287
this._updateAgentState('initializing');
284288

285289
const tasks: Promise<void>[] = [];
286-
// Check for existing input/output configuration and warn if needed
287-
if (this.input.audio && inputOptions?.audioEnabled !== false) {
288-
this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
289-
}
290290

291-
if (this.output.audio && outputOptions?.audioEnabled !== false) {
292-
this.logger.warn(
293-
'RoomIO audio output is enabled but output.audio is already set, ignoring..',
294-
);
295-
}
291+
if (room && !this.roomIO) {
292+
// Check for existing input/output configuration and warn if needed
293+
if (this.input.audio && inputOptions?.audioEnabled !== false) {
294+
this.logger.warn(
295+
'RoomIO audio input is enabled but input.audio is already set, ignoring..',
296+
);
297+
}
296298

297-
if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
298-
this.logger.warn(
299-
'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
300-
);
301-
}
299+
if (this.output.audio && outputOptions?.audioEnabled !== false) {
300+
this.logger.warn(
301+
'RoomIO audio output is enabled but output.audio is already set, ignoring..',
302+
);
303+
}
302304

303-
this.roomIO = new RoomIO({
304-
agentSession: this,
305-
room,
306-
inputOptions,
307-
outputOptions,
308-
});
309-
this.roomIO.start();
305+
if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
306+
this.logger.warn(
307+
'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
308+
);
309+
}
310+
311+
this.roomIO = new RoomIO({
312+
agentSession: this,
313+
room,
314+
inputOptions,
315+
outputOptions,
316+
});
317+
this.roomIO.start();
318+
}
310319

311320
let ctx: JobContext | undefined = undefined;
312321
try {
313322
ctx = getJobContext();
314-
} catch (error) {
323+
} catch {
315324
// JobContext is not available in evals
316-
this.logger.warn('JobContext is not available');
317325
}
318326

319327
if (ctx) {
320-
if (ctx.room === room && !room.isConnected) {
328+
if (room && ctx.room === room && !room.isConnected) {
321329
this.logger.debug('Auto-connecting to room via job context');
322330
tasks.push(ctx.connect());
323331
}
@@ -370,7 +378,7 @@ export class AgentSession<
370378
record,
371379
}: {
372380
agent: Agent;
373-
room: Room;
381+
room?: Room;
374382
inputOptions?: Partial<RoomInputOptions>;
375383
outputOptions?: Partial<RoomOutputOptions>;
376384
record?: boolean;
@@ -497,13 +505,50 @@ export class AgentSession<
497505

498506
// attach to the session span if called outside of the AgentSession
499507
const activeSpan = trace.getActiveSpan();
508+
let handle: SpeechHandle;
500509
if (!activeSpan && this.rootSpanContext) {
501-
return otelContext.with(this.rootSpanContext, () =>
510+
handle = otelContext.with(this.rootSpanContext, () =>
502511
doGenerateReply(this.activity!, this.nextActivity),
503512
);
513+
} else {
514+
handle = doGenerateReply(this.activity!, this.nextActivity);
504515
}
505516

506-
return doGenerateReply(this.activity!, this.nextActivity);
517+
if (this._globalRunState) {
518+
this._globalRunState._watchHandle(handle);
519+
}
520+
521+
return handle;
522+
}
523+
524+
/**
525+
* Run a test with user input and return a result for assertions.
526+
*
527+
* This method is primarily used for testing agent behavior without
528+
* requiring a real room connection.
529+
*
530+
* @example
531+
* ```typescript
532+
* const result = await session.run({ userInput: 'Hello' });
533+
* result.expect.nextEvent().isMessage({ role: 'assistant' });
534+
* result.expect.noMoreEvents();
535+
* ```
536+
*
537+
* @param options - Run options including user input
538+
* @returns A RunResult that resolves when the agent finishes responding
539+
*
540+
* TODO: Add outputType parameter for typed outputs (parity with Python)
541+
*/
542+
run(options: { userInput: string }): RunResult {
543+
if (this._globalRunState && !this._globalRunState.done()) {
544+
throw new Error('nested runs are not supported');
545+
}
546+
547+
const runState = new RunResult({ userInput: options.userInput });
548+
this._globalRunState = runState;
549+
this.generateReply({ userInput: options.userInput });
550+
551+
return runState;
507552
}
508553

509554
private async updateActivity(agent: Agent): Promise<void> {

agents/src/voice/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ export { type TimedString } from './io.js';
1010
export * from './report.js';
1111
export * from './room_io/index.js';
1212
export { RunContext } from './run_context.js';
13+
export * as testing from './testing/index.js';

agents/src/voice/speech_handle.ts

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,25 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44
import type { ChatItem } from '../llm/index.js';
5-
import { Event, Future, shortuuid } from '../utils.js';
65
import type { Task } from '../utils.js';
6+
import { Event, Future, shortuuid } from '../utils.js';
77
import { asyncLocalStorage } from './agent.js';
88

9+
/** Symbol used to identify SpeechHandle instances */
10+
const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
11+
12+
/**
13+
* Type guard to check if a value is a SpeechHandle.
14+
*/
15+
export function isSpeechHandle(value: unknown): value is SpeechHandle {
16+
return (
17+
typeof value === 'object' &&
18+
value !== null &&
19+
SPEECH_HANDLE_SYMBOL in value &&
20+
(value as Record<symbol, boolean>)[SPEECH_HANDLE_SYMBOL] === true
21+
);
22+
}
23+
924
export class SpeechHandle {
1025
/** Priority for messages that should be played after all other messages in the queue */
1126
static SPEECH_PRIORITY_LOW = 0;
@@ -18,16 +33,21 @@ export class SpeechHandle {
1833
private authorizedEvent = new Event();
1934
private scheduledFut = new Future<void>();
2035
private doneFut = new Future<void>();
21-
2236
private generations: Future<void>[] = [];
37+
private _chatItems: ChatItem[] = [];
38+
2339
/** @internal */
2440
_tasks: Task<void>[] = [];
25-
private _chatItems: ChatItem[] = [];
26-
private _numSteps = 1;
41+
42+
/** @internal */
43+
_numSteps = 1;
2744

2845
private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
2946
private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
3047

48+
/** @internal Symbol marker for type identification */
49+
readonly [SPEECH_HANDLE_SYMBOL] = true;
50+
3151
constructor(
3252
private _id: string,
3353
private _allowInterruptions: boolean,

agents/src/voice/testing/index.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
/**
6+
* Testing utilities for agent evaluation.
7+
*
8+
* @example
9+
* ```typescript
10+
* import { AgentSession, Agent, voice } from '@livekit/agents';
11+
*
12+
* const session = new AgentSession({ llm });
13+
* await session.start(agent);
14+
*
15+
* const result = await session.run({ userInput: 'Hello' });
16+
* result.expect.nextEvent().isMessage({ role: 'assistant' });
17+
* result.expect.noMoreEvents();
18+
* ```
19+
*
20+
* @packageDocumentation
21+
*/
22+
23+
export {
24+
AgentHandoffAssert,
25+
AssertionError,
26+
EventAssert,
27+
FunctionCallAssert,
28+
FunctionCallOutputAssert,
29+
MessageAssert,
30+
RunAssert,
31+
RunResult,
32+
} from './run_result.js';
33+
34+
export {
35+
isAgentHandoffEvent,
36+
isChatMessageEvent,
37+
isFunctionCallEvent,
38+
isFunctionCallOutputEvent,
39+
type AgentHandoffAssertOptions,
40+
type AgentHandoffEvent,
41+
type ChatMessageEvent,
42+
type EventType,
43+
type FunctionCallAssertOptions,
44+
type FunctionCallEvent,
45+
type FunctionCallOutputAssertOptions,
46+
type FunctionCallOutputEvent,
47+
type MessageAssertOptions,
48+
type RunEvent,
49+
} from './types.js';

0 commit comments

Comments
 (0)