Skip to content

Commit df523ab

Browse files
authored
feat(agents): add initial test framework for agent testing (#965)
1 parent 92699c9 commit df523ab

File tree

10 files changed

+1089
-54
lines changed

10 files changed

+1089
-54
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@livekit/agents': patch
3+
---
4+
5+
Supports an initial set of testing utilities in the agent framework

agents/src/voice/agent_activity.ts

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,18 +1350,24 @@ export class AgentActivity implements RecognitionHooks {
13501350
);
13511351
tasks.push(llmTask);
13521352

1353-
const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
1354-
13551353
let ttsTask: Task<void> | null = null;
13561354
let ttsStream: ReadableStream<AudioFrame> | null = null;
1355+
let llmOutput: ReadableStream<string>;
1356+
13571357
if (audioOutput) {
1358+
// Only tee the stream when we need TTS
1359+
const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
1360+
llmOutput = textOutput;
13581361
[ttsTask, ttsStream] = performTTSInference(
13591362
(...args) => this.agent.ttsNode(...args),
13601363
ttsTextInput,
13611364
modelSettings,
13621365
replyAbortController,
13631366
);
13641367
tasks.push(ttsTask);
1368+
} else {
1369+
// No TTS needed, use the stream directly
1370+
llmOutput = llmGenData.textStream;
13651371
}
13661372

13671373
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
14211427
//TODO(AJS-272): before executing tools, make sure we generated all the text
14221428
// (this ensure everything is kept ordered)
14231429

1424-
const onToolExecutionStarted = (_: FunctionCall) => {
1425-
// TODO(brian): handle speech_handle item_added
1430+
const onToolExecutionStarted = (f: FunctionCall) => {
1431+
speechHandle._itemAdded([f]);
1432+
this.agent._chatCtx.items.push(f);
1433+
this.agentSession._toolItemsAdded([f]);
14261434
};
14271435

1428-
const onToolExecutionCompleted = (_: ToolExecutionOutput) => {
1429-
// TODO(brian): handle speech_handle item_added
1436+
const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
1437+
if (out.toolCallOutput) {
1438+
speechHandle._itemAdded([out.toolCallOutput]);
1439+
}
14301440
};
14311441

14321442
const [executeToolsTask, toolOutput] = performToolExecutions({
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
15011511
});
15021512
chatCtx.insert(message);
15031513
this.agent._chatCtx.insert(message);
1514+
speechHandle._itemAdded([message]);
15041515
this.agentSession._conversationItemAdded(message);
15051516
}
15061517

@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
15281539
});
15291540
chatCtx.insert(message);
15301541
this.agent._chatCtx.insert(message);
1542+
speechHandle._itemAdded([message]);
15311543
this.agentSession._conversationItemAdded(message);
15321544
this.logger.info(
15331545
{ speech_id: speechHandle.id, message: textOut.text },
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
16121624
if (shouldGenerateToolReply) {
16131625
chatCtx.insert(toolMessages);
16141626

1615-
const handle = SpeechHandle.create({
1616-
allowInterruptions: speechHandle.allowInterruptions,
1617-
stepIndex: speechHandle._stepIndex + 1,
1618-
parent: speechHandle,
1619-
});
1620-
this.agentSession.emit(
1621-
AgentSessionEventTypes.SpeechCreated,
1622-
createSpeechCreatedEvent({
1623-
userInitiated: false,
1624-
source: 'tool_response',
1625-
speechHandle: handle,
1626-
}),
1627-
);
1627+
// Increment step count on SAME handle (parity with Python agent_activity.py L2081)
1628+
speechHandle._numSteps += 1;
16281629

16291630
// Avoid setting tool_choice to "required" or a specific function when
16301631
// passing tool response back to the LLM
16311632
const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
16321633

1634+
// Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
16331635
const toolResponseTask = this.createSpeechTask({
16341636
task: Task.from(() =>
16351637
this.pipelineReplyTask(
1636-
handle,
1638+
speechHandle,
16371639
chatCtx,
16381640
toolCtx,
16391641
{ toolChoice: respondToolChoice },
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
16431645
toolMessages,
16441646
),
16451647
),
1646-
ownedSpeechHandle: handle,
1648+
ownedSpeechHandle: speechHandle,
16471649
name: 'AgentActivity.pipelineReply',
16481650
});
16491651

16501652
toolResponseTask.finally(() => this.onPipelineReplyDone());
16511653

1652-
this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1654+
this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
16531655
} else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
16541656
for (const msg of toolMessages) {
16551657
msg.createdAt = replyStartedAt;

agents/src/voice/agent_session.ts

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ import { RecorderIO } from './recorder_io/index.js';
6161
import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
6262
import type { UnknownUserData } from './run_context.js';
6363
import type { SpeechHandle } from './speech_handle.js';
64+
import { RunResult } from './testing/run_result.js';
6465

6566
export interface VoiceOptions {
6667
allowInterruptions: boolean;
@@ -167,6 +168,9 @@ export class AgentSession<
167168
/** @internal - Timestamp when the session started (milliseconds) */
168169
_startedAt?: number;
169170

171+
/** @internal - Current run state for testing */
172+
_globalRunState?: RunResult;
173+
170174
constructor(opts: AgentSessionOptions<UserData>) {
171175
super();
172176

@@ -272,7 +276,7 @@ export class AgentSession<
272276
span,
273277
}: {
274278
agent: Agent;
275-
room: Room;
279+
room?: Room;
276280
inputOptions?: Partial<RoomInputOptions>;
277281
outputOptions?: Partial<RoomOutputOptions>;
278282
span: Span;
@@ -283,41 +287,45 @@ export class AgentSession<
283287
this._updateAgentState('initializing');
284288

285289
const tasks: Promise<void>[] = [];
286-
// Check for existing input/output configuration and warn if needed
287-
if (this.input.audio && inputOptions?.audioEnabled !== false) {
288-
this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
289-
}
290290

291-
if (this.output.audio && outputOptions?.audioEnabled !== false) {
292-
this.logger.warn(
293-
'RoomIO audio output is enabled but output.audio is already set, ignoring..',
294-
);
295-
}
291+
if (room && !this.roomIO) {
292+
// Check for existing input/output configuration and warn if needed
293+
if (this.input.audio && inputOptions?.audioEnabled !== false) {
294+
this.logger.warn(
295+
'RoomIO audio input is enabled but input.audio is already set, ignoring..',
296+
);
297+
}
296298

297-
if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
298-
this.logger.warn(
299-
'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
300-
);
301-
}
299+
if (this.output.audio && outputOptions?.audioEnabled !== false) {
300+
this.logger.warn(
301+
'RoomIO audio output is enabled but output.audio is already set, ignoring..',
302+
);
303+
}
302304

303-
this.roomIO = new RoomIO({
304-
agentSession: this,
305-
room,
306-
inputOptions,
307-
outputOptions,
308-
});
309-
this.roomIO.start();
305+
if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
306+
this.logger.warn(
307+
'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
308+
);
309+
}
310+
311+
this.roomIO = new RoomIO({
312+
agentSession: this,
313+
room,
314+
inputOptions,
315+
outputOptions,
316+
});
317+
this.roomIO.start();
318+
}
310319

311320
let ctx: JobContext | undefined = undefined;
312321
try {
313322
ctx = getJobContext();
314-
} catch (error) {
323+
} catch {
315324
// JobContext is not available in evals
316-
this.logger.warn('JobContext is not available');
317325
}
318326

319327
if (ctx) {
320-
if (ctx.room === room && !room.isConnected) {
328+
if (room && ctx.room === room && !room.isConnected) {
321329
this.logger.debug('Auto-connecting to room via job context');
322330
tasks.push(ctx.connect());
323331
}
@@ -370,7 +378,7 @@ export class AgentSession<
370378
record,
371379
}: {
372380
agent: Agent;
373-
room: Room;
381+
room?: Room;
374382
inputOptions?: Partial<RoomInputOptions>;
375383
outputOptions?: Partial<RoomOutputOptions>;
376384
record?: boolean;
@@ -497,13 +505,50 @@ export class AgentSession<
497505

498506
// attach to the session span if called outside of the AgentSession
499507
const activeSpan = trace.getActiveSpan();
508+
let handle: SpeechHandle;
500509
if (!activeSpan && this.rootSpanContext) {
501-
return otelContext.with(this.rootSpanContext, () =>
510+
handle = otelContext.with(this.rootSpanContext, () =>
502511
doGenerateReply(this.activity!, this.nextActivity),
503512
);
513+
} else {
514+
handle = doGenerateReply(this.activity!, this.nextActivity);
504515
}
505516

506-
return doGenerateReply(this.activity!, this.nextActivity);
517+
if (this._globalRunState) {
518+
this._globalRunState._watchHandle(handle);
519+
}
520+
521+
return handle;
522+
}
523+
524+
/**
525+
* Run a test with user input and return a result for assertions.
526+
*
527+
* This method is primarily used for testing agent behavior without
528+
* requiring a real room connection.
529+
*
530+
* @example
531+
* ```typescript
532+
* const result = await session.run({ userInput: 'Hello' });
533+
* result.expect.nextEvent().isMessage({ role: 'assistant' });
534+
* result.expect.noMoreEvents();
535+
* ```
536+
*
537+
* @param options - Run options including user input
538+
* @returns A RunResult that resolves when the agent finishes responding
539+
*
540+
* TODO: Add outputType parameter for typed outputs (parity with Python)
541+
*/
542+
run(options: { userInput: string }): RunResult {
543+
if (this._globalRunState && !this._globalRunState.done()) {
544+
throw new Error('nested runs are not supported');
545+
}
546+
547+
const runState = new RunResult({ userInput: options.userInput });
548+
this._globalRunState = runState;
549+
this.generateReply({ userInput: options.userInput });
550+
551+
return runState;
507552
}
508553

509554
private async updateActivity(agent: Agent): Promise<void> {

agents/src/voice/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ export { type TimedString } from './io.js';
1010
export * from './report.js';
1111
export * from './room_io/index.js';
1212
export { RunContext } from './run_context.js';
13+
export * as testing from './testing/index.js';

agents/src/voice/speech_handle.ts

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,25 @@
22
//
33
// SPDX-License-Identifier: Apache-2.0
44
import type { ChatItem } from '../llm/index.js';
5-
import { Event, Future, shortuuid } from '../utils.js';
65
import type { Task } from '../utils.js';
6+
import { Event, Future, shortuuid } from '../utils.js';
77
import { asyncLocalStorage } from './agent.js';
88

9+
/** Symbol used to identify SpeechHandle instances */
10+
const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
11+
12+
/**
13+
* Type guard to check if a value is a SpeechHandle.
14+
*/
15+
export function isSpeechHandle(value: unknown): value is SpeechHandle {
16+
return (
17+
typeof value === 'object' &&
18+
value !== null &&
19+
SPEECH_HANDLE_SYMBOL in value &&
20+
(value as Record<symbol, boolean>)[SPEECH_HANDLE_SYMBOL] === true
21+
);
22+
}
23+
924
export class SpeechHandle {
1025
/** Priority for messages that should be played after all other messages in the queue */
1126
static SPEECH_PRIORITY_LOW = 0;
@@ -18,16 +33,21 @@ export class SpeechHandle {
1833
private authorizedEvent = new Event();
1934
private scheduledFut = new Future<void>();
2035
private doneFut = new Future<void>();
21-
2236
private generations: Future<void>[] = [];
37+
private _chatItems: ChatItem[] = [];
38+
2339
/** @internal */
2440
_tasks: Task<void>[] = [];
25-
private _chatItems: ChatItem[] = [];
26-
private _numSteps = 1;
41+
42+
/** @internal */
43+
_numSteps = 1;
2744

2845
private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
2946
private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
3047

48+
/** @internal Symbol marker for type identification */
49+
readonly [SPEECH_HANDLE_SYMBOL] = true;
50+
3151
constructor(
3252
private _id: string,
3353
private _allowInterruptions: boolean,

agents/src/voice/testing/index.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
/**
6+
* Testing utilities for agent evaluation.
7+
*
8+
* @example
9+
* ```typescript
10+
* import { AgentSession, Agent, voice } from '@livekit/agents';
11+
*
12+
* const session = new AgentSession({ llm });
13+
* await session.start(agent);
14+
*
15+
* const result = await session.run({ userInput: 'Hello' });
16+
* result.expect.nextEvent().isMessage({ role: 'assistant' });
17+
* result.expect.noMoreEvents();
18+
* ```
19+
*
20+
* @packageDocumentation
21+
*/
22+
23+
export {
24+
AgentHandoffAssert,
25+
AssertionError,
26+
EventAssert,
27+
FunctionCallAssert,
28+
FunctionCallOutputAssert,
29+
MessageAssert,
30+
RunAssert,
31+
RunResult,
32+
} from './run_result.js';
33+
34+
export {
35+
isAgentHandoffEvent,
36+
isChatMessageEvent,
37+
isFunctionCallEvent,
38+
isFunctionCallOutputEvent,
39+
type AgentHandoffAssertOptions,
40+
type AgentHandoffEvent,
41+
type ChatMessageEvent,
42+
type EventType,
43+
type FunctionCallAssertOptions,
44+
type FunctionCallEvent,
45+
type FunctionCallOutputAssertOptions,
46+
type FunctionCallOutputEvent,
47+
type MessageAssertOptions,
48+
type RunEvent,
49+
} from './types.js';

0 commit comments

Comments
 (0)