diff --git a/.changeset/long-cameras-throw.md b/.changeset/long-cameras-throw.md new file mode 100644 index 000000000..4aaee3aae --- /dev/null +++ b/.changeset/long-cameras-throw.md @@ -0,0 +1,5 @@ +--- +'@livekit/agents': patch +--- + +Emit away events for User diff --git a/agents/src/voice/agent_session.ts b/agents/src/voice/agent_session.ts index 758d116c4..e8add4c42 100644 --- a/agents/src/voice/agent_session.ts +++ b/agents/src/voice/agent_session.ts @@ -58,6 +58,7 @@ export interface VoiceOptions { maxEndpointingDelay: number; maxToolSteps: number; preemptiveGeneration: boolean; + /** Seconds with both agent and user in 'listening' before the user is marked 'away'; null or undefined disables the timer. */ userAwayTimeout?: number | null; } const defaultVoiceOptions: VoiceOptions = { @@ -69,6 +70,7 @@ const defaultVoiceOptions: VoiceOptions = { maxEndpointingDelay: 6000, maxToolSteps: 3, preemptiveGeneration: false, + userAwayTimeout: 15.0, } as const; export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector; @@ -123,6 +125,7 @@ export class AgentSession< private _output: AgentOutput; private closingTask: Promise | null = null; + /** Handle of the pending user-away timeout; null when none is scheduled. */ private userAwayTimer: NodeJS.Timeout | null = null; constructor(opts: AgentSessionOptions) { super(); @@ -167,6 +170,8 @@ export class AgentSession< // This is the "global" chat context, it holds the entire conversation history this._chatCtx = ChatContext.empty(); this.options = { ...defaultVoiceOptions, ...voiceOptions }; + + /* Final transcripts return the user from 'away' (handled in _onUserInputTranscribed). */ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this)); } get input(): AgentInput { @@ -416,6 +421,14 @@ export class AgentSession< const oldState = this._agentState; this._agentState = state; + + // Handle user away timer based on state changes + if (state === 'listening' && this.userState === 'listening') { + this._setUserAwayTimer(); + } else { + this._cancelUserAwayTimer(); + } + this.emit( AgentSessionEventTypes.AgentStateChanged, createAgentStateChangedEvent(oldState, state), @@ -430,6 +443,14 @@ export class AgentSession< const oldState = this.userState; 
this.userState = state; + + // Handle user away timer based on state changes + if (state === 'listening' && this._agentState === 'listening') { + this._setUserAwayTimer(); + } else { + this._cancelUserAwayTimer(); + } + this.emit( AgentSessionEventTypes.UserStateChanged, createUserStateChangedEvent(oldState, state), @@ -451,6 +472,37 @@ export class AgentSession< private onTextOutputChanged(): void {} + /** Reschedules the user-away timeout: cancels any pending timer first, schedules nothing when userAwayTimeout is null or undefined or when roomIO exists but reports no available participant, and on expiry sets the user state to 'away'. */ private _setUserAwayTimer(): void { + this._cancelUserAwayTimer(); + + if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) { + return; + } + + if (this.roomIO && !this.roomIO.isParticipantAvailable) { + return; + } + + this.userAwayTimer = setTimeout(() => { + this.logger.debug('User away timeout triggered'); + this._updateUserState('away'); + }, this.options.userAwayTimeout * 1000); /* the option is multiplied by 1000 because setTimeout expects milliseconds */ + } + + /** Clears the pending user-away timeout, if any, and resets the handle to null. */ private _cancelUserAwayTimer(): void { + if (this.userAwayTimer !== null) { + clearTimeout(this.userAwayTimer); + this.userAwayTimer = null; + } + } + + /** On a final transcript received while the user state is 'away', switches the user state back to 'listening'. */ private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void { + if (this.userState === 'away' && ev.isFinal) { + this.logger.debug('User returned from away state due to speech input'); + this._updateUserState('listening'); + } + } + private async closeImpl( reason: CloseReason, error: RealtimeModelError | LLMError | TTSError | STTError | null = null, @@ -460,6 +512,8 @@ export class AgentSession< return; } + /* Drop any pending away timeout before the rest of the close sequence runs. */ this._cancelUserAwayTimer(); + if (this.activity) { if (!drain) { try { diff --git a/agents/src/voice/room_io/room_io.ts b/agents/src/voice/room_io/room_io.ts index 9ca4d3aed..43a69bb31 100644 --- a/agents/src/voice/room_io/room_io.ts +++ b/agents/src/voice/room_io/room_io.ts @@ -369,6 +369,10 @@ export class RoomIO { return this.transcriptionSynchronizer.textOutput; } + /** True once participantAvailableFuture is done. */ get isParticipantAvailable(): boolean { + return this.participantAvailableFuture.done; + } + /** Switch to a different participant */ setParticipant(participantIdentity: string | null) { this.logger.debug({ 
participantIdentity }, 'setting participant'); diff --git a/examples/src/idle_user_timeout_example.ts b/examples/src/idle_user_timeout_example.ts new file mode 100644 index 000000000..e67b6e928 --- /dev/null +++ b/examples/src/idle_user_timeout_example.ts @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: 2025 LiveKit, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +/** + * Minimal example demonstrating idle user timeout functionality. + * Direct port of: https://github.com/livekit/agents/blob/main/examples/voice_agents/inactive_user.py + */ +import { + type JobContext, + type JobProcess, + Task, + WorkerOptions, + cli, + defineAgent, + delay, + log, + voice, +} from '@livekit/agents'; +import * as openai from '@livekit/agents-plugin-openai'; +import * as silero from '@livekit/agents-plugin-silero'; +import { fileURLToPath } from 'node:url'; + +export default defineAgent({ + prewarm: async (proc: JobProcess) => { + proc.userData.vad = await silero.VAD.load(); + }, + entry: async (ctx: JobContext) => { + const logger = log(); + const vad = ctx.proc.userData.vad! as silero.VAD; + + const session = new voice.AgentSession({ + vad, + llm: new openai.LLM({ model: 'gpt-4o-mini' }), + stt: 'assemblyai/universal-streaming:en', + tts: 'cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc', + + voiceOptions: { + userAwayTimeout: 12.5, + }, + }); + + /* Most recently started presence-check task, or null before the first one. */ let task: Task | null = null; + + /* Prompts the user up to three times, pausing via delay(10000) after each prompt; closes the session if the loop finishes without an abort. */ const userPresenceTask = async (controller: AbortController): Promise => { + for (let i = 0; i < 3; i++) { + if (controller.signal.aborted) return; + + const reply = await session.generateReply({ + instructions: 'The user has been inactive. 
Politely check if the user is still present.', + }); + + await reply.waitForPlayout(); + + try { + await delay(10000, { signal: controller.signal }); + } catch { /* delay rejected, presumably because the signal aborted (TODO confirm): stop prompting */ + return; + } + } + + if (!controller.signal.aborted) { + await session.close(); + } + }; + + /* On every user-state change, cancel the previous presence check; start a new one only when the new state is 'away'. */ session.on(voice.AgentSessionEventTypes.UserStateChanged, (event) => { + logger.info({ event }, 'User state changed'); + + if (task) { + task.cancel(); + } + + if (event.newState === 'away') { + task = Task.from(userPresenceTask); + return; + } + }); + + const agent = new voice.Agent({ + instructions: 'You are a helpful assistant.', + }); + + await session.start({ agent, room: ctx.room }); + }, +}); + +cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));