diff --git a/packages/api/src/hooks/index.ts b/packages/api/src/hooks/index.ts
index f5a1a959d7..b1d027ee3a 100644
--- a/packages/api/src/hooks/index.ts
+++ b/packages/api/src/hooks/index.ts
@@ -71,6 +71,7 @@ import useUIState from './useUIState';
 import useUserID from './useUserID';
 import useUsername from './useUsername';
 import useVoiceSelector from './useVoiceSelector';
+import useVoiceActivities from './useVoiceActivities';
 
 export { useBuildRenderActivityCallback } from '@msinternal/botframework-webchat-api-middleware';
 export { useSuggestedActionsHooks } from '@msinternal/botframework-webchat-redux-store';
@@ -148,5 +149,6 @@ export {
   useUIState,
   useUserID,
   useUsername,
-  useVoiceSelector
+  useVoiceSelector,
+  useVoiceActivities
 };
diff --git a/packages/api/src/hooks/internal/useStateRef.ts b/packages/api/src/hooks/internal/useStateRef.ts
new file mode 100644
index 0000000000..a6f517fcc3
--- /dev/null
+++ b/packages/api/src/hooks/internal/useStateRef.ts
@@ -0,0 +1,31 @@
+import { useCallback, useRef, useState } from 'react';
+
+import type { Dispatch, MutableRefObject, SetStateAction } from 'react';
+
+export default function useStateRef<T>(
+  initialValue?: T
+): readonly [T, Dispatch<SetStateAction<T>>, MutableRefObject<T>] {
+  const [_, forceRender] = useState<object>();
+  const valueRef: MutableRefObject<T> = useRef(initialValue);
+
+  const setter: Dispatch<SetStateAction<T>> = useCallback(
+    (value: SetStateAction<T>) => {
+      const { current } = valueRef;
+
+      value = value instanceof Function ? value(current) : value;
+
+      if (current !== value) {
+        valueRef.current = value;
+
+        forceRender({});
+      }
+    },
+    [forceRender, valueRef]
+  );
+
+  return Object.freeze([valueRef.current, setter, valueRef]) as readonly [
+    T,
+    Dispatch<SetStateAction<T>>,
+    MutableRefObject<T>
+  ];
+}
diff --git a/packages/api/src/hooks/useSpeechToSpeech.ts b/packages/api/src/hooks/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..4f529a2c08
--- /dev/null
+++ b/packages/api/src/hooks/useSpeechToSpeech.ts
@@ -0,0 +1,3 @@
+import useSpeechToSpeech from '../providers/SpeechToSpeech/useSpeechToSpeech';
+
+export default useSpeechToSpeech;
diff --git a/packages/api/src/hooks/useVoiceActivities.ts b/packages/api/src/hooks/useVoiceActivities.ts
new file mode 100644
index 0000000000..0abff2229f
--- /dev/null
+++ b/packages/api/src/hooks/useVoiceActivities.ts
@@ -0,0 +1,6 @@
+import { type WebChatActivity } from 'botframework-webchat-core';
+import { useSelector } from './internal/WebChatReduxContext';
+
+export default function useVoiceActivities(): [WebChatActivity[]] {
+  return [useSelector(({ voiceActivities }) => voiceActivities)];
+}
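Usage sketch for reviewers: `useVoiceActivities` follows Web Chat's array-return hook convention. The `VoiceActivityDebug` component below is illustrative only (not part of this diff), and assumes the hook is surfaced through the public `hooks` export like the other hooks in the index above:

```tsx
import React from 'react';
import { hooks } from 'botframework-webchat';

const { useVoiceActivities } = hooks;

// Hypothetical debug panel: voice activities accumulate in their own slice and
// never enter the rendered transcript, so a hook like this is the only way to
// observe them from the UI.
const VoiceActivityDebug = () => {
  const [voiceActivities] = useVoiceActivities();

  return (
    <ul>
      {voiceActivities.map((activity, index) => (
        <li key={index}>{activity.type === 'event' ? activity.name : activity.type}</li>
      ))}
    </ul>
  );
};

export default VoiceActivityDebug;
```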
diff --git a/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
new file mode 100644
index 0000000000..70d9c3aa4a
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
@@ -0,0 +1,144 @@
+import React, { useCallback, useEffect, useMemo, useRef, type ReactNode } from 'react';
+import { isVoiceActivity, WebChatActivity } from 'botframework-webchat-core';
+import { useAudioPlayer } from './private/useAudioPlayer';
+import { useRecorder } from './private/useRecorder';
+import { useDebouncedNotifications, usePostActivity, useVoiceActivities } from '../../hooks';
+import SpeechToSpeechContext from './private/Context';
+import { SpeechState } from './types/SpeechState';
+import useStateRef from '../../hooks/internal/useStateRef';
+
+export const SpeechToSpeechComposer: React.FC<{ readonly children: ReactNode }> = ({ children }) => {
+  const [voiceActivities] = useVoiceActivities();
+  const postActivity = usePostActivity();
+  const [{ connectivitystatus }] = useDebouncedNotifications();
+  const lastProcessedIndexRef = useRef(0);
+  const [speechState, setSpeechState] = useStateRef<SpeechState>('idle');
+
+  // Config received from the server on session init (only once). For now, CCv2 and MMRT run on different sample rates and chunk intervals.
+  // We read that config as a free-form object since we are unsure which session config fields will be needed in the future.
+  const [serverConfig, setServerConfig] = useStateRef<Record<string, unknown> | null>(null);
+  const { playAudio, stopAudio, isPlaying } = useAudioPlayer(serverConfig);
+
+  const isConnected = useMemo(() => connectivitystatus?.message === 'connected', [connectivitystatus]);
+
+  const sendAudioChunk = useCallback(
+    (base64: string, timestamp: string) => {
+      postActivity({
+        type: 'event',
+        name: 'stream.chunk',
+        value: { voice: { contentUrl: base64, timestamp } }
+      } as any);
+    },
+    [postActivity]
+  );
+
+  const { recording, setRecording: baseSetRecording } = useRecorder(sendAudioChunk, serverConfig);
+
+  const handleVoiceActivity = useCallback(
+    (activity: WebChatActivity) => {
+      if (!isVoiceActivity(activity)) {
+        return;
+      }
+
+      const { name, value } = activity;
+      const { voice } = value;
+
+      switch (name) {
+        // TODO: This will be a "commandResult" activity rather than an event. We need to think through handling of "command" and "commandResult" activities.
+        case 'session.init': {
+          setServerConfig(value.session?.config as Record<string, unknown>);
+          break;
+        }
+
+        case 'session.update': {
+          switch (voice.bot_state) {
+            case 'voice.request.detected':
+              stopAudio();
+              setSpeechState('listening');
+              break;
+
+            case 'voice.request.processing':
+              setSpeechState('processing');
+              break;
+
+            default:
+              break;
+          }
+          break;
+        }
+
+        case 'stream.chunk': {
+          if (voice.contentUrl) {
+            playAudio(voice.contentUrl);
+          }
+          break;
+        }
+
+        default:
+          break;
+      }
+    },
+    [playAudio, setServerConfig, setSpeechState, stopAudio]
+  );
+
+  useEffect(() => {
+    const startIndex = lastProcessedIndexRef.current;
+    if (!voiceActivities.length || startIndex >= voiceActivities.length) {
+      return;
+    }
+
+    for (let i = startIndex; i < voiceActivities.length; i++) {
+      // eslint-disable-next-line security/detect-object-injection
+      const activity = voiceActivities[i];
+
+      // Skip activities from the user: we only want to process incoming voice activities.
+      // We may receive (optional) config from the server as soon as the socket is established.
+      // At that point recording is off, but we still want to process the activity to read the config and act on it.
+      if (
+        activity.from?.role === 'user' ||
+        (!recording && isVoiceActivity(activity) && activity.name !== 'session.init')
+      ) {
+        continue;
+      }
+
+      handleVoiceActivity(activity);
+    }
+
+    if (isPlaying && speechState !== 'bot_speaking') {
+      setSpeechState('bot_speaking');
+    } else if (!isPlaying && speechState === 'bot_speaking') {
+      setSpeechState('listening');
+    }
+
+    lastProcessedIndexRef.current = voiceActivities.length;
+  }, [handleVoiceActivity, isPlaying, recording, setSpeechState, speechState, voiceActivities]);
+
+  const setRecording = useCallback(
+    async (shouldRecord: boolean) => {
+      if (!isConnected) {
+        return;
+      }
+
+      if (shouldRecord) {
+        setSpeechState('listening');
+      } else {
+        stopAudio();
+        setSpeechState('idle');
+      }
+
+      await baseSetRecording(shouldRecord);
+    },
+    [isConnected, baseSetRecording, setSpeechState, stopAudio]
+  );
+
+  const contextValue = useMemo(
+    () => ({
+      recording,
+      setRecording,
+      speechState
+    }),
+    [recording, setRecording, speechState]
+  );
+
+  return <SpeechToSpeechContext.Provider value={contextValue}>{children}</SpeechToSpeechContext.Provider>;
+};
diff --git a/packages/api/src/providers/SpeechToSpeech/private/Context.ts b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
new file mode 100644
index 0000000000..ce85310246
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
@@ -0,0 +1,14 @@
+import { createContext } from 'react';
+import { SpeechState } from '../types/SpeechState';
+
+type SpeechToSpeechContextType = {
+  recording: boolean;
+  setRecording: (recording: boolean) => void;
+  speechState: SpeechState;
+};
+
+const SpeechToSpeechContext = createContext<SpeechToSpeechContextType>(undefined!);
+
+export default SpeechToSpeechContext;
+
+export type { SpeechToSpeechContextType };
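How the provider and hook compose, sketched for review. The import paths are package-internal because this diff does not add `useSpeechToSpeech` or the composer to any public export; the `MicrophoneButton` and `VoicePanel` components are illustrative:

```tsx
import React from 'react';
import { SpeechToSpeechComposer } from './providers/SpeechToSpeech/SpeechToSpeechComposer';
import useSpeechToSpeech from './hooks/useSpeechToSpeech';

// Illustrative microphone toggle. setRecording() is silently ignored until the
// connectivity notification reports 'connected' (see the guard in the composer).
const MicrophoneButton = () => {
  const [{ recording, setRecording, speechState }] = useSpeechToSpeech();

  return (
    <button onClick={() => setRecording(!recording)} type="button">
      {recording ? `Stop (${speechState})` : 'Start talking'}
    </button>
  );
};

// The button must render under the composer, which itself must sit under Web
// Chat's API composer so usePostActivity and useDebouncedNotifications resolve.
const VoicePanel = () => (
  <SpeechToSpeechComposer>
    <MicrophoneButton />
  </SpeechToSpeechComposer>
);

export default VoicePanel;
```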
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
new file mode 100644
index 0000000000..8c1d42cb08
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
@@ -0,0 +1,279 @@
+/** @jest-environment @happy-dom/jest-environment */
+/// <reference types="jest" />
+/// <reference types="node" />
+
+import { render, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useAudioPlayer } from './useAudioPlayer';
+
+// Mock AudioContext and related APIs
+const mockAudioContext = {
+  sampleRate: 24000,
+  currentTime: 0,
+  destination: {},
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  close: jest.fn().mockResolvedValue(undefined),
+  createBuffer: jest.fn(),
+  createBufferSource: jest.fn()
+};
+
+const mockAudioBuffer = {
+  duration: 0.1, // 100ms
+  getChannelData: jest.fn().mockReturnValue(new Float32Array(2400))
+};
+
+const mockBufferSource = {
+  buffer: null,
+  connect: jest.fn(),
+  start: jest.fn(),
+  stop: jest.fn(),
+  disconnect: jest.fn(),
+  onended: null
+};
+
+// Mock global AudioContext
+global.AudioContext = jest.fn(() => mockAudioContext) as any;
+global.atob = jest.fn(str => str); // Simple mock for base64 decode
+
+type UseAudioPlayerReturn = ReturnType<typeof useAudioPlayer>;
+
+describe('setup', () => {
+  let HookApp: ComponentType;
+  let hookData: UseAudioPlayerReturn | undefined;
+  let renderResult: RenderResult;
+  const originalAudioContext = global.AudioContext;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockAudioContext.currentTime = 0;
+    mockAudioContext.createBuffer.mockReturnValue(mockAudioBuffer);
+    mockAudioContext.createBufferSource.mockReturnValue(mockBufferSource);
+    mockBufferSource.buffer = null;
+    mockBufferSource.onended = null;
+
+    HookApp = () => {
+      hookData = useAudioPlayer();
+      return null;
+    };
+  });
+
+  afterEach(() => {
+    global.AudioContext = originalAudioContext;
+  });
+
+  describe('Initialization', () => {
+    test('should initialize with correct default values', () => {
+      render(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(false);
+      expect(typeof hookData?.playAudio).toBe('function');
+      expect(typeof hookData?.stopAudio).toBe('function');
+    });
+
+    test('should create AudioContext on first playAudio call', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA=='); // base64 for 'test'
+
+      expect(AudioContext).toHaveBeenCalledWith({ sampleRate: 24000 });
+    });
+
+    test('should reuse existing AudioContext on subsequent calls', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe('Audio playback', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should process base64 audio data correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(global.atob).toHaveBeenCalledWith('dGVzdA==');
+      expect(mockAudioContext.createBuffer).toHaveBeenCalledWith(1, expect.any(Number), 24000);
+      expect(mockAudioContext.createBufferSource).toHaveBeenCalled();
+    });
+
+    test('should set up audio buffer source correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockBufferSource.connect).toHaveBeenCalledWith(mockAudioContext.destination);
+      expect(mockBufferSource.start).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBe(mockAudioBuffer);
+    });
+
+    test('should resume AudioContext if needed', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockAudioContext.resume).toHaveBeenCalled();
+    });
+
+    test('should queue multiple audio chunks correctly', () => {
+      mockAudioBuffer.duration = 0.1; // 100ms
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(2);
+      // First chunk starts at currentTime (0), second at 0.1
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(1, 0);
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(2, 0.1);
+    });
+  });
+
+  describe('isPlaying state', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should return true when audio is queued for playback', () => {
+      mockAudioContext.currentTime = 0;
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should return false when no audio is queued', () => {
+      expect(hookData?.isPlaying).toBe(false);
+    });
+
+    test('should handle multiple chunks and playing state', () => {
+      mockAudioContext.currentTime = 0.05; // In the middle of first chunk
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA=='); // 0 - 0.1
+      hookData?.playAudio('dGVzdDI='); // 0.1 - 0.2
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+  });
+
+  describe('Audio cleanup', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should clean up buffer source on ended', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      // Simulate audio ended
+      if (mockBufferSource.onended) {
+        mockBufferSource.onended();
+      }
+
+      expect(mockBufferSource.disconnect).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBeNull();
+    });
+
+    test('should stop all audio and close context', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+
+      expect(mockAudioContext.close).toHaveBeenCalled();
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Error handling', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle invalid base64 data gracefully', () => {
+      expect(() => {
+        hookData?.playAudio('invalid-base64!@#');
+      }).not.toThrow();
+    });
+
+    test('should handle AudioContext creation failure', () => {
+      global.AudioContext = jest.fn(() => {
+        throw new Error('AudioContext not supported');
+      }) as any;
+
+      expect(() => {
+        hookData?.playAudio('dGVzdA==');
+      }).toThrow('AudioContext not supported');
+    });
+
+    test('should handle missing audio context in isPlaying', () => {
+      // Before any audio is played, audioCtxRef should be null
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Real-world scenarios', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle streaming audio chunks', () => {
+      mockAudioBuffer.duration = 0.05; // 50ms chunks
+
+      // Simulate streaming 5 chunks
+      for (let i = 0; i < 5; i++) {
+        hookData?.playAudio(`chunk${i}`);
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(5);
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should handle playback interruption', () => {
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(false);
+      expect(mockAudioContext.close).toHaveBeenCalled();
+    });
+
+    test('should handle resume after stop', () => {
+      // Play, stop, then play again
+      hookData?.playAudio('dGVzdA==');
+      hookData?.stopAudio();
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(2); // New context after stop
+    });
+  });
+
+  describe('Performance considerations', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle large audio data', () => {
+      const largeBase64 = 'A'.repeat(10000);
+
+      expect(() => {
+        hookData?.playAudio(largeBase64);
+      }).not.toThrow();
+    });
+
+    test('should handle rapid successive calls', () => {
+      for (let i = 0; i < 100; i++) {
+        // Ensure the mock "base64" data has an even length as Int16Array (which represents 16-bit audio samples) requires the underlying data to be in multiples of 2 bytes
+        hookData?.playAudio(`chunk${i}`.padEnd(8, ' '));
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(100);
+    });
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
new file mode 100644
index 0000000000..f9b8405387
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
@@ -0,0 +1,71 @@
+import { useRef, useCallback } from 'react';
+
+const DEFAULT_SAMPLE_RATE = 24000;
+const INT16_SCALE = 32768;
+
+export function useAudioPlayer(config?: Record<string, unknown> | null) {
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const nextPlayTimeRef = useRef(0);
+
+  const { sampleRate = DEFAULT_SAMPLE_RATE } = config || {};
+
+  const initAudio = useCallback(() => {
+    if (!audioCtxRef.current) {
+      audioCtxRef.current = new AudioContext({ sampleRate: sampleRate as number });
+    }
+    return audioCtxRef.current;
+  }, [sampleRate]);
+
+  const playAudio = useCallback(
+    (base64: string) => {
+      const audioCtx = initAudio();
+      audioCtx.resume?.();
+
+      try {
+        const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0));
+        const int16 = new Int16Array(bytes.buffer);
+        const float32 = new Float32Array(int16.length);
+
+        for (let i = 0; i < int16.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          float32[i] = int16[i] / INT16_SCALE;
+        }
+
+        const buffer = audioCtx.createBuffer(1, float32.length, audioCtx.sampleRate);
+        buffer.getChannelData(0).set(float32);
+
+        const src = audioCtx.createBufferSource();
+        src.buffer = buffer;
+        src.connect(audioCtx.destination);
+
+        // Clear buffer when finished
+        src.onended = () => {
+          src.disconnect();
+          src.buffer = null;
+        };
+
+        nextPlayTimeRef.current = Math.max(nextPlayTimeRef.current, audioCtx.currentTime);
+        src.start(nextPlayTimeRef.current);
+        nextPlayTimeRef.current += buffer.duration;
+      } catch (error) {
+        console.warn('botframework-webchat: Error during audio playback in useAudioPlayer:', error);
+      }
+    },
+    [initAudio]
+  );
+
+  const stopAudio = useCallback(() => {
+    nextPlayTimeRef.current = 0;
+
+    if (audioCtxRef.current) {
+      audioCtxRef.current.close();
+      audioCtxRef.current = null;
+    }
+  }, []);
+
+  return {
+    playAudio,
+    stopAudio,
+    isPlaying: audioCtxRef.current ? audioCtxRef.current.currentTime < nextPlayTimeRef.current : false
+  };
+}
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useContext.ts b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
new file mode 100644
index 0000000000..50926b0a12
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
@@ -0,0 +1,15 @@
+import { useContext } from 'react';
+
+import SpeechToSpeechContext from './Context';
+
+import type { SpeechToSpeechContextType } from './Context';
+
+export default function useSpeechToSpeechContext(thrownOnUndefined = true): SpeechToSpeechContextType {
+  const contextValue = useContext(SpeechToSpeechContext);
+
+  if (thrownOnUndefined && !contextValue) {
+    throw new Error('botframework-webchat internal: This hook can only be used under <SpeechToSpeechComposer>.');
+  }
+
+  return contextValue;
+}
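The player above achieves gapless streaming by starting each chunk at `max(nextPlayTime, currentTime)` and advancing `nextPlayTime` by the chunk's duration. A standalone sketch of that arithmetic plus the PCM16 decode, runnable without Web Audio (byte values and the helper names are made up):

```ts
// Each base64 chunk decodes to 16-bit little-endian mono PCM, so:
//   samples  = bytes / 2
//   duration = samples / sampleRate
const INT16_SCALE = 32768;

function decodePcm16(bytes: Uint8Array): Float32Array {
  const int16 = new Int16Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 2);

  // Normalize to [-1, 1), exactly as the hook's loop does.
  return Float32Array.from(int16, sample => sample / INT16_SCALE);
}

function scheduleChunk(nextPlayTime: number, currentTime: number, durationInSeconds: number) {
  // Never schedule in the past; otherwise butt chunks back-to-back for gapless playback.
  const startAt = Math.max(nextPlayTime, currentTime);

  return { startAt, nextPlayTime: startAt + durationInSeconds };
}

// 4800 bytes = 2400 samples = 100 ms at 24 kHz.
const duration = 4800 / 2 / 24000; // 0.1

let state = scheduleChunk(0, 0, duration); // { startAt: 0, nextPlayTime: 0.1 }
state = scheduleChunk(state.nextPlayTime, 0.05, duration); // { startAt: 0.1, nextPlayTime: 0.2 }

console.log(decodePcm16(new Uint8Array([0, 0, 0, 64])), state); // Float32Array [0, 0.5], { startAt: 0.1, ... }
```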
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
new file mode 100644
index 0000000000..f2b01ca6b5
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
@@ -0,0 +1,164 @@
+/** @jest-environment @happy-dom/jest-environment */
+/// <reference types="jest" />
+
+import { act, render, waitFor, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useRecorder } from './useRecorder';
+
+// --- Mocks ---
+
+jest.mock('../../Ponyfill/usePonyfill', () => ({ __esModule: true, default: jest.fn(() => [{ Date: global.Date }]) }));
+
+const mockTrack = {
+  stop: jest.fn()
+};
+
+const mockMediaStream = {
+  getTracks: jest.fn(() => [mockTrack])
+};
+
+const mockMediaDevices = {
+  getUserMedia: jest.fn().mockResolvedValue(mockMediaStream)
+};
+
+const mockWorkletPort = {
+  postMessage: jest.fn(),
+  onmessage: null as ((event: { data: any }) => void) | null
+};
+
+const mockWorkletNode = {
+  connect: jest.fn(),
+  disconnect: jest.fn(),
+  port: mockWorkletPort
+};
+
+const mockAudioContext = {
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  createMediaStreamSource: jest.fn(() => ({
+    connect: jest.fn()
+  })),
+  destination: {},
+  audioWorklet: {
+    addModule: jest.fn().mockResolvedValue(undefined)
+  }
+};
+
+// --- Global Mocks Setup ---
+
+Object.defineProperty(global.navigator, 'mediaDevices', {
+  value: mockMediaDevices,
+  writable: true
+});
+
+global.AudioContext = jest.fn(() => mockAudioContext as any);
+global.AudioWorkletNode = jest.fn(() => mockWorkletNode as any);
+global.Blob = jest.fn(parts => ({ parts, type: parts[1]?.type })) as any;
+global.URL.createObjectURL = jest.fn(() => 'blob:http://localhost/mock-url');
+global.URL.revokeObjectURL = jest.fn();
+global.btoa = jest.fn(str => `btoa(${str})`);
+
+// --- Tests ---
+
+describe('useRecorder', () => {
+  let onAudioChunk: jest.Mock;
+  let HookApp: ComponentType<{ onAudioChunk: (base64: string) => void }>;
+  let hookData: ReturnType<typeof useRecorder> | undefined;
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  let renderResult: RenderResult;
+
+  beforeEach(() => {
+    // Clear all mocks before each test
+    jest.clearAllMocks();
+    onAudioChunk = jest.fn();
+    hookData = undefined;
+    mockWorkletPort.onmessage = null;
+    (mockAudioContext.state as any) = 'running';
+
+    HookApp = ({ onAudioChunk }) => {
+      hookData = useRecorder(onAudioChunk);
+      return null;
+    };
+  });
+
+  test('should be initially not recording', () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+    expect(hookData?.recording).toBe(false);
+  });
+
+  test('should start recording when setRecording(true) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledTimes(1);
+    expect(global.AudioContext).toHaveBeenCalledTimes(1);
+    expect(mockAudioContext.audioWorklet.addModule).toHaveBeenCalledTimes(1);
+    expect(global.AudioWorkletNode).toHaveBeenCalledWith(expect.anything(), 'audio-recorder', {
+      processorOptions: { bufferSize: 2400 }
+    });
+    expect(mockWorkletNode.connect).toHaveBeenCalledTimes(1);
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' });
+  });
+
+  test('should stop recording when setRecording(false) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    // Start recording
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    // Stop recording
+    act(() => {
+      hookData?.setRecording(false);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(false));
+
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'STOP' });
+    expect(mockWorkletNode.disconnect).toHaveBeenCalledTimes(1);
+    expect(mockTrack.stop).toHaveBeenCalledTimes(1);
+  });
+
+  test('should process audio chunks sent from the worklet', async () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockWorkletPort.onmessage).not.toBeNull());
+
+    // Simulate a message from the audio worklet
+    const mockAudioData = new Float32Array([0.1, 0.2, -0.1]);
+    act(() => {
+      mockWorkletPort.onmessage!({
+        data: {
+          eventType: 'audio',
+          audioData: mockAudioData
+        }
+      });
+    });
+
+    await waitFor(() => expect(onAudioChunk).toHaveBeenCalledTimes(1));
+    expect(global.btoa).toHaveBeenCalled();
+  });
+
+  test('should handle suspended audio context by resuming it', async () => {
+    (mockAudioContext.state as any) = 'suspended';
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockAudioContext.resume).toHaveBeenCalledTimes(1));
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
new file mode 100644
index 0000000000..7ba19ba244
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
@@ -0,0 +1,143 @@
+import { useRef, useState, useCallback } from 'react';
+import usePonyfill from '../../Ponyfill/usePonyfill';
+
+const audioProcessorCode = `
+  class AudioRecorderProcessor extends AudioWorkletProcessor {
+    constructor(options) {
+      super()
+      this.recording = false
+      this.buffer = []
+      this.bufferSize = options.processorOptions.bufferSize
+      this.port.onmessage = e => {
+        if (e.data.command === 'START') this.recording = true
+        else if (e.data.command === 'STOP') {
+          this.recording = false
+          this.buffer = []
+        }
+      }
+    }
+    sendBuffer() {
+      while (this.buffer.length >= this.bufferSize) {
+        const chunk = this.buffer.splice(0, this.bufferSize)
+        this.port.postMessage({
+          eventType: 'audio',
+          audioData: new Float32Array(chunk)
+        })
+      }
+    }
+    process(inputs) {
+      if (inputs[0]?.length && this.recording) {
+        this.buffer.push(...inputs[0][0])
+        if (this.buffer.length >= this.bufferSize) this.sendBuffer()
+      }
+      return true
+    }
+  }
+  registerProcessor('audio-recorder', AudioRecorderProcessor)`;
+
+const INT16_MIN = -32768;
+const INT16_MAX = 32767;
+const INT16_SCALE = 32767;
+const DEFAULT_SAMPLE_RATE = 24000;
+const DEFAULT_CHUNK_SIZE_IN_MS = 100;
+const MS_IN_SECOND = 1000;
+
+export function useRecorder(
+  onAudioChunk: (base64: string, timestamp: string) => void,
+  config?: Record<string, unknown> | null
+) {
+  const [recording, setRecordingInternal] = useState(false);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const workletRef = useRef<AudioWorkletNode | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const [{ Date }] = usePonyfill();
+
+  const { sampleRate = DEFAULT_SAMPLE_RATE, chunkIntervalMs = DEFAULT_CHUNK_SIZE_IN_MS } = config || {};
+
+  const initAudio = useCallback(async () => {
+    if (audioCtxRef.current) {
+      return;
+    }
+    const audioCtx = new AudioContext({ sampleRate: sampleRate as number });
+    const blob = new Blob([audioProcessorCode], {
+      type: 'application/javascript'
+    });
+    // eslint-disable-next-line no-restricted-properties
+    const url = URL.createObjectURL(blob);
+    await audioCtx.audioWorklet.addModule(url);
+    URL.revokeObjectURL(url);
+    // eslint-disable-next-line require-atomic-updates
+    audioCtxRef.current = audioCtx;
+  }, [sampleRate]);
+
+  const startRecording = useCallback(async () => {
+    await initAudio();
+    const audioCtx = audioCtxRef.current!;
+    if (audioCtx.state === 'suspended') {
+      await audioCtx.resume();
+    }
+    const stream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        channelCount: 1,
+        sampleRate,
+        echoCancellation: true
+      }
+    });
+    streamRef.current = stream;
+    const source = audioCtx.createMediaStreamSource(stream);
+    const worklet = new AudioWorkletNode(audioCtx, 'audio-recorder', {
+      processorOptions: {
+        bufferSize: ((sampleRate as number) * (chunkIntervalMs as number)) / MS_IN_SECOND
+      }
+    });
+
+    worklet.port.onmessage = e => {
+      if (e.data.eventType === 'audio') {
+        const timestamp = new Date().toISOString();
+        const float32 = e.data.audioData;
+        const int16 = new Int16Array(float32.length);
+        for (let i = 0; i < float32.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          int16[i] = Math.max(INT16_MIN, Math.min(INT16_MAX, float32[i] * INT16_SCALE));
+        }
+        const base64 = btoa(String.fromCharCode(...new Uint8Array(int16.buffer)));
+        onAudioChunk(base64, timestamp);
+      }
+    };
+
+    source.connect(worklet);
+    worklet.connect(audioCtx.destination);
+    worklet.port.postMessage({ command: 'START' });
+    workletRef.current = worklet;
+    setRecordingInternal(true);
+  }, [Date, chunkIntervalMs, initAudio, onAudioChunk, sampleRate]);
+
+  const stopRecording = useCallback(() => {
+    if (workletRef.current) {
+      workletRef.current.port.postMessage({ command: 'STOP' });
+      workletRef.current.disconnect();
+      workletRef.current = null;
+    }
+    if (streamRef.current) {
+      streamRef.current.getTracks().forEach(track => track.stop());
+      streamRef.current = null;
+    }
+    setRecordingInternal(false);
+  }, []);
+
+  const setRecording = useCallback(
+    async (shouldRecord: boolean) => {
+      if (!shouldRecord && recording) {
+        stopRecording();
+      } else if (shouldRecord && !recording) {
+        await startRecording();
+      }
+    },
+    [recording, startRecording, stopRecording]
+  );
+
+  return {
+    recording,
+    setRecording
+  };
}
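Worked numbers behind `processorOptions.bufferSize` above, matching the spec's expectation of 2400 (defaults assumed from `DEFAULT_SAMPLE_RATE` and `DEFAULT_CHUNK_SIZE_IN_MS`; servers may override both via the session config):

```ts
const sampleRate = 24000; // samples per second
const chunkIntervalMs = 100; // one worklet postMessage per 100 ms of audio

const bufferSize = (sampleRate * chunkIntervalMs) / 1000; // 2400 samples

// Each sample becomes one Int16 (2 bytes), so a chunk is 4800 bytes of PCM,
// which base64-encodes to 6400 characters before being posted to the server
// as a 'stream.chunk' event activity.
const bytesPerChunk = bufferSize * Int16Array.BYTES_PER_ELEMENT; // 4800
const base64Length = Math.ceil(bytesPerChunk / 3) * 4; // 6400

console.log({ bufferSize, bytesPerChunk, base64Length });
```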
diff --git a/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
new file mode 100644
index 0000000000..62d5cc8c13
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
@@ -0,0 +1 @@
+export type SpeechState = 'idle' | 'listening' | 'processing' | 'bot_speaking';
diff --git a/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..d7ac3fac44
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
@@ -0,0 +1,6 @@
+import { SpeechToSpeechContextType } from './private/Context';
+import useSpeechToSpeechContext from './private/useContext';
+
+export default function useSpeechToSpeech(): readonly [SpeechToSpeechContextType] {
+  return [useSpeechToSpeechContext()];
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index e2534aa94d..fd05b34d75 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -40,6 +40,7 @@ import getActivityLivestreamingMetadata from './utils/getActivityLivestreamingMetadata';
 import getOrgSchemaMessage from './utils/getOrgSchemaMessage';
 import onErrorResumeNext from './utils/onErrorResumeNext';
 import singleToArray from './utils/singleToArray';
+import isVoiceActivity from './utils/voiceActivity/isVoiceActivity';
 
 export {
   isForbiddenPropertyName,
@@ -99,6 +100,7 @@ export {
   emitTypingIndicator,
   getActivityLivestreamingMetadata,
   getOrgSchemaMessage,
+  isVoiceActivity,
   markActivity,
   onErrorResumeNext,
   parseAction,
diff --git a/packages/core/src/reducers/activities/combineActivitiesReducer.ts b/packages/core/src/reducers/activities/combineActivitiesReducer.ts
index 488df1be62..49ee3cee3d 100644
--- a/packages/core/src/reducers/activities/combineActivitiesReducer.ts
+++ b/packages/core/src/reducers/activities/combineActivitiesReducer.ts
@@ -10,6 +10,7 @@ import createGroupedActivitiesReducer, {
 
 type ActivitiesState = {
   activities: readonly WebChatActivity[];
+  voiceActivities: readonly WebChatActivity[];
   groupedActivities: GroupedActivitiesState;
 };
 
@@ -34,7 +35,12 @@ export default function combineActivitiesReducer(
     state: (ExistingState & ActivitiesState) | undefined,
     action: ExistingAction & GroupedActivitiesAction
   ): ExistingState & ActivitiesState {
-    const { activities: _activities, groupedActivities, ...existingState } = state ?? {};
+    const {
+      activities: _activities,
+      voiceActivities: _voiceActivities,
+      groupedActivities,
+      ...existingState
+    } = state ?? {};
 
     const nextState = existingSlicedReducer(existingState as ExistingState, action);
     const nextGroupedActivities = groupedActivitiesReducer(groupedActivities, action);
@@ -52,7 +58,12 @@ export default function combineActivitiesReducer(
     );
 
     return hasChanged
-      ? { ...nextState, activities: nextGroupedActivities.sortedActivities, groupedActivities: nextGroupedActivities }
+      ? {
+          ...nextState,
+          activities: nextGroupedActivities.sortedActivities,
+          voiceActivities: nextGroupedActivities.voiceActivities,
+          groupedActivities: nextGroupedActivities
+        }
       : state;
   };
 }
diff --git a/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts b/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
index f333c67af1..e7179ac44e 100644
--- a/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
+++ b/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
@@ -32,7 +32,7 @@ import type { WebChatActivity } from '../../types/WebChatActivity';
 import patchActivity from './patchActivity';
 import deleteActivityByLocalId from './sort/deleteActivityByLocalId';
 import { generateLocalIdInActivity, getLocalIdFromActivity, setLocalIdInActivity } from './sort/property/LocalId';
-import { getPositionFromActivity, setPositionInActivity } from './sort/property/Position';
+import { getPositionFromActivity, queryPositionFromActivity, setPositionInActivity } from './sort/property/Position';
 import { setReceivedAtInActivity } from './sort/property/ReceivedAt';
 import { querySendStatusFromOutgoingActivity, setSendStatusInOutgoingActivity } from './sort/property/SendStatus';
 import queryLocalIdAByActivityId from './sort/queryLocalIdByActivityId';
@@ -42,6 +42,8 @@ import updateActivityChannelData, {
   updateActivityChannelDataInternalSkipNameCheck
 } from './sort/updateActivityChannelData';
 import upsert, { INITIAL_STATE } from './sort/upsert';
+import isVoiceActivity from '../../utils/voiceActivity/isVoiceActivity';
+import isVoiceTranscriptActivity from '../../utils/voiceActivity/isVoiceTranscriptActivity';
 
 type GroupedActivitiesAction =
   | DeleteActivityAction
@@ -100,6 +102,13 @@ function createGroupedActivitiesReducer(
         payload: { activity }
       } = action;
 
+      // Non-transcript voice activities do not render in the UI and are mostly fire-and-forget (we don't have replay, etc.),
+      // so we don't process them here and simply pass them through.
+      if (isVoiceActivity(activity) && !isVoiceTranscriptActivity(activity)) {
+        state = upsert(ponyfill, state, activity);
+        break;
+      }
+
       // Patch activity so the outgoing blob: URL is not re-downloadable.
       // Related to /__tests__/html2/accessibility/liveRegion/attachment/file.
 
@@ -151,6 +160,12 @@ function createGroupedActivitiesReducer(
     }
 
     case POST_ACTIVITY_FULFILLED: {
+      // Non-transcript voice activities do not render in the UI and are mostly fire-and-forget (we don't have replay, etc.),
+      // so we don't process them here and simply pass them through.
+      if (isVoiceActivity(action.payload.activity) && !isVoiceTranscriptActivity(action.payload.activity)) {
+        state = upsert(ponyfill, state, action.payload.activity);
+        break;
+      }
       const localId = queryLocalIdAByClientActivityId(state, action.meta.clientActivityID);
       const existingActivity = localId && state.activityMap.get(localId)?.activity;
 
@@ -175,8 +190,11 @@ function createGroupedActivitiesReducer(
         activity = setSendStatusInOutgoingActivity(activity, SENT);
         activity = setLocalIdInActivity(activity, localId);
 
-        // Keep existing position.
-        activity = setPositionInActivity(activity, getPositionFromActivity(existingActivity));
+        // Keep the existing position if there is one (voice activities don't have positions).
+        const existingPosition = queryPositionFromActivity(existingActivity);
+        if (typeof existingPosition !== 'undefined') {
+          activity = setPositionInActivity(activity, getPositionFromActivity(existingActivity));
+        }
 
         // Compare the INCOMING_ACTIVITY below:
         // - POST_ACTIVITY_FULFILLED will mark send status as SENT
diff --git a/packages/core/src/reducers/activities/sort/types.ts b/packages/core/src/reducers/activities/sort/types.ts
index 286711854f..28be6d1149 100644
--- a/packages/core/src/reducers/activities/sort/types.ts
+++ b/packages/core/src/reducers/activities/sort/types.ts
@@ -57,6 +57,7 @@ type State = {
   readonly livestreamSessionMap: LivestreamSessionMap;
   readonly sortedChatHistoryList: SortedChatHistory;
   readonly sortedActivities: readonly Activity[];
+  readonly voiceActivities: readonly Activity[];
 };
 
 export {
diff --git a/packages/core/src/reducers/activities/sort/upsert.ts b/packages/core/src/reducers/activities/sort/upsert.ts
index c917d77568..9528d1c956 100644
--- a/packages/core/src/reducers/activities/sort/upsert.ts
+++ b/packages/core/src/reducers/activities/sort/upsert.ts
@@ -18,6 +18,8 @@ import {
   type SortedChatHistoryEntry,
   type State
 } from './types';
+import isVoiceActivity from '../../../utils/voiceActivity/isVoiceActivity';
+import isVoiceTranscriptActivity from '../../../utils/voiceActivity/isVoiceTranscriptActivity';
 
 // Honoring timestamp or not:
 //
@@ -48,7 +50,8 @@ const INITIAL_STATE = Object.freeze({
   livestreamSessionMap: Object.freeze(new Map()),
   howToGroupingMap: Object.freeze(new Map()),
   sortedActivities: Object.freeze([]),
-  sortedChatHistoryList: Object.freeze([])
+  sortedChatHistoryList: Object.freeze([]),
+  voiceActivities: Object.freeze([])
 } satisfies State);
 
 // Question: Why insertion sort works but not quick sort?
@@ -58,6 +61,14 @@ const INITIAL_STATE = Object.freeze({
 // - Duplicate timestamps: activities without timestamp can't be sort deterministically with quick sort
 
 function upsert(ponyfill: Pick<GlobalScopePonyfill, 'Date'>, state: State, activity: Activity): State {
+  // We only want to process transcript voice activities through this path, as those will be rendered.
+  // All other voice activities are stored in a separate slice and we don't perform any operation on them.
+  if (isVoiceActivity(activity) && !isVoiceTranscriptActivity(activity)) {
+    return Object.freeze({
+      ...state,
+      voiceActivities: Object.freeze([...state.voiceActivities, activity])
+    } satisfies State);
+  }
   const nextActivityIdToLocalIdMap = new Map(state.activityIdToLocalIdMap);
   const nextActivityMap = new Map(state.activityMap);
   const nextClientActivityIdToLocalIdMap = new Map(state.clientActivityIdToLocalIdMap);
@@ -336,7 +347,8 @@ function upsert(ponyfill: Pick<GlobalScopePonyfill, 'Date'>, state: State, activity: Activity): State {
     howToGroupingMap: Object.freeze(nextHowToGroupingMap),
     livestreamSessionMap: Object.freeze(nextLivestreamSessionMap),
     sortedActivities: Object.freeze(nextSortedActivities),
-    sortedChatHistoryList: Object.freeze(nextSortedChatHistoryList)
+    sortedChatHistoryList: Object.freeze(nextSortedChatHistoryList),
+    voiceActivities: state.voiceActivities
   } satisfies State);
 }
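The reducers now route activities between two slices. A self-contained restatement of that routing rule (the transcript check is inlined because `isVoiceTranscriptActivity` stays internal to the core package; the sample activity is abbreviated and cast for brevity):

```ts
import { isVoiceActivity, type WebChatActivity } from 'botframework-webchat-core';

function routeActivity(activity: WebChatActivity): 'chat history' | 'voice slice' {
  // Transcripts (stream.end with a string transcription) continue through the
  // normal sort/group pipeline; all other voice events are append-only.
  const isTranscript =
    isVoiceActivity(activity) &&
    activity.name === 'stream.end' &&
    typeof activity.value.voice?.transcription === 'string';

  return isVoiceActivity(activity) && !isTranscript ? 'voice slice' : 'chat history';
}

// A bot audio chunk stays out of the rendered transcript…
console.log(
  routeActivity({ type: 'event', name: 'stream.chunk', value: { voice: { contentUrl: 'base64…' } } } as WebChatActivity)
); // → 'voice slice'

// …while a finished utterance with a transcription is rendered.
console.log(
  routeActivity({
    type: 'event',
    name: 'stream.end',
    value: { voice: { transcription: 'Hello', origin: 'user' } }
  } as WebChatActivity)
); // → 'chat history'
```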
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
new file mode 100644
index 0000000000..1b2c3abf77
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
@@ -0,0 +1,112 @@
+import isVoiceActivity from './isVoiceActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// Mock activity factory for testing
+const createMockActivity = (type: string = 'event', name?: string, value?: any): WebChatActivity => ({
+  type: type as any,
+  id: 'test-activity-id',
+  from: { id: 'test-user' },
+  channelData: {
+    'webchat:sequence-id': 1
+  },
+  ...(name && { name }),
+  ...(value && { value })
+});
+
+const createMockVoiceActivity = (name: string, voiceProps: Record<string, any>): WebChatActivity =>
+  createMockActivity('event', name, {
+    voice: voiceProps
+  });
+
+describe('isVoiceActivity', () => {
+  describe('Valid voice activities', () => {
+    test('should return true for event activity with voice', () => {
+      const activity = createMockVoiceActivity('stream.chunk', { contentUrl: 'base64' });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for voice activity with minimal voice', () => {
+      const activity = createMockActivity('event', 'stream.chunk', { voice: {} });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('Invalid activities', () => {
+    const testCases = [
+      {
+        name: 'message activity with voice',
+        activity: () => createMockActivity('message', 'stream.chunk', { voice: { contentUrl: 'base64' } })
+      },
+      {
+        name: 'typing activity',
+        activity: () => createMockActivity('typing')
+      },
+      {
+        name: 'event activity with non-object value',
+        activity: () => ({ ...createMockActivity('event', 'test'), value: 'not an object' })
+      },
+      {
+        name: 'event activity without voice property',
+        activity: () => createMockActivity('event', 'test', { someOtherProp: 'value' })
+      },
+      {
+        name: 'event activity with no value',
+        activity: () => createMockActivity('event', 'test')
+      },
+      {
+        name: 'event activity with no name',
+        activity: () => createMockActivity('event', undefined, { voice: {} })
+      }
+    ];
+
+    test.each(testCases)('should return false for $name', ({ activity }) => {
+      const result = isVoiceActivity(activity());
+
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Real-world voice activity scenarios', () => {
+    const voiceScenarios = [
+      {
+        name: 'session.update with speech detected state',
+        eventName: 'session.update',
+        voiceProps: { bot_state: 'voice.request.detected', message: 'Your request is identified' }
+      },
+      {
+        name: 'session.update with processing state',
+        eventName: 'session.update',
+        voiceProps: { bot_state: 'voice.request.processing', message: 'Your request is being processed' }
+      },
+      {
+        name: 'stream.end with user transcription',
+        eventName: 'stream.end',
+        voiceProps: { transcription: 'My destination is bangalore', origin: 'user' }
+      },
+      {
+        name: 'stream.chunk with server audio response',
+        eventName: 'stream.chunk',
+        voiceProps: { contentUrl: 'base64chunk' }
+      },
+      {
+        name: 'stream.end with bot transcription',
+        eventName: 'stream.end',
+        voiceProps: { transcription: 'Your destination is at 1000m above sea level', origin: 'bot' }
+      }
+    ];
+
+    test.each(voiceScenarios)('should return true for $name', ({ eventName, voiceProps }) => {
+      const activity = createMockVoiceActivity(eventName, voiceProps);
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+});
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
new file mode 100644
index 0000000000..a17937d8ba
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
@@ -0,0 +1,17 @@
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// This is interim until the activity protocol is ratified.
+const isVoiceActivity = (
+  activity: WebChatActivity
+): activity is WebChatActivity & {
+  name: string;
+  type: 'event';
+  value: { voice: any };
+} =>
+  activity.type === 'event' &&
+  !!activity.name &&
+  !!activity.value &&
+  typeof activity.value === 'object' &&
+  'voice' in activity.value;
+
+export default isVoiceActivity;
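The `value` payload shapes the guard accepts, collected from the specs into one annotated example. Field names beyond what the tests show (for example the exact `session.config` contents) are assumptions; the protocol is explicitly interim. Note that `session.init` must also carry a `voice` property, or `isVoiceActivity` — and therefore the composer's handler — will skip it:

```ts
// All voice traffic rides on 'event' activities carrying a `value.voice` object.
const voiceEventPayloads = {
  // Server → client, once per session; config fields are illustrative.
  'session.init': { session: { config: { sampleRate: 24000, chunkIntervalMs: 100 } }, voice: {} },

  // Server → client state transitions that drive SpeechState.
  'session.update': { voice: { bot_state: 'voice.request.detected' } },

  // Bidirectional audio: base64-encoded PCM16 chunks.
  'stream.chunk': { voice: { contentUrl: 'UklGRg…', timestamp: '2024-01-01T00:00:00.000Z' } },

  // End of an utterance; the only voice activity that reaches the transcript.
  'stream.end': { voice: { transcription: 'What is the weather today?', origin: 'user' } }
} as const;

console.log(Object.keys(voiceEventPayloads));
```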
diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts
new file mode 100644
index 0000000000..e061e24813
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts
@@ -0,0 +1,164 @@
+import isVoiceTranscriptActivity from './isVoiceTranscriptActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// Mock activity factory for testing
+const createMockActivity = (type: string = 'event', name?: string, value?: any): WebChatActivity => ({
+  type: type as any,
+  id: 'test-activity-id',
+  from: { id: 'test-user' },
+  channelData: {
+    'webchat:sequence-id': 1
+  },
+  ...(name && { name }),
+  ...(value && { value })
+});
+
+const createMockVoiceActivity = (name: string, voiceProps: Record<string, any>): WebChatActivity =>
+  createMockActivity('event', name, {
+    voice: voiceProps
+  });
+
+describe('isVoiceTranscriptActivity', () => {
+  describe('Valid transcript activities', () => {
+    test('should return true for stream.end with user transcription', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: 'Hello world',
+        origin: 'user'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for stream.end with bot transcription', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: 'Hi there!',
+        origin: 'bot'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for stream.end with empty transcription string', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: '',
+        origin: 'user'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('Invalid activities', () => {
+    const testCases = [
+      {
+        name: 'stream.chunk voice activity',
+        activity: () => createMockVoiceActivity('stream.chunk', { contentUrl: 'base64' })
+      },
+      {
+        name: 'session.update voice activity',
+        activity: () => createMockVoiceActivity('session.update', { bot_state: 'voice.request.detected' })
+      },
+      {
+        name: 'stream.end without transcription',
+        activity: () => createMockVoiceActivity('stream.end', { origin: 'user' })
+      },
+      {
+        name: 'stream.end with non-string transcription',
+        activity: () => createMockVoiceActivity('stream.end', { transcription: 123, origin: 'user' })
+      },
+      {
+        name: 'stream.end with null transcription',
+        activity: () => createMockVoiceActivity('stream.end', { transcription: null, origin: 'user' })
+      },
+      {
+        name: 'regular message activity',
+        activity: () => createMockActivity('message', 'test')
+      },
+      {
+        name: 'typing activity',
+        activity: () => createMockActivity('typing')
+      },
+      {
+        name: 'event activity without voice data',
+        activity: () => createMockActivity('event', 'stream.end', { someData: 'test' })
+      },
+      {
+        name: 'event activity with null value',
+        activity: () => ({ ...createMockActivity('event', 'stream.end'), value: null })
+      },
+      {
+        name: 'event activity without value',
+        activity: () => createMockActivity('event', 'stream.end')
+      },
+      {
+        name: 'event activity without name',
+        activity: () => createMockActivity('event', undefined, { voice: { transcription: 'test' } })
+      }
+    ];
+
+    test.each(testCases)('should return false for $name', ({ activity }) => {
+      const result = isVoiceTranscriptActivity(activity());
+
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Real-world scenarios', () => {
+    test('should identify user transcript in conversation flow', () => {
+      const conversationActivities = [
+        createMockVoiceActivity('session.update', { bot_state: 'voice.request.detected' }),
+        createMockVoiceActivity('session.update', { bot_state: 'voice.request.processing' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'What is the weather today?',
+          origin: 'user'
+        })
+      ];
+
+      const transcriptResults = conversationActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, false, true]);
+    });
+
+    test('should identify bot transcript in response flow', () => {
+      const responseActivities = [
+        createMockVoiceActivity('session.update', { bot_state: 'voice.response.available' }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'chunk1' }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'chunk2' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Today will be sunny with a high of 75 degrees.',
+          origin: 'bot'
+        })
+      ];
+
+      const transcriptResults = responseActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, false, false, true]);
+    });
+
+    test('should handle complete conversation with mixed activities', () => {
+      const mixedActivities = [
+        createMockActivity('message', 'test'),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Hello',
+          origin: 'user'
+        }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'audio' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Hi there!',
+          origin: 'bot'
+        }),
+        createMockActivity('typing')
+      ];
+
+      const transcriptResults = mixedActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, true, false, true, false]);
+    });
+  });
+});
diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts
new file mode 100644
index 0000000000..c6ae5bd742
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts
@@ -0,0 +1,18 @@
+import isVoiceActivity from './isVoiceActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+const isVoiceTranscriptActivity = (
+  activity: WebChatActivity
+): activity is WebChatActivity & {
+  value: {
+    voice: {
+      transcription: string;
+      origin: 'user' | 'agent';
+    };
+  };
+} =>
+  isVoiceActivity(activity) &&
+  activity.name === 'stream.end' &&
+  typeof activity.value?.voice?.transcription === 'string';
+
+export default isVoiceTranscriptActivity;
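Finally, a narrowing sketch for the transcript guard: once it passes, `transcription` and `origin` are strongly typed. The relative import and the `formatTranscriptLine` helper reflect hypothetical in-package usage, since the guard is not re-exported from the package index in this diff:

```ts
import type { WebChatActivity } from 'botframework-webchat-core';

import isVoiceTranscriptActivity from './isVoiceTranscriptActivity';

function formatTranscriptLine(activity: WebChatActivity): string | undefined {
  if (isVoiceTranscriptActivity(activity)) {
    // Narrowed: value.voice.transcription is guaranteed to be a string here.
    const { origin, transcription } = activity.value.voice;

    return `${origin}: ${transcription}`;
  }

  return undefined;
}

export default formatTranscriptLine;
```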