diff --git a/packages/api/src/hooks/index.ts b/packages/api/src/hooks/index.ts
index f5a1a959d7..b1d027ee3a 100644
--- a/packages/api/src/hooks/index.ts
+++ b/packages/api/src/hooks/index.ts
@@ -71,6 +71,7 @@ import useUIState from './useUIState';
 import useUserID from './useUserID';
 import useUsername from './useUsername';
 import useVoiceSelector from './useVoiceSelector';
+import useVoiceActivities from './useVoiceActivities';
 
 export { useBuildRenderActivityCallback } from '@msinternal/botframework-webchat-api-middleware';
 export { useSuggestedActionsHooks } from '@msinternal/botframework-webchat-redux-store';
@@ -148,5 +149,6 @@ export {
   useUIState,
   useUserID,
   useUsername,
-  useVoiceSelector
+  useVoiceSelector,
+  useVoiceActivities
 };
diff --git a/packages/api/src/hooks/internal/useStateRef.ts b/packages/api/src/hooks/internal/useStateRef.ts
new file mode 100644
index 0000000000..a6f517fcc3
--- /dev/null
+++ b/packages/api/src/hooks/internal/useStateRef.ts
@@ -0,0 +1,31 @@
+import { useCallback, useRef, useState } from 'react';
+
+import type { Dispatch, MutableRefObject, SetStateAction } from 'react';
+
+export default function useStateRef<T>(
+  initialValue?: T
+): readonly [T, Dispatch<SetStateAction<T>>, MutableRefObject<T>] {
+  const [_, forceRender] = useState<object>();
+  const valueRef: MutableRefObject<T> = useRef(initialValue);
+
+  const setter: Dispatch<SetStateAction<T>> = useCallback(
+    (value: SetStateAction<T>) => {
+      const { current } = valueRef;
+
+      value = value instanceof Function ? value(current) : value;
+
+      if (current !== value) {
+        valueRef.current = value;
+
+        forceRender({});
+      }
+    },
+    [forceRender, valueRef]
+  );
+
+  return Object.freeze([valueRef.current, setter, valueRef]) as readonly [
+    T,
+    Dispatch<SetStateAction<T>>,
+    MutableRefObject<T>
+  ];
+}
diff --git a/packages/api/src/hooks/useSpeechToSpeech.ts b/packages/api/src/hooks/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..4f529a2c08
--- /dev/null
+++ b/packages/api/src/hooks/useSpeechToSpeech.ts
@@ -0,0 +1,3 @@
+import useSpeechToSpeech from '../providers/SpeechToSpeech/useSpeechToSpeech';
+
+export default useSpeechToSpeech;
diff --git a/packages/api/src/hooks/useVoiceActivities.ts b/packages/api/src/hooks/useVoiceActivities.ts
new file mode 100644
index 0000000000..0abff2229f
--- /dev/null
+++ b/packages/api/src/hooks/useVoiceActivities.ts
@@ -0,0 +1,6 @@
+import { type WebChatActivity } from 'botframework-webchat-core';
+import { useSelector } from './internal/WebChatReduxContext';
+
+export default function useVoiceActivities(): [WebChatActivity[]] {
+  return [useSelector(({ voiceActivities }) => voiceActivities)];
+}
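Usage sketch for reviewers: `useVoiceActivities` follows Web Chat's array-return hook convention. The `VoiceActivityDebug` component below is illustrative only (not part of this diff), and assumes the hook is surfaced through the public `hooks` export like the other hooks in the index above:

```tsx
import React from 'react';
import { hooks } from 'botframework-webchat';

const { useVoiceActivities } = hooks;

// Hypothetical debug panel: voice activities accumulate in their own slice and
// never enter the rendered transcript, so a hook like this is the only way to
// observe them from the UI.
const VoiceActivityDebug = () => {
  const [voiceActivities] = useVoiceActivities();

  return (
    <ul>
      {voiceActivities.map((activity, index) => (
        <li key={index}>{activity.type === 'event' ? activity.name : activity.type}</li>
      ))}
    </ul>
  );
};

export default VoiceActivityDebug;
```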
diff --git a/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
new file mode 100644
index 0000000000..70d9c3aa4a
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
@@ -0,0 +1,144 @@
+import React, { useCallback, useEffect, useMemo, useRef, type ReactNode } from 'react';
+import { isVoiceActivity, WebChatActivity } from 'botframework-webchat-core';
+import { useAudioPlayer } from './private/useAudioPlayer';
+import { useRecorder } from './private/useRecorder';
+import { useDebouncedNotifications, usePostActivity, useVoiceActivities } from '../../hooks';
+import SpeechToSpeechContext from './private/Context';
+import { SpeechState } from './types/SpeechState';
+import useStateRef from '../../hooks/internal/useStateRef';
+
+export const SpeechToSpeechComposer: React.FC<{ readonly children: ReactNode }> = ({ children }) => {
+  const [voiceActivities] = useVoiceActivities();
+  const postActivity = usePostActivity();
+  const [{ connectivitystatus }] = useDebouncedNotifications();
+  const lastProcessedIndexRef = useRef(0);
+  const [speechState, setSpeechState] = useStateRef<SpeechState>('idle');
+
+  // Config received from the server on session init (only once). For now, CCv2 and MMRT run on different sample rates and chunk intervals.
+  // We read that config as a free-form object since we are unsure which session config fields will be needed in the future.
+  const [serverConfig, setServerConfig] = useStateRef<Record<string, unknown> | null>(null);
+  const { playAudio, stopAudio, isPlaying } = useAudioPlayer(serverConfig);
+
+  const isConnected = useMemo(() => connectivitystatus?.message === 'connected', [connectivitystatus]);
+
+  const sendAudioChunk = useCallback(
+    (base64: string, timestamp: string) => {
+      postActivity({
+        type: 'event',
+        name: 'stream.chunk',
+        value: { voice: { contentUrl: base64, timestamp } }
+      } as any);
+    },
+    [postActivity]
+  );
+
+  const { recording, setRecording: baseSetRecording } = useRecorder(sendAudioChunk, serverConfig);
+
+  const handleVoiceActivity = useCallback(
+    (activity: WebChatActivity) => {
+      if (!isVoiceActivity(activity)) {
+        return;
+      }
+
+      const { name, value } = activity;
+      const { voice } = value;
+
+      switch (name) {
+        // TODO: This will be a "commandResult" activity rather than an event. We need to think through handling of "command" and "commandResult" activities.
+        case 'session.init': {
+          setServerConfig(value.session?.config as Record<string, unknown>);
+          break;
+        }
+
+        case 'session.update': {
+          switch (voice.bot_state) {
+            case 'voice.request.detected':
+              stopAudio();
+              setSpeechState('listening');
+              break;
+
+            case 'voice.request.processing':
+              setSpeechState('processing');
+              break;
+
+            default:
+              break;
+          }
+          break;
+        }
+
+        case 'stream.chunk': {
+          if (voice.contentUrl) {
+            playAudio(voice.contentUrl);
+          }
+          break;
+        }
+
+        default:
+          break;
+      }
+    },
+    [playAudio, setServerConfig, setSpeechState, stopAudio]
+  );
+
+  useEffect(() => {
+    const startIndex = lastProcessedIndexRef.current;
+    if (!voiceActivities.length || startIndex >= voiceActivities.length) {
+      return;
+    }
+
+    for (let i = startIndex; i < voiceActivities.length; i++) {
+      // eslint-disable-next-line security/detect-object-injection
+      const activity = voiceActivities[i];
+
+      // Skip activities from the user: we only want to process incoming voice activities.
+      // We may receive (optional) config from the server as soon as the socket is established.
+      // At that point recording is off, but we still want to process the activity to read the config and act on it.
+      if (
+        activity.from?.role === 'user' ||
+        (!recording && isVoiceActivity(activity) && activity.name !== 'session.init')
+      ) {
+        continue;
+      }
+
+      handleVoiceActivity(activity);
+    }
+
+    if (isPlaying && speechState !== 'bot_speaking') {
+      setSpeechState('bot_speaking');
+    } else if (!isPlaying && speechState === 'bot_speaking') {
+      setSpeechState('listening');
+    }
+
+    lastProcessedIndexRef.current = voiceActivities.length;
+  }, [handleVoiceActivity, isPlaying, recording, setSpeechState, speechState, voiceActivities]);
+
+  const setRecording = useCallback(
+    async (shouldRecord: boolean) => {
+      if (!isConnected) {
+        return;
+      }
+
+      if (shouldRecord) {
+        setSpeechState('listening');
+      } else {
+        stopAudio();
+        setSpeechState('idle');
+      }
+
+      await baseSetRecording(shouldRecord);
+    },
+    [isConnected, baseSetRecording, setSpeechState, stopAudio]
+  );
+
+  const contextValue = useMemo(
+    () => ({
+      recording,
+      setRecording,
+      speechState
+    }),
+    [recording, setRecording, speechState]
+  );
+
+  return <SpeechToSpeechContext.Provider value={contextValue}>{children}</SpeechToSpeechContext.Provider>;
+};
diff --git a/packages/api/src/providers/SpeechToSpeech/private/Context.ts b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
new file mode 100644
index 0000000000..ce85310246
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
@@ -0,0 +1,14 @@
+import { createContext } from 'react';
+import { SpeechState } from '../types/SpeechState';
+
+type SpeechToSpeechContextType = {
+  recording: boolean;
+  setRecording: (recording: boolean) => void;
+  speechState: SpeechState;
+};
+
+const SpeechToSpeechContext = createContext<SpeechToSpeechContextType>(undefined!);
+
+export default SpeechToSpeechContext;
+
+export type { SpeechToSpeechContextType };
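How the provider and hook compose, sketched for review. The import paths are package-internal because this diff does not add `useSpeechToSpeech` or the composer to any public export; the `MicrophoneButton` and `VoicePanel` components are illustrative:

```tsx
import React from 'react';
import { SpeechToSpeechComposer } from './providers/SpeechToSpeech/SpeechToSpeechComposer';
import useSpeechToSpeech from './hooks/useSpeechToSpeech';

// Illustrative microphone toggle. setRecording() is silently ignored until the
// connectivity notification reports 'connected' (see the guard in the composer).
const MicrophoneButton = () => {
  const [{ recording, setRecording, speechState }] = useSpeechToSpeech();

  return (
    <button onClick={() => setRecording(!recording)} type="button">
      {recording ? `Stop (${speechState})` : 'Start talking'}
    </button>
  );
};

// The button must render under the composer, which itself must sit under Web
// Chat's API composer so usePostActivity and useDebouncedNotifications resolve.
const VoicePanel = () => (
  <SpeechToSpeechComposer>
    <MicrophoneButton />
  </SpeechToSpeechComposer>
);

export default VoicePanel;
```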
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
new file mode 100644
index 0000000000..8c1d42cb08
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
@@ -0,0 +1,279 @@
+/** @jest-environment @happy-dom/jest-environment */
+/// <reference types="jest" />
+/// <reference types="node" />
+
+import { render, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useAudioPlayer } from './useAudioPlayer';
+
+// Mock AudioContext and related APIs
+const mockAudioContext = {
+  sampleRate: 24000,
+  currentTime: 0,
+  destination: {},
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  close: jest.fn().mockResolvedValue(undefined),
+  createBuffer: jest.fn(),
+  createBufferSource: jest.fn()
+};
+
+const mockAudioBuffer = {
+  duration: 0.1, // 100ms
+  getChannelData: jest.fn().mockReturnValue(new Float32Array(2400))
+};
+
+const mockBufferSource = {
+  buffer: null,
+  connect: jest.fn(),
+  start: jest.fn(),
+  stop: jest.fn(),
+  disconnect: jest.fn(),
+  onended: null
+};
+
+// Mock global AudioContext
+global.AudioContext = jest.fn(() => mockAudioContext) as any;
+global.atob = jest.fn(str => str); // Simple mock for base64 decode
+
+type UseAudioPlayerReturn = ReturnType<typeof useAudioPlayer>;
+
+describe('setup', () => {
+  let HookApp: ComponentType;
+  let hookData: UseAudioPlayerReturn | undefined;
+  let renderResult: RenderResult;
+  const originalAudioContext = global.AudioContext;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockAudioContext.currentTime = 0;
+    mockAudioContext.createBuffer.mockReturnValue(mockAudioBuffer);
+    mockAudioContext.createBufferSource.mockReturnValue(mockBufferSource);
+    mockBufferSource.buffer = null;
+    mockBufferSource.onended = null;
+
+    HookApp = () => {
+      hookData = useAudioPlayer();
+      return null;
+    };
+  });
+
+  afterEach(() => {
+    global.AudioContext = originalAudioContext;
+  });
+
+  describe('Initialization', () => {
+    test('should initialize with correct default values', () => {
+      render(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(false);
+      expect(typeof hookData?.playAudio).toBe('function');
+      expect(typeof hookData?.stopAudio).toBe('function');
+    });
+
+    test('should create AudioContext on first playAudio call', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA=='); // base64 for 'test'
+
+      expect(AudioContext).toHaveBeenCalledWith({ sampleRate: 24000 });
+    });
+
+    test('should reuse existing AudioContext on subsequent calls', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe('Audio playback', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should process base64 audio data correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(global.atob).toHaveBeenCalledWith('dGVzdA==');
+      expect(mockAudioContext.createBuffer).toHaveBeenCalledWith(1, expect.any(Number), 24000);
+      expect(mockAudioContext.createBufferSource).toHaveBeenCalled();
+    });
+
+    test('should set up audio buffer source correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockBufferSource.connect).toHaveBeenCalledWith(mockAudioContext.destination);
+      expect(mockBufferSource.start).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBe(mockAudioBuffer);
+    });
+
+    test('should resume AudioContext if needed', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockAudioContext.resume).toHaveBeenCalled();
+    });
+
+    test('should queue multiple audio chunks correctly', () => {
+      mockAudioBuffer.duration = 0.1; // 100ms
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(2);
+      // First chunk starts at currentTime (0), second at 0.1
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(1, 0);
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(2, 0.1);
+    });
+  });
+
+  describe('isPlaying state', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should return true when audio is queued for playback', () => {
+      mockAudioContext.currentTime = 0;
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should return false when no audio is queued', () => {
+      expect(hookData?.isPlaying).toBe(false);
+    });
+
+    test('should handle multiple chunks and playing state', () => {
+      mockAudioContext.currentTime = 0.05; // In the middle of first chunk
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA=='); // 0 - 0.1
+      hookData?.playAudio('dGVzdDI='); // 0.1 - 0.2
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+  });
+
+  describe('Audio cleanup', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should clean up buffer source on ended', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      // Simulate audio ended
+      if (mockBufferSource.onended) {
+        mockBufferSource.onended();
+      }
+
+      expect(mockBufferSource.disconnect).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBeNull();
+    });
+
+    test('should stop all audio and close context', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+
+      expect(mockAudioContext.close).toHaveBeenCalled();
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Error handling', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle invalid base64 data gracefully', () => {
+      expect(() => {
+        hookData?.playAudio('invalid-base64!@#');
+      }).not.toThrow();
+    });
+
+    test('should handle AudioContext creation failure', () => {
+      global.AudioContext = jest.fn(() => {
+        throw new Error('AudioContext not supported');
+      }) as any;
+
+      expect(() => {
+        hookData?.playAudio('dGVzdA==');
+      }).toThrow('AudioContext not supported');
+    });
+
+    test('should handle missing audio context in isPlaying', () => {
+      // Before any audio is played, audioCtxRef should be null
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Real-world scenarios', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle streaming audio chunks', () => {
+      mockAudioBuffer.duration = 0.05; // 50ms chunks
+
+      // Simulate streaming 5 chunks
+      for (let i = 0; i < 5; i++) {
+        hookData?.playAudio(`chunk${i}`);
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(5);
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should handle playback interruption', () => {
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(false);
+      expect(mockAudioContext.close).toHaveBeenCalled();
+    });
+
+    test('should handle resume after stop', () => {
+      // Play, stop, then play again
+      hookData?.playAudio('dGVzdA==');
+      hookData?.stopAudio();
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(2); // New context after stop
+    });
+  });
+
+  describe('Performance considerations', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle large audio data', () => {
+      const largeBase64 = 'A'.repeat(10000);
+
+      expect(() => {
+        hookData?.playAudio(largeBase64);
+      }).not.toThrow();
+    });
+
+    test('should handle rapid successive calls', () => {
+      for (let i = 0; i < 100; i++) {
+        // Ensure the mock "base64" data has an even length as Int16Array (which represents 16-bit audio samples) requires the underlying data to be in multiples of 2 bytes
+        hookData?.playAudio(`chunk${i}`.padEnd(8, ' '));
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(100);
+    });
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
new file mode 100644
index 0000000000..f9b8405387
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
@@ -0,0 +1,71 @@
+import { useRef, useCallback } from 'react';
+
+const DEFAULT_SAMPLE_RATE = 24000;
+const INT16_SCALE = 32768;
+
+export function useAudioPlayer(config?: Record<string, unknown> | null) {
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const nextPlayTimeRef = useRef(0);
+
+  const { sampleRate = DEFAULT_SAMPLE_RATE } = config || {};
+
+  const initAudio = useCallback(() => {
+    if (!audioCtxRef.current) {
+      audioCtxRef.current = new AudioContext({ sampleRate: sampleRate as number });
+    }
+    return audioCtxRef.current;
+  }, [sampleRate]);
+
+  const playAudio = useCallback(
+    (base64: string) => {
+      const audioCtx = initAudio();
+      audioCtx.resume?.();
+
+      try {
+        const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0));
+        const int16 = new Int16Array(bytes.buffer);
+        const float32 = new Float32Array(int16.length);
+
+        for (let i = 0; i < int16.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          float32[i] = int16[i] / INT16_SCALE;
+        }
+
+        const buffer = audioCtx.createBuffer(1, float32.length, audioCtx.sampleRate);
+        buffer.getChannelData(0).set(float32);
+
+        const src = audioCtx.createBufferSource();
+        src.buffer = buffer;
+        src.connect(audioCtx.destination);
+
+        // Clear buffer when finished
+        src.onended = () => {
+          src.disconnect();
+          src.buffer = null;
+        };
+
+        nextPlayTimeRef.current = Math.max(nextPlayTimeRef.current, audioCtx.currentTime);
+        src.start(nextPlayTimeRef.current);
+        nextPlayTimeRef.current += buffer.duration;
+      } catch (error) {
+        console.warn('botframework-webchat: Error during audio playback in useAudioPlayer:', error);
+      }
+    },
+    [initAudio]
+  );
+
+  const stopAudio = useCallback(() => {
+    nextPlayTimeRef.current = 0;
+
+    if (audioCtxRef.current) {
+      audioCtxRef.current.close();
+      audioCtxRef.current = null;
+    }
+  }, []);
+
+  return {
+    playAudio,
+    stopAudio,
+    isPlaying: audioCtxRef.current ? audioCtxRef.current.currentTime < nextPlayTimeRef.current : false
+  };
+}
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useContext.ts b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
new file mode 100644
index 0000000000..50926b0a12
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
@@ -0,0 +1,15 @@
+import { useContext } from 'react';
+
+import SpeechToSpeechContext from './Context';
+
+import type { SpeechToSpeechContextType } from './Context';
+
+export default function useSpeechToSpeechContext(thrownOnUndefined = true): SpeechToSpeechContextType {
+  const contextValue = useContext(SpeechToSpeechContext);
+
+  if (thrownOnUndefined && !contextValue) {
+    throw new Error('botframework-webchat internal: This hook can only be used under <SpeechToSpeechComposer>.');
+  }
+
+  return contextValue;
+}
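The player above achieves gapless streaming by starting each chunk at `max(nextPlayTime, currentTime)` and advancing `nextPlayTime` by the chunk's duration. A standalone sketch of that arithmetic plus the PCM16 decode, runnable without Web Audio (byte values and the helper names are made up):

```ts
// Each base64 chunk decodes to 16-bit little-endian mono PCM, so:
//   samples  = bytes / 2
//   duration = samples / sampleRate
const INT16_SCALE = 32768;

function decodePcm16(bytes: Uint8Array): Float32Array {
  const int16 = new Int16Array(bytes.buffer, bytes.byteOffset, bytes.byteLength / 2);

  // Normalize to [-1, 1), exactly as the hook's loop does.
  return Float32Array.from(int16, sample => sample / INT16_SCALE);
}

function scheduleChunk(nextPlayTime: number, currentTime: number, durationInSeconds: number) {
  // Never schedule in the past; otherwise butt chunks back-to-back for gapless playback.
  const startAt = Math.max(nextPlayTime, currentTime);

  return { startAt, nextPlayTime: startAt + durationInSeconds };
}

// 4800 bytes = 2400 samples = 100 ms at 24 kHz.
const duration = 4800 / 2 / 24000; // 0.1

let state = scheduleChunk(0, 0, duration); // { startAt: 0, nextPlayTime: 0.1 }
state = scheduleChunk(state.nextPlayTime, 0.05, duration); // { startAt: 0.1, nextPlayTime: 0.2 }

console.log(decodePcm16(new Uint8Array([0, 0, 0, 64])), state); // Float32Array [0, 0.5], { startAt: 0.1, ... }
```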
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
new file mode 100644
index 0000000000..f2b01ca6b5
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
@@ -0,0 +1,164 @@
+/** @jest-environment @happy-dom/jest-environment */
+/// <reference types="jest" />
+
+import { act, render, waitFor, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useRecorder } from './useRecorder';
+
+// --- Mocks ---
+
+jest.mock('../../Ponyfill/usePonyfill', () => ({ __esModule: true, default: jest.fn(() => [{ Date: global.Date }]) }));
+
+const mockTrack = {
+  stop: jest.fn()
+};
+
+const mockMediaStream = {
+  getTracks: jest.fn(() => [mockTrack])
+};
+
+const mockMediaDevices = {
+  getUserMedia: jest.fn().mockResolvedValue(mockMediaStream)
+};
+
+const mockWorkletPort = {
+  postMessage: jest.fn(),
+  onmessage: null as ((event: { data: any }) => void) | null
+};
+
+const mockWorkletNode = {
+  connect: jest.fn(),
+  disconnect: jest.fn(),
+  port: mockWorkletPort
+};
+
+const mockAudioContext = {
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  createMediaStreamSource: jest.fn(() => ({
+    connect: jest.fn()
+  })),
+  destination: {},
+  audioWorklet: {
+    addModule: jest.fn().mockResolvedValue(undefined)
+  }
+};
+
+// --- Global Mocks Setup ---
+
+Object.defineProperty(global.navigator, 'mediaDevices', {
+  value: mockMediaDevices,
+  writable: true
+});
+
+global.AudioContext = jest.fn(() => mockAudioContext as any);
+global.AudioWorkletNode = jest.fn(() => mockWorkletNode as any);
+global.Blob = jest.fn(parts => ({ parts, type: parts[1]?.type })) as any;
+global.URL.createObjectURL = jest.fn(() => 'blob:http://localhost/mock-url');
+global.URL.revokeObjectURL = jest.fn();
+global.btoa = jest.fn(str => `btoa(${str})`);
+
+// --- Tests ---
+
+describe('useRecorder', () => {
+  let onAudioChunk: jest.Mock;
+  let HookApp: ComponentType<{ onAudioChunk: (base64: string) => void }>;
+  let hookData: ReturnType<typeof useRecorder> | undefined;
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  let renderResult: RenderResult;
+
+  beforeEach(() => {
+    // Clear all mocks before each test
+    jest.clearAllMocks();
+    onAudioChunk = jest.fn();
+    hookData = undefined;
+    mockWorkletPort.onmessage = null;
+    (mockAudioContext.state as any) = 'running';
+
+    HookApp = ({ onAudioChunk }) => {
+      hookData = useRecorder(onAudioChunk);
+      return null;
+    };
+  });
+
+  test('should be initially not recording', () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+    expect(hookData?.recording).toBe(false);
+  });
+
+  test('should start recording when setRecording(true) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledTimes(1);
+    expect(global.AudioContext).toHaveBeenCalledTimes(1);
+    expect(mockAudioContext.audioWorklet.addModule).toHaveBeenCalledTimes(1);
+    expect(global.AudioWorkletNode).toHaveBeenCalledWith(expect.anything(), 'audio-recorder', {
+      processorOptions: { bufferSize: 2400 }
+    });
+    expect(mockWorkletNode.connect).toHaveBeenCalledTimes(1);
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' });
+  });
+
+  test('should stop recording when setRecording(false) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    // Start recording
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    // Stop recording
+    act(() => {
+      hookData?.setRecording(false);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(false));
+
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'STOP' });
+    expect(mockWorkletNode.disconnect).toHaveBeenCalledTimes(1);
+    expect(mockTrack.stop).toHaveBeenCalledTimes(1);
+  });
+
+  test('should process audio chunks sent from the worklet', async () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockWorkletPort.onmessage).not.toBeNull());
+
+    // Simulate a message from the audio worklet
+    const mockAudioData = new Float32Array([0.1, 0.2, -0.1]);
+    act(() => {
+      mockWorkletPort.onmessage!({
+        data: {
+          eventType: 'audio',
+          audioData: mockAudioData
+        }
+      });
+    });
+
+    await waitFor(() => expect(onAudioChunk).toHaveBeenCalledTimes(1));
+    expect(global.btoa).toHaveBeenCalled();
+  });
+
+  test('should handle suspended audio context by resuming it', async () => {
+    (mockAudioContext.state as any) = 'suspended';
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockAudioContext.resume).toHaveBeenCalledTimes(1));
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
new file mode 100644
index 0000000000..7ba19ba244
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
@@ -0,0 +1,143 @@
+import { useRef, useState, useCallback } from 'react';
+import usePonyfill from '../../Ponyfill/usePonyfill';
+
+const audioProcessorCode = `
+  class AudioRecorderProcessor extends AudioWorkletProcessor {
+    constructor(options) {
+      super()
+      this.recording = false
+      this.buffer = []
+      this.bufferSize = options.processorOptions.bufferSize
+      this.port.onmessage = e => {
+        if (e.data.command === 'START') this.recording = true
+        else if (e.data.command === 'STOP') {
+          this.recording = false
+          this.buffer = []
+        }
+      }
+    }
+    sendBuffer() {
+      while (this.buffer.length >= this.bufferSize) {
+        const chunk = this.buffer.splice(0, this.bufferSize)
+        this.port.postMessage({
+          eventType: 'audio',
+          audioData: new Float32Array(chunk)
+        })
+      }
+    }
+    process(inputs) {
+      if (inputs[0]?.length && this.recording) {
+        this.buffer.push(...inputs[0][0])
+        if (this.buffer.length >= this.bufferSize) this.sendBuffer()
+      }
+      return true
+    }
+  }
+  registerProcessor('audio-recorder', AudioRecorderProcessor)`;
+
+const INT16_MIN = -32768;
+const INT16_MAX = 32767;
+const INT16_SCALE = 32767;
+const DEFAULT_SAMPLE_RATE = 24000;
+const DEFAULT_CHUNK_SIZE_IN_MS = 100;
+const MS_IN_SECOND = 1000;
+
+export function useRecorder(
+  onAudioChunk: (base64: string, timestamp: string) => void,
+  config?: Record<string, unknown> | null
+) {
+  const [recording, setRecordingInternal] = useState(false);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const workletRef = useRef<AudioWorkletNode | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const [{ Date }] = usePonyfill();
+
+  const { sampleRate = DEFAULT_SAMPLE_RATE, chunkIntervalMs = DEFAULT_CHUNK_SIZE_IN_MS } = config || {};
+
+  const initAudio = useCallback(async () => {
+    if (audioCtxRef.current) {
+      return;
+    }
+    const audioCtx = new AudioContext({ sampleRate: sampleRate as number });
+    const blob = new Blob([audioProcessorCode], {
+      type: 'application/javascript'
+    });
+    // eslint-disable-next-line no-restricted-properties
+    const url = URL.createObjectURL(blob);
+    await audioCtx.audioWorklet.addModule(url);
+    URL.revokeObjectURL(url);
+    // eslint-disable-next-line require-atomic-updates
+    audioCtxRef.current = audioCtx;
+  }, [sampleRate]);
+
+  const startRecording = useCallback(async () => {
+    await initAudio();
+    const audioCtx = audioCtxRef.current!;
+    if (audioCtx.state === 'suspended') {
+      await audioCtx.resume();
+    }
+    const stream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        channelCount: 1,
+        sampleRate,
+        echoCancellation: true
+      }
+    });
+    streamRef.current = stream;
+    const source = audioCtx.createMediaStreamSource(stream);
+    const worklet = new AudioWorkletNode(audioCtx, 'audio-recorder', {
+      processorOptions: {
+        bufferSize: ((sampleRate as number) * (chunkIntervalMs as number)) / MS_IN_SECOND
+      }
+    });
+
+    worklet.port.onmessage = e => {
+      if (e.data.eventType === 'audio') {
+        const timestamp = new Date().toISOString();
+        const float32 = e.data.audioData;
+        const int16 = new Int16Array(float32.length);
+        for (let i = 0; i < float32.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          int16[i] = Math.max(INT16_MIN, Math.min(INT16_MAX, float32[i] * INT16_SCALE));
+        }
+        const base64 = btoa(String.fromCharCode(...new Uint8Array(int16.buffer)));
+        onAudioChunk(base64, timestamp);
+      }
+    };
+
+    source.connect(worklet);
+    worklet.connect(audioCtx.destination);
+    worklet.port.postMessage({ command: 'START' });
+    workletRef.current = worklet;
+    setRecordingInternal(true);
+  }, [Date, chunkIntervalMs, initAudio, onAudioChunk, sampleRate]);
+
+  const stopRecording = useCallback(() => {
+    if (workletRef.current) {
+      workletRef.current.port.postMessage({ command: 'STOP' });
+      workletRef.current.disconnect();
+      workletRef.current = null;
+    }
+    if (streamRef.current) {
+      streamRef.current.getTracks().forEach(track => track.stop());
+      streamRef.current = null;
+    }
+    setRecordingInternal(false);
+  }, []);
+
+  const setRecording = useCallback(
+    async (shouldRecord: boolean) => {
+      if (!shouldRecord && recording) {
+        stopRecording();
+      } else if (shouldRecord && !recording) {
+        await startRecording();
+      }
+    },
+    [recording, startRecording, stopRecording]
+  );
+
+  return {
+    recording,
+    setRecording
+  };
}
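Worked numbers behind `processorOptions.bufferSize` above, matching the spec's expectation of 2400 (defaults assumed from `DEFAULT_SAMPLE_RATE` and `DEFAULT_CHUNK_SIZE_IN_MS`; servers may override both via the session config):

```ts
const sampleRate = 24000; // samples per second
const chunkIntervalMs = 100; // one worklet postMessage per 100 ms of audio

const bufferSize = (sampleRate * chunkIntervalMs) / 1000; // 2400 samples

// Each sample becomes one Int16 (2 bytes), so a chunk is 4800 bytes of PCM,
// which base64-encodes to 6400 characters before being posted to the server
// as a 'stream.chunk' event activity.
const bytesPerChunk = bufferSize * Int16Array.BYTES_PER_ELEMENT; // 4800
const base64Length = Math.ceil(bytesPerChunk / 3) * 4; // 6400

console.log({ bufferSize, bytesPerChunk, base64Length });
```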
diff --git a/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
new file mode 100644
index 0000000000..62d5cc8c13
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
@@ -0,0 +1 @@
+export type SpeechState = 'idle' | 'listening' | 'processing' | 'bot_speaking';
diff --git a/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..d7ac3fac44
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
@@ -0,0 +1,6 @@
+import { SpeechToSpeechContextType } from './private/Context';
+import useSpeechToSpeechContext from './private/useContext';
+
+export default function useSpeechToSpeech(): readonly [SpeechToSpeechContextType] {
+  return [useSpeechToSpeechContext()];
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index e2534aa94d..fd05b34d75 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -40,6 +40,7 @@ import getActivityLivestreamingMetadata from './utils/getActivityLivestreamingMetadata';
 import getOrgSchemaMessage from './utils/getOrgSchemaMessage';
 import onErrorResumeNext from './utils/onErrorResumeNext';
 import singleToArray from './utils/singleToArray';
+import isVoiceActivity from './utils/voiceActivity/isVoiceActivity';
 
 export {
   isForbiddenPropertyName,
@@ -99,6 +100,7 @@ export {
   emitTypingIndicator,
   getActivityLivestreamingMetadata,
   getOrgSchemaMessage,
+  isVoiceActivity,
   markActivity,
   onErrorResumeNext,
   parseAction,
diff --git a/packages/core/src/reducers/activities/combineActivitiesReducer.ts b/packages/core/src/reducers/activities/combineActivitiesReducer.ts
index 488df1be62..49ee3cee3d 100644
--- a/packages/core/src/reducers/activities/combineActivitiesReducer.ts
+++ b/packages/core/src/reducers/activities/combineActivitiesReducer.ts
@@ -10,6 +10,7 @@ import createGroupedActivitiesReducer, {
 
 type ActivitiesState = {
   activities: readonly WebChatActivity[];
+  voiceActivities: readonly WebChatActivity[];
   groupedActivities: GroupedActivitiesState;
 };
 
@@ -34,7 +35,12 @@ export default function combineActivitiesReducer(
     state: (ExistingState & ActivitiesState) | undefined,
     action: ExistingAction & GroupedActivitiesAction
   ): ExistingState & ActivitiesState {
-    const { activities: _activities, groupedActivities, ...existingState } = state ?? {};
+    const {
+      activities: _activities,
+      voiceActivities: _voiceActivities,
+      groupedActivities,
+      ...existingState
+    } = state ?? {};
 
     const nextState = existingSlicedReducer(existingState as ExistingState, action);
     const nextGroupedActivities = groupedActivitiesReducer(groupedActivities, action);
@@ -52,7 +58,12 @@ export default function combineActivitiesReducer(
     );
 
     return hasChanged
-      ? { ...nextState, activities: nextGroupedActivities.sortedActivities, groupedActivities: nextGroupedActivities }
+      ? {
+          ...nextState,
+          activities: nextGroupedActivities.sortedActivities,
+          voiceActivities: nextGroupedActivities.voiceActivities,
+          groupedActivities: nextGroupedActivities
+        }
       : state;
   };
 }
diff --git a/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts b/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
index f333c67af1..e7179ac44e 100644
--- a/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
+++ b/packages/core/src/reducers/activities/createGroupedActivitiesReducer.ts
@@ -32,7 +32,7 @@ import type { WebChatActivity } from '../../types/WebChatActivity';
 import patchActivity from './patchActivity';
 import deleteActivityByLocalId from './sort/deleteActivityByLocalId';
 import { generateLocalIdInActivity, getLocalIdFromActivity, setLocalIdInActivity } from './sort/property/LocalId';
-import { getPositionFromActivity, setPositionInActivity } from './sort/property/Position';
+import { getPositionFromActivity, queryPositionFromActivity, setPositionInActivity } from './sort/property/Position';
 import { setReceivedAtInActivity } from './sort/property/ReceivedAt';
 import { querySendStatusFromOutgoingActivity, setSendStatusInOutgoingActivity } from './sort/property/SendStatus';
 import queryLocalIdAByActivityId from './sort/queryLocalIdByActivityId';
@@ -42,6 +42,8 @@ import updateActivityChannelData, {
   updateActivityChannelDataInternalSkipNameCheck
 } from './sort/updateActivityChannelData';
 import upsert, { INITIAL_STATE } from './sort/upsert';
+import isVoiceActivity from '../../utils/voiceActivity/isVoiceActivity';
+import isVoiceTranscriptActivity from '../../utils/voiceActivity/isVoiceTranscriptActivity';
 
 type GroupedActivitiesAction =
   | DeleteActivityAction
@@ -100,6 +102,13 @@ function createGroupedActivitiesReducer(
         payload: { activity }
       } = action;
 
+      // Non-transcript voice activities do not render in the UI and are mostly fire-and-forget (we don't have replay, etc.),
+      // so we don't process them here and simply pass them through.
+      if (isVoiceActivity(activity) && !isVoiceTranscriptActivity(activity)) {
+        state = upsert(ponyfill, state, activity);
+        break;
+      }
+
       // Patch activity so the outgoing blob: URL is not re-downloadable.
       // Related to /__tests__/html2/accessibility/liveRegion/attachment/file.
 
@@ -151,6 +160,12 @@ function createGroupedActivitiesReducer(
     }
 
     case POST_ACTIVITY_FULFILLED: {
+      // Non-transcript voice activities do not render in the UI and are mostly fire-and-forget (we don't have replay, etc.),
+      // so we don't process them here and simply pass them through.
+      if (isVoiceActivity(action.payload.activity) && !isVoiceTranscriptActivity(action.payload.activity)) {
+        state = upsert(ponyfill, state, action.payload.activity);
+        break;
+      }
       const localId = queryLocalIdAByClientActivityId(state, action.meta.clientActivityID);
       const existingActivity = localId && state.activityMap.get(localId)?.activity;
 
@@ -175,8 +190,11 @@ function createGroupedActivitiesReducer(
         activity = setSendStatusInOutgoingActivity(activity, SENT);
         activity = setLocalIdInActivity(activity, localId);
 
-        // Keep existing position.
-        activity = setPositionInActivity(activity, getPositionFromActivity(existingActivity));
+        // Keep the existing position if there is one (voice activities don't have positions).
+        const existingPosition = queryPositionFromActivity(existingActivity);
+        if (typeof existingPosition !== 'undefined') {
+          activity = setPositionInActivity(activity, getPositionFromActivity(existingActivity));
+        }
 
         // Compare the INCOMING_ACTIVITY below:
         // - POST_ACTIVITY_FULFILLED will mark send status as SENT
diff --git a/packages/core/src/reducers/activities/sort/types.ts b/packages/core/src/reducers/activities/sort/types.ts
index 286711854f..28be6d1149 100644
--- a/packages/core/src/reducers/activities/sort/types.ts
+++ b/packages/core/src/reducers/activities/sort/types.ts
@@ -57,6 +57,7 @@ type State = {
   readonly livestreamSessionMap: LivestreamSessionMap;
   readonly sortedChatHistoryList: SortedChatHistory;
   readonly sortedActivities: readonly Activity[];
+  readonly voiceActivities: readonly Activity[];
 };
 
 export {
diff --git a/packages/core/src/reducers/activities/sort/upsert.ts b/packages/core/src/reducers/activities/sort/upsert.ts
index c917d77568..9528d1c956 100644
--- a/packages/core/src/reducers/activities/sort/upsert.ts
+++ b/packages/core/src/reducers/activities/sort/upsert.ts
@@ -18,6 +18,8 @@ import {
   type SortedChatHistoryEntry,
   type State
 } from './types';
+import isVoiceActivity from '../../../utils/voiceActivity/isVoiceActivity';
+import isVoiceTranscriptActivity from '../../../utils/voiceActivity/isVoiceTranscriptActivity';
 
 // Honoring timestamp or not:
 //
@@ -48,7 +50,8 @@ const INITIAL_STATE = Object.freeze({
   livestreamSessionMap: Object.freeze(new Map()),
   howToGroupingMap: Object.freeze(new Map()),
   sortedActivities: Object.freeze([]),
-  sortedChatHistoryList: Object.freeze([])
+  sortedChatHistoryList: Object.freeze([]),
+  voiceActivities: Object.freeze([])
 } satisfies State);
 
 // Question: Why insertion sort works but not quick sort?
@@ -58,6 +61,14 @@ const INITIAL_STATE = Object.freeze({
 // - Duplicate timestamps: activities without timestamp can't be sort deterministically with quick sort
 
 function upsert(ponyfill: Pick<GlobalScopePonyfill, 'Date'>, state: State, activity: Activity): State {
+  // We only want to process transcript voice activities through this path, as those will be rendered.
+  // All other voice activities are stored in a separate slice and we don't perform any operation on them.
+  if (isVoiceActivity(activity) && !isVoiceTranscriptActivity(activity)) {
+    return Object.freeze({
+      ...state,
+      voiceActivities: Object.freeze([...state.voiceActivities, activity])
+    } satisfies State);
+  }
   const nextActivityIdToLocalIdMap = new Map(state.activityIdToLocalIdMap);
   const nextActivityMap = new Map(state.activityMap);
   const nextClientActivityIdToLocalIdMap = new Map(state.clientActivityIdToLocalIdMap);
@@ -336,7 +347,8 @@ function upsert(ponyfill: Pick<GlobalScopePonyfill, 'Date'>, state: State, activity: Activity): State {
     howToGroupingMap: Object.freeze(nextHowToGroupingMap),
     livestreamSessionMap: Object.freeze(nextLivestreamSessionMap),
     sortedActivities: Object.freeze(nextSortedActivities),
-    sortedChatHistoryList: Object.freeze(nextSortedChatHistoryList)
+    sortedChatHistoryList: Object.freeze(nextSortedChatHistoryList),
+    voiceActivities: state.voiceActivities
   } satisfies State);
 }
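The reducers now route activities between two slices. A self-contained restatement of that routing rule (the transcript check is inlined because `isVoiceTranscriptActivity` stays internal to the core package; the sample activity is abbreviated and cast for brevity):

```ts
import { isVoiceActivity, type WebChatActivity } from 'botframework-webchat-core';

function routeActivity(activity: WebChatActivity): 'chat history' | 'voice slice' {
  // Transcripts (stream.end with a string transcription) continue through the
  // normal sort/group pipeline; all other voice events are append-only.
  const isTranscript =
    isVoiceActivity(activity) &&
    activity.name === 'stream.end' &&
    typeof activity.value.voice?.transcription === 'string';

  return isVoiceActivity(activity) && !isTranscript ? 'voice slice' : 'chat history';
}

// A bot audio chunk stays out of the rendered transcript…
console.log(
  routeActivity({ type: 'event', name: 'stream.chunk', value: { voice: { contentUrl: 'base64…' } } } as WebChatActivity)
); // → 'voice slice'

// …while a finished utterance with a transcription is rendered.
console.log(
  routeActivity({
    type: 'event',
    name: 'stream.end',
    value: { voice: { transcription: 'Hello', origin: 'user' } }
  } as WebChatActivity)
); // → 'chat history'
```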
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
new file mode 100644
index 0000000000..1b2c3abf77
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
@@ -0,0 +1,112 @@
+import isVoiceActivity from './isVoiceActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// Mock activity factory for testing
+const createMockActivity = (type: string = 'event', name?: string, value?: any): WebChatActivity => ({
+  type: type as any,
+  id: 'test-activity-id',
+  from: { id: 'test-user' },
+  channelData: {
+    'webchat:sequence-id': 1
+  },
+  ...(name && { name }),
+  ...(value && { value })
+});
+
+const createMockVoiceActivity = (name: string, voiceProps: Record<string, any>): WebChatActivity =>
+  createMockActivity('event', name, {
+    voice: voiceProps
+  });
+
+describe('isVoiceActivity', () => {
+  describe('Valid voice activities', () => {
+    test('should return true for event activity with voice', () => {
+      const activity = createMockVoiceActivity('stream.chunk', { contentUrl: 'base64' });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for voice activity with minimal voice', () => {
+      const activity = createMockActivity('event', 'stream.chunk', { voice: {} });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('Invalid activities', () => {
+    const testCases = [
+      {
+        name: 'message activity with voice',
+        activity: () => createMockActivity('message', 'stream.chunk', { voice: { contentUrl: 'base64' } })
+      },
+      {
+        name: 'typing activity',
+        activity: () => createMockActivity('typing')
+      },
+      {
+        name: 'event activity with non-object value',
+        activity: () => ({ ...createMockActivity('event', 'test'), value: 'not an object' })
+      },
+      {
+        name: 'event activity without voice property',
+        activity: () => createMockActivity('event', 'test', { someOtherProp: 'value' })
+      },
+      {
+        name: 'event activity with no value',
+        activity: () => createMockActivity('event', 'test')
+      },
+      {
+        name: 'event activity with no name',
+        activity: () => createMockActivity('event', undefined, { voice: {} })
+      }
+    ];
+
+    test.each(testCases)('should return false for $name', ({ activity }) => {
+      const result = isVoiceActivity(activity());
+
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Real-world voice activity scenarios', () => {
+    const voiceScenarios = [
+      {
+        name: 'session.update with speech detected state',
+        eventName: 'session.update',
+        voiceProps: { bot_state: 'voice.request.detected', message: 'Your request is identified' }
+      },
+      {
+        name: 'session.update with processing state',
+        eventName: 'session.update',
+        voiceProps: { bot_state: 'voice.request.processing', message: 'Your request is being processed' }
+      },
+      {
+        name: 'stream.end with user transcription',
+        eventName: 'stream.end',
+        voiceProps: { transcription: 'My destination is bangalore', origin: 'user' }
+      },
+      {
+        name: 'stream.chunk with server audio response',
+        eventName: 'stream.chunk',
+        voiceProps: { contentUrl: 'base64chunk' }
+      },
+      {
+        name: 'stream.end with bot transcription',
+        eventName: 'stream.end',
+        voiceProps: { transcription: 'Your destination is at 1000m above sea level', origin: 'bot' }
+      }
+    ];
+
+    test.each(voiceScenarios)('should return true for $name', ({ eventName, voiceProps }) => {
+      const activity = createMockVoiceActivity(eventName, voiceProps);
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+});
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
new file mode 100644
index 0000000000..a17937d8ba
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
@@ -0,0 +1,17 @@
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// This is interim until the activity protocol is ratified.
+const isVoiceActivity = (
+  activity: WebChatActivity
+): activity is WebChatActivity & {
+  name: string;
+  type: 'event';
+  value: { voice: any };
+} =>
+  activity.type === 'event' &&
+  !!activity.name &&
+  !!activity.value &&
+  typeof activity.value === 'object' &&
+  'voice' in activity.value;
+
+export default isVoiceActivity;
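The `value` payload shapes the guard accepts, collected from the specs into one annotated example. Field names beyond what the tests show (for example the exact `session.config` contents) are assumptions; the protocol is explicitly interim. Note that `session.init` must also carry a `voice` property, or `isVoiceActivity` — and therefore the composer's handler — will skip it:

```ts
// All voice traffic rides on 'event' activities carrying a `value.voice` object.
const voiceEventPayloads = {
  // Server → client, once per session; config fields are illustrative.
  'session.init': { session: { config: { sampleRate: 24000, chunkIntervalMs: 100 } }, voice: {} },

  // Server → client state transitions that drive SpeechState.
  'session.update': { voice: { bot_state: 'voice.request.detected' } },

  // Bidirectional audio: base64-encoded PCM16 chunks.
  'stream.chunk': { voice: { contentUrl: 'UklGRg…', timestamp: '2024-01-01T00:00:00.000Z' } },

  // End of an utterance; the only voice activity that reaches the transcript.
  'stream.end': { voice: { transcription: 'What is the weather today?', origin: 'user' } }
} as const;

console.log(Object.keys(voiceEventPayloads));
```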
diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts
new file mode 100644
index 0000000000..e061e24813
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts
@@ -0,0 +1,164 @@
+import isVoiceTranscriptActivity from './isVoiceTranscriptActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// Mock activity factory for testing
+const createMockActivity = (type: string = 'event', name?: string, value?: any): WebChatActivity => ({
+  type: type as any,
+  id: 'test-activity-id',
+  from: { id: 'test-user' },
+  channelData: {
+    'webchat:sequence-id': 1
+  },
+  ...(name && { name }),
+  ...(value && { value })
+});
+
+const createMockVoiceActivity = (name: string, voiceProps: Record<string, any>): WebChatActivity =>
+  createMockActivity('event', name, {
+    voice: voiceProps
+  });
+
+describe('isVoiceTranscriptActivity', () => {
+  describe('Valid transcript activities', () => {
+    test('should return true for stream.end with user transcription', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: 'Hello world',
+        origin: 'user'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for stream.end with bot transcription', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: 'Hi there!',
+        origin: 'bot'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for stream.end with empty transcription string', () => {
+      const activity = createMockVoiceActivity('stream.end', {
+        transcription: '',
+        origin: 'user'
+      });
+
+      const result = isVoiceTranscriptActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('Invalid activities', () => {
+    const testCases = [
+      {
+        name: 'stream.chunk voice activity',
+        activity: () => createMockVoiceActivity('stream.chunk', { contentUrl: 'base64' })
+      },
+      {
+        name: 'session.update voice activity',
+        activity: () => createMockVoiceActivity('session.update', { bot_state: 'voice.request.detected' })
+      },
+      {
+        name: 'stream.end without transcription',
+        activity: () => createMockVoiceActivity('stream.end', { origin: 'user' })
+      },
+      {
+        name: 'stream.end with non-string transcription',
+        activity: () => createMockVoiceActivity('stream.end', { transcription: 123, origin: 'user' })
+      },
+      {
+        name: 'stream.end with null transcription',
+        activity: () => createMockVoiceActivity('stream.end', { transcription: null, origin: 'user' })
+      },
+      {
+        name: 'regular message activity',
+        activity: () => createMockActivity('message', 'test')
+      },
+      {
+        name: 'typing activity',
+        activity: () => createMockActivity('typing')
+      },
+      {
+        name: 'event activity without voice data',
+        activity: () => createMockActivity('event', 'stream.end', { someData: 'test' })
+      },
+      {
+        name: 'event activity with null value',
+        activity: () => ({ ...createMockActivity('event', 'stream.end'), value: null })
+      },
+      {
+        name: 'event activity without value',
+        activity: () => createMockActivity('event', 'stream.end')
+      },
+      {
+        name: 'event activity without name',
+        activity: () => createMockActivity('event', undefined, { voice: { transcription: 'test' } })
+      }
+    ];
+
+    test.each(testCases)('should return false for $name', ({ activity }) => {
+      const result = isVoiceTranscriptActivity(activity());
+
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Real-world scenarios', () => {
+    test('should identify user transcript in conversation flow', () => {
+      const conversationActivities = [
+        createMockVoiceActivity('session.update', { bot_state: 'voice.request.detected' }),
+        createMockVoiceActivity('session.update', { bot_state: 'voice.request.processing' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'What is the weather today?',
+          origin: 'user'
+        })
+      ];
+
+      const transcriptResults = conversationActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, false, true]);
+    });
+
+    test('should identify bot transcript in response flow', () => {
+      const responseActivities = [
+        createMockVoiceActivity('session.update', { bot_state: 'voice.response.available' }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'chunk1' }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'chunk2' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Today will be sunny with a high of 75 degrees.',
+          origin: 'bot'
+        })
+      ];
+
+      const transcriptResults = responseActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, false, false, true]);
+    });
+
+    test('should handle complete conversation with mixed activities', () => {
+      const mixedActivities = [
+        createMockActivity('message', 'test'),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Hello',
+          origin: 'user'
+        }),
+        createMockVoiceActivity('stream.chunk', { contentUrl: 'audio' }),
+        createMockVoiceActivity('stream.end', {
+          transcription: 'Hi there!',
+          origin: 'bot'
+        }),
+        createMockActivity('typing')
+      ];
+
+      const transcriptResults = mixedActivities.map(activity => isVoiceTranscriptActivity(activity));
+
+      expect(transcriptResults).toEqual([false, true, false, true, false]);
+    });
+  });
+});
diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts
new file mode 100644
index 0000000000..c6ae5bd742
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts
@@ -0,0 +1,18 @@
+import isVoiceActivity from './isVoiceActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+const isVoiceTranscriptActivity = (
+  activity: WebChatActivity
+): activity is WebChatActivity & {
+  value: {
+    voice: {
+      transcription: string;
+      origin: 'user' | 'agent';
+    };
+  };
+} =>
+  isVoiceActivity(activity) &&
+  activity.name === 'stream.end' &&
+  typeof activity.value?.voice?.transcription === 'string';
+
+export default isVoiceTranscriptActivity;
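Finally, a narrowing sketch for the transcript guard: once it passes, `transcription` and `origin` are strongly typed. The relative import and the `formatTranscriptLine` helper reflect hypothetical in-package usage, since the guard is not re-exported from the package index in this diff:

```ts
import type { WebChatActivity } from 'botframework-webchat-core';

import isVoiceTranscriptActivity from './isVoiceTranscriptActivity';

function formatTranscriptLine(activity: WebChatActivity): string | undefined {
  if (isVoiceTranscriptActivity(activity)) {
    // Narrowed: value.voice.transcription is guaranteed to be a string here.
    const { origin, transcription } = activity.value.voice;

    return `${origin}: ${transcription}`;
  }

  return undefined;
}

export default formatTranscriptLine;
```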