diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md
index 8b019190c1f..5dcc442b77e 100644
--- a/common/api-review/ai.api.md
+++ b/common/api-review/ai.api.md
@@ -85,6 +85,11 @@ export class ArraySchema extends Schema {
     toJSON(): SchemaRequest;
 }
 
+// @beta
+export interface AudioConversationController {
+    stop: () => Promise<void>;
+}
+
 // @public
 export abstract class Backend {
     protected constructor(type: BackendType);
@@ -710,7 +715,7 @@ export interface LiveGenerationConfig {
     frequencyPenalty?: number;
     maxOutputTokens?: number;
     presencePenalty?: number;
-    responseModalities?: [ResponseModality];
+    responseModalities?: ResponseModality[];
     speechConfig?: SpeechConfig;
     temperature?: number;
     topK?: number;
@@ -787,6 +792,7 @@ export class LiveSession {
     // @internal
     constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
     close(): Promise<void>;
+    inConversation: boolean;
     isClosed: boolean;
     receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
     send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
@@ -860,7 +866,7 @@ export const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];
 
 // @beta
 export interface PrebuiltVoiceConfig {
-    voiceConfig?: string;
+    voiceName?: string;
 }
 
 // @public
@@ -882,6 +888,7 @@ export interface RequestOptions {
 export const ResponseModality: {
     readonly TEXT: "TEXT";
     readonly IMAGE: "IMAGE";
+    readonly AUDIO: "AUDIO";
 };
 
 // @beta
@@ -1031,6 +1038,14 @@ export interface SpeechConfig {
     voiceConfig?: VoiceConfig;
 }
 
+// @beta
+export function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
+
+// @beta
+export interface StartAudioConversationOptions {
+    functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
+}
+
 // @public
 export interface StartChatParams extends BaseParams {
     // (undocumented)
diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml
index 6979b475536..e6a1443c562 100644
--- a/docs-devsite/_toc.yaml
+++ b/docs-devsite/_toc.yaml
@@ -16,6 +16,8 @@ toc:
       path: /docs/reference/js/ai.anyofschema.md
     - title: ArraySchema
       path: /docs/reference/js/ai.arrayschema.md
+    - title: AudioConversationController
+      path: /docs/reference/js/ai.audioconversationcontroller.md
     - title: Backend
       path: /docs/reference/js/ai.backend.md
     - title: BaseParams
       path: /docs/reference/js/ai.baseparams.md
@@ -160,6 +162,8 @@
       path: /docs/reference/js/ai.segment.md
     - title: SpeechConfig
       path: /docs/reference/js/ai.speechconfig.md
+    - title: StartAudioConversationOptions
+      path: /docs/reference/js/ai.startaudioconversationoptions.md
     - title: StartChatParams
       path: /docs/reference/js/ai.startchatparams.md
     - title: StringSchema
diff --git a/docs-devsite/ai.audioconversationcontroller.md b/docs-devsite/ai.audioconversationcontroller.md
new file mode 100644
index 00000000000..18820a2fe55
--- /dev/null
+++ b/docs-devsite/ai.audioconversationcontroller.md
@@ -0,0 +1,41 @@
Project: /docs/reference/js/_project.yaml
Book: /docs/reference/_book.yaml
page_type: reference

{% comment %}
DO NOT EDIT THIS FILE!
This is generated by the JS SDK team, and any local changes will be
overwritten. Changes should be made in the source code at
https://github.com/firebase/firebase-js-sdk
{% endcomment %}

# AudioConversationController interface

> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
>

A controller for managing an active audio conversation.
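The controller is returned by [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f); a typical flow holds on to it and calls `stop()` when the user ends the call. A minimal sketch (the button element is illustrative, not part of this API):

```typescript
const controller = await startAudioConversation(liveSession);
// `hangUpButton` is an assumed DOM element, shown only for illustration.
hangUpButton.addEventListener('click', () => {
  // stop() resolves once the microphone and audio resources are released.
  void controller.stop();
});
```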
Signature:

```typescript
export interface AudioConversationController
```

## Properties

| Property | Type | Description |
| --- | --- | --- |
| [stop](./ai.audioconversationcontroller.md#audioconversationcontrollerstop) | () => Promise<void> | (Public Preview) Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete. |

## AudioConversationController.stop

> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
>

Stops the audio conversation, closes the microphone connection, and cleans up resources. Returns a promise that resolves when cleanup is complete.

Signature:

```typescript
stop: () => Promise<void>;
```
diff --git a/docs-devsite/ai.livegenerationconfig.md b/docs-devsite/ai.livegenerationconfig.md
index a9724ccd86c..1a920afa1e7 100644
--- a/docs-devsite/ai.livegenerationconfig.md
+++ b/docs-devsite/ai.livegenerationconfig.md
@@ -28,7 +28,7 @@ export interface LiveGenerationConfig
 | [frequencyPenalty](./ai.livegenerationconfig.md#livegenerationconfigfrequencypenalty) | number | (Public Preview) Frequency penalties. |
 | [maxOutputTokens](./ai.livegenerationconfig.md#livegenerationconfigmaxoutputtokens) | number | (Public Preview) Specifies the maximum number of tokens that can be generated in the response. The number of tokens per word varies depending on the language outputted. Is unbounded by default. |
 | [presencePenalty](./ai.livegenerationconfig.md#livegenerationconfigpresencepenalty) | number | (Public Preview) Positive penalties. |
-| [responseModalities](./ai.livegenerationconfig.md#livegenerationconfigresponsemodalities) | \[[ResponseModality](./ai.md#responsemodality)\] | (Public Preview) The modalities of the response. |
+| [responseModalities](./ai.livegenerationconfig.md#livegenerationconfigresponsemodalities) | [ResponseModality](./ai.md#responsemodality)\[\] | (Public Preview) The modalities of the response. |
 | [speechConfig](./ai.livegenerationconfig.md#livegenerationconfigspeechconfig) | [SpeechConfig](./ai.speechconfig.md#speechconfig_interface) | (Public Preview) Configuration for speech synthesis. |
 | [temperature](./ai.livegenerationconfig.md#livegenerationconfigtemperature) | number | (Public Preview) Controls the degree of randomness in token selection. A temperature value of 0 means that the highest probability tokens are always selected. In this case, responses for a given prompt are mostly deterministic, but a small amount of variation is still possible. |
 | [topK](./ai.livegenerationconfig.md#livegenerationconfigtopk) | number | (Public Preview) Changes how the model selects tokens for output. A topK value of 1 means the selected token is the most probable among all tokens in the model's vocabulary, while a topK value of 3 means that the next token is selected from among the 3 most probable using probabilities sampled. Tokens are then further filtered with the highest selected temperature sampling. Defaults to 40 if unspecified. |
@@ -83,7 +83,7 @@ The modalities of the response.
 Signature:
 
 ```typescript
-responseModalities?: [ResponseModality];
+responseModalities?: ResponseModality[];
 ```
 
 ## LiveGenerationConfig.speechConfig
diff --git a/docs-devsite/ai.livesession.md b/docs-devsite/ai.livesession.md
index bb2eca88b89..6ae2cde711c 100644
--- a/docs-devsite/ai.livesession.md
+++ b/docs-devsite/ai.livesession.md
@@ -29,6 +29,7 @@ export declare class LiveSession
 
 | Property | Modifiers | Type | Description |
 | --- | --- | --- | --- |
+| [inConversation](./ai.livesession.md#livesessioninconversation) | | boolean | (Public Preview) Indicates whether this Live session is being controlled by an AudioConversationController. |
 | [isClosed](./ai.livesession.md#livesessionisclosed) | | boolean | (Public Preview) Indicates whether this Live session is closed. |
 
 ## Methods
@@ -41,6 +42,19 @@ export declare class LiveSession
 | [sendMediaChunks(mediaChunks)](./ai.livesession.md#livesessionsendmediachunks) | | (Public Preview) Sends realtime input to the server. |
 | [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | (Public Preview) Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface). |
 
+## LiveSession.inConversation
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Indicates whether this Live session is being controlled by an `AudioConversationController`.
+
+Signature:
+
+```typescript
+inConversation: boolean;
+```
+
 ## LiveSession.isClosed
 
 > This API is provided as a preview for developers and may change based on feedback that we receive.
diff --git a/docs-devsite/ai.md b/docs-devsite/ai.md
index a70772dc763..e6811e96afa 100644
--- a/docs-devsite/ai.md
+++ b/docs-devsite/ai.md
@@ -22,6 +22,8 @@ The Firebase AI Web SDK.
 | [getGenerativeModel(ai, modelParams, requestOptions)](./ai.md#getgenerativemodel_80bd839) | Returns a [GenerativeModel](./ai.generativemodel.md#generativemodel_class) class with methods for inference and other functionality. |
 | [getImagenModel(ai, modelParams, requestOptions)](./ai.md#getimagenmodel_e1f6645) | (Public Preview) Returns an [ImagenModel](./ai.imagenmodel.md#imagenmodel_class) class with methods for using Imagen.Only Imagen 3 models (named imagen-3.0-*) are supported. |
 | [getLiveGenerativeModel(ai, modelParams)](./ai.md#getlivegenerativemodel_f2099ac) | (Public Preview) Returns a [LiveGenerativeModel](./ai.livegenerativemodel.md#livegenerativemodel_class) class for real-time, bidirectional communication.The Live API is only supported in modern browser windows and Node >= 22. |
+| function(liveSession, ...) |
+| [startAudioConversation(liveSession, options)](./ai.md#startaudioconversation_01c8e7f) | (Public Preview) Starts a real-time, bidirectional audio conversation with the model. This helper function manages the complexities of microphone access, audio recording, playback, and interruptions. |
 
 ## Classes
@@ -53,6 +55,7 @@ The Firebase AI Web SDK.
 | --- | --- |
 | [AI](./ai.ai.md#ai_interface) | An instance of the Firebase AI SDK.Do not create this instance directly. Instead, use [getAI()](./ai.md#getai_a94a413). |
 | [AIOptions](./ai.aioptions.md#aioptions_interface) | Options for initializing the AI service using [getAI()](./ai.md#getai_a94a413). This allows specifying which backend to use (Vertex AI Gemini API or Gemini Developer API) and configuring its specific options (like location for Vertex AI). |
+| [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface) | (Public Preview) A controller for managing an active audio conversation. |
 | [BaseParams](./ai.baseparams.md#baseparams_interface) | Base parameters for a number of methods. |
 | [Citation](./ai.citation.md#citation_interface) | A single citation. |
 | [CitationMetadata](./ai.citationmetadata.md#citationmetadata_interface) | Citation metadata that may be found on a [GenerateContentCandidate](./ai.generatecontentcandidate.md#generatecontentcandidate_interface). |
@@ -112,6 +115,7 @@ The Firebase AI Web SDK.
 | [SearchEntrypoint](./ai.searchentrypoint.md#searchentrypoint_interface) | Google search entry point. |
 | [Segment](./ai.segment.md#segment_interface) | Represents a specific segment within a [Content](./ai.content.md#content_interface) object, often used to pinpoint the exact location of text or data that grounding information refers to. |
 | [SpeechConfig](./ai.speechconfig.md#speechconfig_interface) | (Public Preview) Configures speech synthesis. |
+| [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | (Public Preview) Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f). |
 | [StartChatParams](./ai.startchatparams.md#startchatparams_interface) | Params for [GenerativeModel.startChat()](./ai.generativemodel.md#generativemodelstartchat). |
 | [TextPart](./ai.textpart.md#textpart_interface) | Content part interface if the part represents a text string. |
 | [ThinkingConfig](./ai.thinkingconfig.md#thinkingconfig_interface) | Configuration for "thinking" behavior of compatible Gemini models.Certain models utilize a thinking process before generating a response. This allows them to reason through complex problems and plan a more coherent and accurate answer. |
@@ -307,6 +311,76 @@ export declare function getLiveGenerativeModel(ai: AI, modelParams: LiveModelPar
 
 If the `apiKey` or `projectId` fields are missing in your Firebase config.
 
+## function(liveSession, ...)
+
+### startAudioConversation(liveSession, options) {:#startaudioconversation_01c8e7f}
+
+> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
+>
+
+Starts a real-time, bidirectional audio conversation with the model. This helper function manages the complexities of microphone access, audio recording, playback, and interruptions.
+
+Important: This function must be called in response to a user gesture (for example, a button click) to comply with [browser autoplay policies](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy).
+
+Signature:
+
+```typescript
+export declare function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
+```
+
+#### Parameters
+
+| Parameter | Type | Description |
+| --- | --- | --- |
+| liveSession | [LiveSession](./ai.livesession.md#livesession_class) | An active [LiveSession](./ai.livesession.md#livesession_class) instance. |
+| options | [StartAudioConversationOptions](./ai.startaudioconversationoptions.md#startaudioconversationoptions_interface) | Configuration options for the audio conversation. |
Returns:

Promise<[AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface)>

A `Promise` that resolves with an [AudioConversationController](./ai.audioconversationcontroller.md#audioconversationcontroller_interface).

#### Exceptions

`AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).

`DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the [MDN documentation](https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions) for a full list of exceptions.

### Example

```javascript
const liveSession = await model.connect();
let conversationController;

// This function must be called from within a click handler.
async function startConversation() {
  try {
    conversationController = await startAudioConversation(liveSession);
  } catch (e) {
    // Handle AI-specific errors
    if (e instanceof AIError) {
      console.error("AI Error:", e.message);
    }
    // Handle microphone permission and hardware errors
    else if (e instanceof DOMException) {
      console.error("Microphone Error:", e.message);
    }
    // Handle other unexpected errors
    else {
      console.error("An unexpected error occurred:", e);
    }
  }
}

// Later, to stop the conversation:
// if (conversationController) {
//   await conversationController.stop();
// }
```

## AIErrorCode

Standardized error codes that [AIError](./ai.aierror.md#aierror_class) can have.
@@ -589,6 +663,7 @@ Generation modalities to be returned in generation responses.
 ResponseModality: {
   readonly TEXT: "TEXT";
   readonly IMAGE: "IMAGE";
+  readonly AUDIO: "AUDIO";
 }
 ```
diff --git a/docs-devsite/ai.prebuiltvoiceconfig.md b/docs-devsite/ai.prebuiltvoiceconfig.md
index a5600e9a650..8627ae184b3 100644
--- a/docs-devsite/ai.prebuiltvoiceconfig.md
+++ b/docs-devsite/ai.prebuiltvoiceconfig.md
@@ -25,9 +25,9 @@ export interface PrebuiltVoiceConfig
 
 | Property | Type | Description |
 | --- | --- | --- |
-| [voiceConfig](./ai.prebuiltvoiceconfig.md#prebuiltvoiceconfigvoiceconfig) | string | (Public Preview) The voice name to use for speech synthesis.For a full list of names and demos of what each voice sounds like, see [Chirp 3: HD Voices](https://cloud.google.com/text-to-speech/docs/chirp3-hd). |
+| [voiceName](./ai.prebuiltvoiceconfig.md#prebuiltvoiceconfigvoicename) | string | (Public Preview) The voice name to use for speech synthesis.For a full list of names and demos of what each voice sounds like, see [Chirp 3: HD Voices](https://cloud.google.com/text-to-speech/docs/chirp3-hd). |
 
-## PrebuiltVoiceConfig.voiceConfig
+## PrebuiltVoiceConfig.voiceName
 
 > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
 >
@@ -39,5 +39,5 @@ For a full list of names and demos of what each voice sounds like, see [Chirp 3: HD Voices](https://cloud.google.com/text-to-speech/docs/chirp3-hd).
 
 Signature:
 
 ```typescript
-voiceConfig?: string;
+voiceName?: string;
 ```
diff --git a/docs-devsite/ai.startaudioconversationoptions.md b/docs-devsite/ai.startaudioconversationoptions.md
new file mode 100644
index 00000000000..08e91d2c7b5
--- /dev/null
+++ b/docs-devsite/ai.startaudioconversationoptions.md
@@ -0,0 +1,41 @@
Project: /docs/reference/js/_project.yaml
Book: /docs/reference/_book.yaml
page_type: reference

{% comment %}
DO NOT EDIT THIS FILE!
This is generated by the JS SDK team, and any local changes will be
overwritten. Changes should be made in the source code at
https://github.com/firebase/firebase-js-sdk
{% endcomment %}

# StartAudioConversationOptions interface

> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
>

Options for [startAudioConversation()](./ai.md#startaudioconversation_01c8e7f).

Signature:

```typescript
export interface StartAudioConversationOptions
```

## Properties

| Property | Type | Description |
| --- | --- | --- |
| [functionCallingHandler](./ai.startaudioconversationoptions.md#startaudioconversationoptionsfunctioncallinghandler) | (functionCalls: [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface)\['functionCalls'\]) => Promise<[Part](./ai.md#part)> | (Public Preview) An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a Part, which will then be sent back to the model. |

## StartAudioConversationOptions.functionCallingHandler

> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
>

An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a `Part`, which will then be sent back to the model.

Signature:

```typescript
functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
```
diff --git a/packages/ai/src/api.ts b/packages/ai/src/api.ts
index 418c17bb49c..5b7d02c934a 100644
--- a/packages/ai/src/api.ts
+++ b/packages/ai/src/api.ts
@@ -45,6 +45,11 @@ export * from './requests/schema-builder';
 export { ImagenImageFormat } from './requests/imagen-image-format';
 export { AIModel, GenerativeModel, LiveGenerativeModel, ImagenModel, AIError };
 export { Backend, VertexAIBackend, GoogleAIBackend } from './backend';
+export {
+  startAudioConversation,
+  AudioConversationController,
+  StartAudioConversationOptions
+} from './methods/live-session-helpers';
 
 declare module '@firebase/component' {
   interface NameServiceMapping {
diff --git a/packages/ai/src/methods/live-session-helpers.test.ts b/packages/ai/src/methods/live-session-helpers.test.ts
new file mode 100644
index 00000000000..d7d1e2aabbf
--- /dev/null
+++ b/packages/ai/src/methods/live-session-helpers.test.ts
@@ -0,0 +1,356 @@
/**
 * @license
 * Copyright 2025 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { expect, use } from 'chai';
import sinon, { SinonFakeTimers, SinonStub, SinonStubbedInstance } from 'sinon';
import sinonChai from 'sinon-chai';
import chaiAsPromised from 'chai-as-promised';
import { AIError } from '../errors';
import { startAudioConversation } from './live-session-helpers';
import { LiveServerContent, LiveServerToolCall, Part } from '../types';
import { logger } from '../logger';
import { isNode } from '@firebase/util';

use(sinonChai);
use(chaiAsPromised);

// A mock message generator to simulate receiving messages from the server.
class MockMessageGenerator {
  private resolvers: Array<(result: IteratorResult<unknown>) => void> = [];
  isDone = false;

  next(): Promise<IteratorResult<unknown>> {
    return new Promise(resolve => this.resolvers.push(resolve));
  }

  simulateMessage(message: any): void {
    const resolver = this.resolvers.shift();
    if (resolver) {
      resolver({ value: message, done: false });
    }
  }

  endStream(): void {
    if (this.isDone) {
      return;
    }
    this.isDone = true;
    this.resolvers.forEach(resolve =>
      resolve({ value: undefined, done: true })
    );
    this.resolvers = [];
  }
}

// A mock LiveSession to intercept calls to the server.
class MockLiveSession {
  isClosed = false;
  inConversation = false;
  send = sinon.stub();
  sendMediaChunks = sinon.stub();
  messageGenerator = new MockMessageGenerator();
  receive = (): MockMessageGenerator => this.messageGenerator;
}

// Stubs and mocks for Web APIs used by the helpers.
let mockAudioContext: SinonStubbedInstance<AudioContext>;
let mockMediaStream: SinonStubbedInstance<MediaStream>;
let getUserMediaStub: SinonStub;
let mockWorkletNode: SinonStubbedInstance<AudioWorkletNode>;
let mockSourceNode: SinonStubbedInstance<MediaStreamAudioSourceNode>;
let mockAudioBufferSource: any;

function setupGlobalMocks(): void {
  // Mock AudioWorkletNode
  mockWorkletNode = {
    port: {
      postMessage: sinon.stub(),
      onmessage: null
    },
    connect: sinon.stub(),
    disconnect: sinon.stub()
  } as any;
  sinon.stub(global, 'AudioWorkletNode').returns(mockWorkletNode);

  // Mock AudioContext
  mockAudioBufferSource = {
    connect: sinon.stub(),
    start: sinon.stub(),
    stop: sinon.stub(),
    onended: null,
    buffer: { duration: 0.5 } // Mock duration for scheduling
  };
  mockSourceNode = {
    connect: sinon.stub(),
    disconnect: sinon.stub()
  } as any;
  mockAudioContext = {
    resume: sinon.stub().resolves(),
    close: sinon.stub().resolves(),
    createBuffer: sinon.stub().returns({
      getChannelData: sinon.stub().returns(new Float32Array(1))
    } as any),
    createBufferSource: sinon.stub().returns(mockAudioBufferSource),
    createMediaStreamSource: sinon.stub().returns(mockSourceNode),
    audioWorklet: {
      addModule: sinon.stub().resolves()
    },
    state: 'suspended' as AudioContextState,
    currentTime: 0
  } as any;
  sinon.stub(global, 'AudioContext').returns(mockAudioContext);

  // Mock other globals
  sinon.stub(global, 'Blob').returns({} as Blob);
  sinon.stub(URL, 'createObjectURL').returns('blob:http://localhost/fake-url');

  // Mock getUserMedia
  mockMediaStream = {
    getTracks: sinon.stub().returns([{ stop: sinon.stub() } as any])
  } as any;
  getUserMediaStub = sinon.stub().resolves(mockMediaStream);
  if (typeof navigator === 'undefined') {
    (global as any).navigator = {
      mediaDevices: { getUserMedia: getUserMediaStub }
    };
  } else {
    if (!navigator.mediaDevices) {
      (navigator as any).mediaDevices = {};
    }
    sinon
      .stub(navigator.mediaDevices, 'getUserMedia')
      .callsFake(getUserMediaStub);
  }
}

describe('Audio Conversation Helpers', () => {
  let clock: SinonFakeTimers;

  if (isNode()) {
    return;
  }

  beforeEach(() => {
    clock = sinon.useFakeTimers();
    setupGlobalMocks();
  });

  afterEach(() => {
    sinon.restore();
    clock.restore();
  });

  describe('startAudioConversation', () => {
    let liveSession: MockLiveSession;
    beforeEach(() => {
      liveSession = new MockLiveSession();
    });

    it('should throw if the session is closed.', async () => {
      liveSession.isClosed = true;
      await expect(
        startAudioConversation(liveSession as any)
      ).to.be.rejectedWith(AIError, /on a closed LiveSession/);
    });

    it('should throw if a conversation is in progress.', async () => {
      liveSession.inConversation = true;
      await expect(
        startAudioConversation(liveSession as any)
      ).to.be.rejectedWith(AIError, /is already in progress/);
    });

    it('should throw if APIs are not supported.', async () => {
      (global as any).AudioWorkletNode = undefined; // Simulate lack of support
      await expect(
        startAudioConversation(liveSession as any)
      ).to.be.rejectedWith(AIError, /not supported in this environment/);
    });

    it('should throw if microphone permissions are denied.', async () => {
      getUserMediaStub.rejects(
        new DOMException('Permission denied', 'NotAllowedError')
      );
      await expect(
        startAudioConversation(liveSession as any)
      ).to.be.rejectedWith(DOMException, /Permission denied/);
    });

    it('should return a controller with a stop method on success.', async () => {
      const controller = await startAudioConversation(liveSession as any);
      expect(controller).to.have.property('stop').that.is.a('function');
      // Ensure it doesn't throw during cleanup
      await expect(controller.stop()).to.be.fulfilled;
    });
  });

  describe('AudioConversationRunner', () => {
    let liveSession: MockLiveSession;
    let warnStub: SinonStub;

    beforeEach(() => {
      liveSession = new MockLiveSession();
      warnStub = sinon.stub(logger, 'warn');
    });

    afterEach(() => {
      warnStub.restore();
    });

    it('should send processed audio chunks received from the worklet.', async () => {
      const controller = await startAudioConversation(liveSession as any);
      expect(mockWorkletNode.port.onmessage).to.be.a('function');

      // Simulate the worklet sending a message
      const fakeAudioData = new Int16Array(128);
      mockWorkletNode.port.onmessage!({ data: fakeAudioData } as MessageEvent);

      await clock.tickAsync(1);

      expect(liveSession.sendMediaChunks).to.have.been.calledOnce;
      const [sentChunk] = liveSession.sendMediaChunks.getCall(0).args[0];
      expect(sentChunk.mimeType).to.equal('audio/pcm');
      expect(sentChunk.data).to.be.a('string');
      await controller.stop();
    });

    it('should queue and play audio from a serverContent message.', async () => {
      const controller = await startAudioConversation(liveSession as any);
      const serverMessage: LiveServerContent = {
        type: 'serverContent',
        modelTurn: {
          role: 'model',
          parts: [
            { inlineData: { mimeType: 'audio/pcm', data: '1111222233334444' } }
          ] // base64 for dummy data
        }
      };

      liveSession.messageGenerator.simulateMessage(serverMessage);
      await clock.tickAsync(1); // allow message processing

      expect(mockAudioContext.createBuffer).to.have.been.calledOnce;
      expect(mockAudioBufferSource.start).to.have.been.calledOnce;
      await controller.stop();
    });

    it('should call function handler and send result on toolCall message.', async () => {
      const handlerStub = sinon.stub().resolves({
        functionResponse: { name: 'get_weather', response: { temp: '72F' } }
      } as Part);
      const controller = await startAudioConversation(liveSession as any, {
        functionCallingHandler: handlerStub
      });

      const toolCallMessage: LiveServerToolCall = {
        type: 'toolCall',
        functionCalls: [{ name: 'get_weather', args: { location: 'LA' } }]
      };

      liveSession.messageGenerator.simulateMessage(toolCallMessage);
      await clock.tickAsync(1);

      expect(handlerStub).to.have.been.calledOnceWith(
        toolCallMessage.functionCalls
      );
      expect(liveSession.send).to.have.been.calledOnceWith([
        { functionResponse: { name: 'get_weather', response: { temp: '72F' } } }
      ]);
      await controller.stop();
    });

    it('should clear queue and stop sources on an interruption message.', async () => {
      const controller = await startAudioConversation(liveSession as any);

      // 1. Enqueue some audio that is "playing"
      const playingMessage: LiveServerContent = {
        type: 'serverContent',
        modelTurn: {
          parts: [
            { inlineData: { mimeType: 'audio/pcm', data: '1111222233334444' } }
          ],
          role: 'model'
        }
      };
      liveSession.messageGenerator.simulateMessage(playingMessage);
      await clock.tickAsync(1);
      expect(mockAudioBufferSource.start).to.have.been.calledOnce;

      // 2. Enqueue another chunk that is now scheduled
      liveSession.messageGenerator.simulateMessage(playingMessage);
      await clock.tickAsync(1);
      expect(mockAudioBufferSource.start).to.have.been.calledTwice;
      // 3. Send interruption message
      const interruptionMessage: LiveServerContent = {
        type: 'serverContent',
        interrupted: true
      };
      liveSession.messageGenerator.simulateMessage(interruptionMessage);
      await clock.tickAsync(1);

      // Assert that all scheduled sources were stopped.
      expect(mockAudioBufferSource.stop).to.have.been.calledTwice;

      // 4. Send new audio post-interruption
      const newMessage: LiveServerContent = {
        type: 'serverContent',
        modelTurn: {
          parts: [
            { inlineData: { mimeType: 'audio/pcm', data: '1111222233334444' } }
          ],
          role: 'model'
        }
      };
      liveSession.messageGenerator.simulateMessage(newMessage);
      await clock.tickAsync(1);

      // Assert a new source was created and started (total of 3 starts)
      expect(mockAudioBufferSource.start).to.have.been.calledThrice;

      await controller.stop();
    });

    it('should warn if no function handler is provided for a toolCall message.', async () => {
      const controller = await startAudioConversation(liveSession as any);
      liveSession.messageGenerator.simulateMessage({
        type: 'toolCall',
        functionCalls: [{ name: 'test' }]
      });
      await clock.tickAsync(1);

      expect(warnStub).to.have.been.calledWithMatch(
        /functionCallingHandler is undefined/
      );
      await controller.stop();
    });

    it('stop() should call cleanup and release all resources.', async () => {
      const controller = await startAudioConversation(liveSession as any);

      // The runner's cleanup method is internal and cannot be spied on directly,
      // so assert on the observable effects of stop(): every mocked resource
      // should be torn down.
      await controller.stop();

      expect(mockWorkletNode.disconnect).to.have.been.calledOnce;
      expect(mockSourceNode.disconnect).to.have.been.calledOnce;
      expect(mockMediaStream.getTracks()[0].stop).to.have.been.calledOnce;
      expect(mockAudioContext.close).to.have.been.calledOnce;
      expect(liveSession.inConversation).to.be.false;
    });
  });
});
diff --git a/packages/ai/src/methods/live-session-helpers.ts b/packages/ai/src/methods/live-session-helpers.ts
new file mode 100644
index 00000000000..e52715de36e
--- /dev/null
+++ b/packages/ai/src/methods/live-session-helpers.ts
@@ -0,0 +1,497 @@
/**
 * @license
 * Copyright 2025 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import { AIError } from '../errors';
import { logger } from '../logger';
import {
  AIErrorCode,
  GenerativeContentBlob,
  LiveServerContent,
  LiveServerToolCall,
  Part
} from '../types';
import { LiveSession } from './live-session';
import { Deferred } from '@firebase/util';

const SERVER_INPUT_SAMPLE_RATE = 16_000;
const SERVER_OUTPUT_SAMPLE_RATE = 24_000;

const AUDIO_PROCESSOR_NAME = 'audio-processor';

/**
 * The JS for an `AudioWorkletProcessor`.
 * This processor is responsible for taking raw audio from the microphone,
 * converting it to the required 16-bit 16kHz PCM, and posting it back to the main thread.
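 *
 * The target rate comes from SERVER_INPUT_SAMPLE_RATE (16,000 Hz); an
 * AudioContext typically runs natively at 44,100 or 48,000 Hz, which is why
 * the processor resamples each block before posting it.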
+ * + * See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor + * + * It is defined as a string here so that it can be converted into a `Blob` + * and loaded at runtime. + */ +const audioProcessorWorkletString = ` + class AudioProcessor extends AudioWorkletProcessor { + constructor(options) { + super(); + this.targetSampleRate = options.processorOptions.targetSampleRate; + // 'sampleRate' is a global variable available inside the AudioWorkletGlobalScope, + // representing the native sample rate of the AudioContext. + this.inputSampleRate = sampleRate; + } + + /** + * This method is called by the browser's audio engine for each block of audio data. + * Input is a single input, with a single channel (input[0][0]). + */ + process(inputs) { + const input = inputs[0]; + if (input && input.length > 0 && input[0].length > 0) { + const pcmData = input[0]; // Float32Array of raw audio samples. + + // Simple linear interpolation for resampling. + const resampled = new Float32Array(Math.round(pcmData.length * this.targetSampleRate / this.inputSampleRate)); + const ratio = pcmData.length / resampled.length; + for (let i = 0; i < resampled.length; i++) { + resampled[i] = pcmData[Math.floor(i * ratio)]; + } + + // Convert Float32 (-1, 1) samples to Int16 (-32768, 32767) + const resampledInt16 = new Int16Array(resampled.length); + for (let i = 0; i < resampled.length; i++) { + const sample = Math.max(-1, Math.min(1, resampled[i])); + if (sample < 0) { + resampledInt16[i] = sample * 32768; + } else { + resampledInt16[i] = sample * 32767; + } + } + + this.port.postMessage(resampledInt16); + } + // Return true to keep the processor alive and processing the next audio block. + return true; + } + } + + // Register the processor with a name that can be used to instantiate it from the main thread. + registerProcessor('${AUDIO_PROCESSOR_NAME}', AudioProcessor); +`; + +/** + * A controller for managing an active audio conversation. + * + * @beta + */ +export interface AudioConversationController { + /** + * Stops the audio conversation, closes the microphone connection, and + * cleans up resources. Returns a promise that resolves when cleanup is complete. + */ + stop: () => Promise; +} + +/** + * Options for {@link startAudioConversation}. + * + * @beta + */ +export interface StartAudioConversationOptions { + /** + * An async handler that is called when the model requests a function to be executed. + * The handler should perform the function call and return the result as a `Part`, + * which will then be sent back to the model. + */ + functionCallingHandler?: ( + functionCalls: LiveServerToolCall['functionCalls'] + ) => Promise; +} + +/** + * Dependencies needed by the {@link AudioConversationRunner}. + * + * @internal + */ +interface RunnerDependencies { + audioContext: AudioContext; + mediaStream: MediaStream; + sourceNode: MediaStreamAudioSourceNode; + workletNode: AudioWorkletNode; +} + +/** + * Encapsulates the core logic of an audio conversation. + * + * @internal + */ +export class AudioConversationRunner { + /** A flag to indicate if the conversation has been stopped. */ + private isStopped = false; + /** A deferred that contains a promise that is resolved when stop() is called, to unblock the receive loop. */ + private readonly stopDeferred = new Deferred(); + /** A promise that tracks the lifecycle of the main `runReceiveLoop`. */ + private readonly receiveLoopPromise: Promise; + + /** A FIFO queue of 24kHz, 16-bit PCM audio chunks received from the server. 
  private readonly playbackQueue: ArrayBuffer[] = [];
  /** Tracks scheduled audio sources. Used to cancel scheduled audio when the model is interrupted. */
  private scheduledSources: AudioBufferSourceNode[] = [];
  /** A high-precision timeline pointer for scheduling gapless audio playback. */
  private nextStartTime = 0;
  /** A mutex to prevent the playback processing loop from running multiple times concurrently. */
  private isPlaybackLoopRunning = false;

  constructor(
    private readonly liveSession: LiveSession,
    private readonly options: StartAudioConversationOptions,
    private readonly deps: RunnerDependencies
  ) {
    this.liveSession.inConversation = true;

    // Start listening for messages from the server.
    this.receiveLoopPromise = this.runReceiveLoop().finally(() =>
      this.cleanup()
    );

    // Set up the handler for receiving processed audio data from the worklet.
    // Message data has been resampled to 16kHz 16-bit PCM.
    this.deps.workletNode.port.onmessage = event => {
      if (this.isStopped) {
        return;
      }

      const pcm16 = event.data as Int16Array;
      const base64 = btoa(
        String.fromCharCode.apply(
          null,
          Array.from(new Uint8Array(pcm16.buffer))
        )
      );

      const chunk: GenerativeContentBlob = {
        mimeType: 'audio/pcm',
        data: base64
      };
      void this.liveSession.sendMediaChunks([chunk]);
    };
  }

  /**
   * Stops the conversation and unblocks the main receive loop.
   */
  async stop(): Promise<void> {
    if (this.isStopped) {
      return;
    }
    this.isStopped = true;
    this.stopDeferred.resolve(); // Unblock the receive loop
    await this.receiveLoopPromise; // Wait for the loop and cleanup to finish
  }

  /**
   * Cleans up all audio resources (nodes, stream tracks, context) and marks the
   * session as no longer in a conversation.
   */
  private cleanup(): void {
    this.interruptPlayback(); // Ensure all audio is stopped on final cleanup.
    this.deps.workletNode.port.onmessage = null;
    this.deps.workletNode.disconnect();
    this.deps.sourceNode.disconnect();
    this.deps.mediaStream.getTracks().forEach(track => track.stop());
    if (this.deps.audioContext.state !== 'closed') {
      void this.deps.audioContext.close();
    }
    this.liveSession.inConversation = false;
  }

  /**
   * Adds audio data to the queue and ensures the playback loop is running.
   */
  private enqueueAndPlay(audioData: ArrayBuffer): void {
    this.playbackQueue.push(audioData);
    // Will no-op if it's already running.
    void this.processPlaybackQueue();
  }

  /**
   * Stops all current and pending audio playback and clears the queue. This is
   * called when the server indicates the model's speech was interrupted
   * (`LiveServerContent.interrupted`).
   */
  private interruptPlayback(): void {
    // Stop all sources that have been scheduled. The onended event will fire for each,
    // which will clean up the scheduledSources array.
    [...this.scheduledSources].forEach(source => source.stop(0));

    // Clear the internal buffer of unprocessed audio chunks.
    this.playbackQueue.length = 0;

    // Reset the playback clock to start fresh.
    this.nextStartTime = this.deps.audioContext.currentTime;
  }

  /**
   * Processes the playback queue in a loop, scheduling each chunk in a gapless sequence.
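   * Each chunk is scheduled at max(currentTime, nextStartTime), and nextStartTime
   * then advances by the chunk's duration, so consecutive buffers play back to
   * back on the AudioContext clock.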
+ */ + private async processPlaybackQueue(): Promise { + if (this.isPlaybackLoopRunning) { + return; + } + this.isPlaybackLoopRunning = true; + + while (this.playbackQueue.length > 0 && !this.isStopped) { + const pcmRawBuffer = this.playbackQueue.shift()!; + try { + const pcm16 = new Int16Array(pcmRawBuffer); + const frameCount = pcm16.length; + + const audioBuffer = this.deps.audioContext.createBuffer( + 1, + frameCount, + SERVER_OUTPUT_SAMPLE_RATE + ); + + // Convert 16-bit PCM to 32-bit PCM, required by the Web Audio API. + const channelData = audioBuffer.getChannelData(0); + for (let i = 0; i < frameCount; i++) { + channelData[i] = pcm16[i] / 32768; // Normalize to Float32 range [-1.0, 1.0] + } + + const source = this.deps.audioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(this.deps.audioContext.destination); + + // Track the source and set up a handler to remove it from tracking when it finishes. + this.scheduledSources.push(source); + source.onended = () => { + this.scheduledSources = this.scheduledSources.filter( + s => s !== source + ); + }; + + // To prevent gaps, schedule the next chunk to start either now (if we're catching up) + // or exactly when the previous chunk is scheduled to end. + this.nextStartTime = Math.max( + this.deps.audioContext.currentTime, + this.nextStartTime + ); + source.start(this.nextStartTime); + + // Update the schedule for the *next* chunk. + this.nextStartTime += audioBuffer.duration; + } catch (e) { + logger.error('Error playing audio:', e); + } + } + + this.isPlaybackLoopRunning = false; + } + + /** + * The main loop that listens for and processes messages from the server. + */ + private async runReceiveLoop(): Promise { + const messageGenerator = this.liveSession.receive(); + while (!this.isStopped) { + const result = await Promise.race([ + messageGenerator.next(), + this.stopDeferred.promise + ]); + + if (this.isStopped || !result || result.done) { + break; + } + + const message = result.value; + if (message.type === 'serverContent') { + const serverContent = message as LiveServerContent; + if (serverContent.interrupted) { + this.interruptPlayback(); + } + + const audioPart = serverContent.modelTurn?.parts.find(part => + part.inlineData?.mimeType.startsWith('audio/') + ); + if (audioPart?.inlineData) { + const audioData = Uint8Array.from( + atob(audioPart.inlineData.data), + c => c.charCodeAt(0) + ).buffer; + this.enqueueAndPlay(audioData); + } + } else if (message.type === 'toolCall') { + if (!this.options.functionCallingHandler) { + logger.warn( + 'Received tool call message, but StartAudioConversationOptions.functionCallingHandler is undefined. Ignoring tool call.' + ); + } else { + try { + const resultPart = await this.options.functionCallingHandler( + message.functionCalls + ); + if (!this.isStopped) { + void this.liveSession.send([resultPart]); + } + } catch (e) { + throw new AIError( + AIErrorCode.ERROR, + `Function calling handler failed: ${(e as Error).message}` + ); + } + } + } + } + } +} + +/** + * Starts a real-time, bidirectional audio conversation with the model. This helper function manages + * the complexities of microphone access, audio recording, playback, and interruptions. + * + * @remarks Important: This function must be called in response to a user gesture + * (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}. 
+ * + * @example + * ```javascript + * const liveSession = await model.connect(); + * let conversationController; + * + * // This function must be called from within a click handler. + * async function startConversation() { + * try { + * conversationController = await startAudioConversation(liveSession); + * } catch (e) { + * // Handle AI-specific errors + * if (e instanceof AIError) { + * console.error("AI Error:", e.message); + * } + * // Handle microphone permission and hardware errors + * else if (e instanceof DOMException) { + * console.error("Microphone Error:", e.message); + * } + * // Handle other unexpected errors + * else { + * console.error("An unexpected error occurred:", e); + * } + * } + * } + * + * // Later, to stop the conversation: + * // if (conversationController) { + * // await conversationController.stop(); + * // } + * ``` + * + * @param liveSession - An active {@link LiveSession} instance. + * @param options - Configuration options for the audio conversation. + * @returns A `Promise` that resolves with an {@link AudioConversationController}. + * @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`). + * @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions. + * + * @beta + */ +export async function startAudioConversation( + liveSession: LiveSession, + options: StartAudioConversationOptions = {} +): Promise { + if (liveSession.isClosed) { + throw new AIError( + AIErrorCode.SESSION_CLOSED, + 'Cannot start audio conversation on a closed LiveSession.' + ); + } + + if (liveSession.inConversation) { + throw new AIError( + AIErrorCode.REQUEST_ERROR, + 'An audio conversation is already in progress for this session.' + ); + } + + // Check for necessary Web API support. + if ( + typeof AudioWorkletNode === 'undefined' || + typeof AudioContext === 'undefined' || + typeof navigator === 'undefined' || + !navigator.mediaDevices + ) { + throw new AIError( + AIErrorCode.UNSUPPORTED, + 'Audio conversation is not supported in this environment. It requires the Web Audio API and AudioWorklet support.' + ); + } + + let audioContext: AudioContext | undefined; + try { + // 1. Set up the audio context. This must be in response to a user gesture. + // See: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy + audioContext = new AudioContext(); + if (audioContext.state === 'suspended') { + await audioContext.resume(); + } + + // 2. Prompt for microphone access and get the media stream. + // This can throw a variety of permission or hardware-related errors. + const mediaStream = await navigator.mediaDevices.getUserMedia({ + audio: true + }); + + // 3. Load the AudioWorklet processor. + // See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet + const workletBlob = new Blob([audioProcessorWorkletString], { + type: 'application/javascript' + }); + const workletURL = URL.createObjectURL(workletBlob); + await audioContext.audioWorklet.addModule(workletURL); + + // 4. 
    const sourceNode = audioContext.createMediaStreamSource(mediaStream);
    const workletNode = new AudioWorkletNode(
      audioContext,
      AUDIO_PROCESSOR_NAME,
      {
        processorOptions: { targetSampleRate: SERVER_INPUT_SAMPLE_RATE }
      }
    );
    sourceNode.connect(workletNode);

    // 5. Instantiate and return the runner which manages the conversation.
    const runner = new AudioConversationRunner(liveSession, options, {
      audioContext,
      mediaStream,
      sourceNode,
      workletNode
    });

    return { stop: () => runner.stop() };
  } catch (e) {
    // Ensure the audio context is closed on any setup error.
    if (audioContext && audioContext.state !== 'closed') {
      void audioContext.close();
    }

    // Re-throw specific, known error types directly. The user may want to handle `DOMException`
    // errors differently (for example, if permission to access audio device was denied).
    if (e instanceof AIError || e instanceof DOMException) {
      throw e;
    }

    // Wrap any other unexpected errors in a standard AIError.
    throw new AIError(
      AIErrorCode.ERROR,
      `Failed to initialize audio recording: ${(e as Error).message}`
    );
  }
}
diff --git a/packages/ai/src/methods/live-session.ts b/packages/ai/src/methods/live-session.ts
index b257d0a5787..11e5346adc0 100644
--- a/packages/ai/src/methods/live-session.ts
+++ b/packages/ai/src/methods/live-session.ts
@@ -47,6 +47,12 @@ export class LiveSession {
    * @beta
    */
   isClosed = false;
+  /**
+   * Indicates whether this Live session is being controlled by an `AudioConversationController`.
+   *
+   * @beta
+   */
+  inConversation = false;
 
   /**
    * @internal
diff --git a/packages/ai/src/types/enums.ts b/packages/ai/src/types/enums.ts
index b5e4e60ab4f..7196aad81a0 100644
--- a/packages/ai/src/types/enums.ts
+++ b/packages/ai/src/types/enums.ts
@@ -325,7 +325,12 @@ export const ResponseModality = {
    * Image.
    * @beta
    */
-  IMAGE: 'IMAGE'
+  IMAGE: 'IMAGE',
+  /**
+   * Audio.
+   * @beta
+   */
+  AUDIO: 'AUDIO'
 } as const;
 
 /**
diff --git a/packages/ai/src/types/requests.ts b/packages/ai/src/types/requests.ts
index f081149649c..80c0911c328 100644
--- a/packages/ai/src/types/requests.ts
+++ b/packages/ai/src/types/requests.ts
@@ -178,7 +178,7 @@ export interface LiveGenerationConfig {
   /**
    * The modalities of the response.
    */
-  responseModalities?: [ResponseModality];
+  responseModalities?: ResponseModality[];
 }
 
 /**
@@ -369,7 +369,7 @@ export interface PrebuiltVoiceConfig {
    *
    * For a full list of names and demos of what each voice sounds like, see {@link https://cloud.google.com/text-to-speech/docs/chirp3-hd | Chirp 3: HD Voices}.
    */
-  voiceConfig?: string;
+  voiceName?: string;
 }
 
 /**
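A minimal end-to-end sketch of how the pieces added here fit together. It assumes an initialized `FirebaseApp` (`app`), an illustrative Live-capable model name, and that `VoiceConfig` nests `PrebuiltVoiceConfig` as `{ prebuiltVoiceConfig: { voiceName } }`; the `VoiceConfig` shape and the voice name `'Aoede'` are not shown in this diff:

```typescript
import {
  getAI,
  getLiveGenerativeModel,
  startAudioConversation,
  ResponseModality
} from 'firebase/ai';

const model = getLiveGenerativeModel(getAI(app), {
  model: 'my-live-model', // illustrative; use a Live API-capable model name
  generationConfig: {
    responseModalities: [ResponseModality.AUDIO],
    speechConfig: { voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Aoede' } } }
  }
});

const liveSession = await model.connect();

// Must be called from a user-gesture handler to satisfy autoplay policies.
const controller = await startAudioConversation(liveSession, {
  // Execute the model-requested function and return the result as a Part.
  functionCallingHandler: async functionCalls => {
    const call = functionCalls?.[0];
    return {
      functionResponse: { name: call?.name ?? 'unknown', response: { ok: true } }
    };
  }
});

// Later: tear down the conversation, then the session.
await controller.stop();
await liveSession.close();
```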